diff --git a/.clang-format b/.clang-format
index 93fcdef5..19c0ad9c 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,136 +1,336 @@
 ---
 Language:        Cpp
-# BasedOnStyle:  LLVM
 AccessModifierOffset: -2
 AlignAfterOpenBracket: Align
-AlignConsecutiveMacros: false
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Right
-AlignOperands:   true
-AlignTrailingComments: true
+AlignArrayOfStructures: Right
+AlignConsecutiveAssignments:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: false
+  AlignFunctionPointers: false
+  PadOperators:    true
+AlignConsecutiveBitFields:
+  Enabled:         true
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: false
+  AlignFunctionPointers: false
+  PadOperators:    true
+AlignConsecutiveDeclarations:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: true
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignConsecutiveMacros:
+  Enabled:         true
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: false
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignConsecutiveShortCaseStatements:
+  Enabled:         true
+  AcrossEmptyLines: true
+  AcrossComments:  true
+  AlignCaseArrows: true
+  AlignCaseColons: false
+AlignConsecutiveTableGenBreakingDAGArgColons:
+  Enabled:         true
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: false
+  AlignFunctionPointers: false
+  PadOperators:    true
+AlignConsecutiveTableGenCondOperatorColons:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: false
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignConsecutiveTableGenDefinitionColons:
+  Enabled:         true
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionDeclarations: false
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignEscapedNewlines: LeftWithLastLine
+AlignOperands:   Align
+AlignTrailingComments:
+  Kind:            Always
+  OverEmptyLines:  0
 AllowAllArgumentsOnNextLine: true
-AllowAllConstructorInitializersOnNextLine: true
 AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: Never
+AllowBreakBeforeNoexceptSpecifier: Never
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseExpressionOnASingleLine: true
 AllowShortCaseLabelsOnASingleLine: false
+AllowShortCompoundRequirementOnASingleLine: true
+AllowShortEnumsOnASingleLine: true
 AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: WithoutElse
 AllowShortLambdasOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: Never
-AllowShortLoopsOnASingleLine: false
+AllowShortLoopsOnASingleLine: true
+AllowShortNamespacesOnASingleLine: false
 AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
+AttributeMacros:
+  - __capability
+BinPackArguments: false
+#BinPackLongBracedList: true
+BinPackParameters: OnePerLine
+BitFieldColonSpacing: Both
 BraceWrapping:
   AfterCaseLabel:  false
   AfterClass:      false
-  AfterControlStatement: false
+  AfterControlStatement: Never
   AfterEnum:       false
+  AfterExternBlock: false
   AfterFunction:   false
   AfterNamespace:  false
   AfterObjCDeclaration: false
   AfterStruct:     false
   AfterUnion:      false
-  AfterExternBlock: false
   BeforeCatch:     false
   BeforeElse:      false
+  BeforeLambdaBody: false
+  BeforeWhile:     false
   IndentBraces:    false
-  SplitEmptyFunction: true
-  SplitEmptyRecord: true
+  SplitEmptyFunction: false
+  SplitEmptyRecord: false
   SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeInheritanceComma: false
-BreakInheritanceList: BeforeColon
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakConstructorInitializers: BeforeColon
+BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Leave
 BreakAfterJavaFieldAnnotations: false
+#BreakAfterOpenBracketBracedList: true
+#BreakAfterOpenBracketFunction: true
+#BreakAfterOpenBracketIf: true
+#BreakAfterOpenBracketLoop: true
+#BreakAfterOpenBracketSwitch: true
+BreakAfterReturnType: ExceptShortType
+BreakArrays:     false
+BreakBeforeBinaryOperators: All
+BreakBeforeBraces: Custom
+#BreakBeforeCloseBracketBracedList: true
+#BreakBeforeCloseBracketFunction: true
+#BreakBeforeCloseBracketIf: true
+#BreakBeforeCloseBracketLoop: true
+#BreakBeforeCloseBracketSwitch: true
+BreakBeforeConceptDeclarations: Always
+BreakBeforeInlineASMColon: OnlyMultiline
+#BreakBeforeTemplateCloser: true
+#BreakBeforeTernaryOperators: false
+#BreakBinaryOperations: RespectPrecedence
+#BreakConstructorInitializers: AfterColon
+#BreakFunctionDefinitionParameters: false
+#BreakInheritanceList: AfterColon
 BreakStringLiterals: true
-ColumnLimit:     0
+BreakTemplateDeclarations: Yes
+ColumnLimit:     130
 CommentPragmas:  '^ IWYU pragma:'
-CompactNamespaces: true
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
+CompactNamespaces: false
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
-DeriveLineEnding: true
 DerivePointerAlignment: false
 DisableFormat:   false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
 ExperimentalAutoDetectBinPacking: false
 FixNamespaceComments: true
 ForEachMacros:
   - foreach
   - Q_FOREACH
   - BOOST_FOREACH
-IncludeBlocks:   Preserve
+IfMacros:
+  - KJ_IF_MAYBE
+IncludeBlocks:   Regroup
 IncludeCategories:
-  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+  - Regex:           '^<ext/.*\.h>'
     Priority:        2
     SortPriority:    0
-  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
-    Priority:        3
+    CaseSensitive:   false
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^<.*'
+    Priority:        2
     SortPriority:    0
+    CaseSensitive:   false
   - Regex:           '.*'
-    Priority:        1
+    Priority:        3
     SortPriority:    0
-IncludeIsMainRegex: '(Test)?$'
+    CaseSensitive:   false
+IncludeIsMainRegex: '([-_](test|unittest))?$'
 IncludeIsMainSourceRegex: ''
-IndentCaseLabels: false
+IndentAccessModifiers: false
+IndentCaseBlocks: false
+IndentCaseLabels: true
+IndentExportBlock: true
+IndentExternBlock: Indent
 IndentGotoLabels: true
-IndentPPDirectives: None
+IndentPPDirectives: AfterHash
+IndentRequiresClause: true
 IndentWidth:     4
 IndentWrappedFunctionNames: false
+InsertBraces:    true
+InsertNewlineAtEOF: true
+InsertTrailingCommas: Wrapped
+IntegerLiteralSeparator:
+  Binary:          0
+  BinaryMinDigits: 0
+  Decimal:         0
+  DecimalMinDigits: 0
+  Hex:             0
+  HexMinDigits:    0
 JavaScriptQuotes: Leave
 JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: true
+KeepEmptyLines:
+  AtEndOfFile:     false
+  AtStartOfBlock:  false
+  AtStartOfFile:   true
+KeepFormFeed:    false
+LambdaBodyIndentation: Signature
+LineEnding:      DeriveLF
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
+MainIncludeChar: Quote
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
-ObjCBinPackProtocolList: Auto
+ObjCBinPackProtocolList: Never
 ObjCBlockIndentWidth: 4
+ObjCBreakBeforeNestedBlockParam: true
 ObjCSpaceAfterProperty: false
 ObjCSpaceBeforeProtocolList: true
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 19
+PackConstructorInitializers: NextLine
+PenaltyBreakAssignment: 10
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakBeforeMemberAccess: 150
 PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyBreakTemplateDeclaration: 10
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 60
+PenaltyBreakFirstLessLess: 50
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakScopeResolution: 500
+PenaltyBreakString: 400
+PenaltyBreakTemplateDeclaration: 5
+PenaltyExcessCharacter: 15
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 400
 PointerAlignment: Right
-ReflowComments:  true
-SortIncludes:    true
-SortUsingDeclarations: true
-SpaceAfterCStyleCast: false
+PPIndentWidth:   -1
+QualifierAlignment: Leave
+RawStringFormats:
+  - Language:        Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+  - Language:        TextProto
+    Delimiters:
+      - pb
+      - PB
+      - proto
+      - PROTO
+    EnclosingFunctions:
+      - EqualsProto
+      - EquivToProto
+      - PARSE_PARTIAL_TEXT_PROTO
+      - PARSE_TEST_PROTO
+      - PARSE_TEXT_PROTO
+      - ParseTextOrDie
+      - ParseTextProtoOrDie
+      - ParseTestProto
+      - ParsePartialTestProto
+    CanonicalDelimiter: pb
+    BasedOnStyle:    google
+ReferenceAlignment: Pointer
+ReflowComments:  Always
+RemoveBracesLLVM: false
+RemoveEmptyLinesInUnwrappedLines: true
+RemoveParentheses: Leave
+RemoveSemicolon: false
+RequiresClausePosition: OwnLine
+RequiresExpressionIndentation: OuterScope
+SeparateDefinitionBlocks: Always
+ShortNamespaceLines: 0
+SkipMacroDefinitionBody: false
+SortIncludes:    CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: Lexicographic
+SpaceAfterCStyleCast: true
 SpaceAfterLogicalNot: false
-SpaceAfterTemplateKeyword: false
+#SpaceAfterOperatorKeyword: false
+SpaceAfterTemplateKeyword: true
+SpaceAroundPointerQualifiers: Default
 SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
 SpaceBeforeCpp11BracedList: false
 SpaceBeforeCtorInitializerColon: true
 SpaceBeforeInheritanceColon: true
-SpaceBeforeParens: ControlStatements
+SpaceBeforeJsonColon: false
+SpaceBeforeParens: Custom
+SpaceBeforeParensOptions:
+  AfterControlStatements: true
+  AfterForeachMacros: true
+  AfterFunctionDefinitionName: false
+  AfterFunctionDeclarationName: false
+  AfterIfMacros:   true
+  #AfterNot: true
+  AfterOverloadedOperator: false
+  AfterPlacementOperator: true
+  AfterRequiresInClause: true
+  AfterRequiresInExpression: true
+  BeforeNonEmptyParentheses: false
 SpaceBeforeRangeBasedForLoopColon: true
-SpaceInEmptyBlock: false
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
-SpacesInAngles:  false
-SpacesInConditionalStatement: false
-SpacesInContainerLiterals: false
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
 SpaceBeforeSquareBrackets: false
-Standard:        Latest
+#SpaceInEmptyBlock: true
+#SpaceInEmptyBraces: Block
+SpacesBeforeTrailingComments: 4
+SpacesInAngles:  Never
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Minimum:         1
+  Maximum:         -1
+SpacesInParens:  Custom
+SpacesInParensOptions:
+  ExceptDoubleParentheses: true
+  InConditionalStatements: false
+  InCStyleCasts:   false
+  InEmptyParentheses: false
+  Other:           false
+SpacesInSquareBrackets: false
+Standard:        Auto
+StatementAttributeLikeMacros:
+  - Q_EMIT
 StatementMacros:
   - Q_UNUSED
   - QT_REQUIRE_VERSION
 TabWidth:        8
-UseCRLF:         false
+TableGenBreakInsideDAGArg: DontBreak
 UseTab:          Never
+VerilogBreakBetweenInstancePorts: true
+WhitespaceSensitiveMacros:
+  - BOOST_PP_STRINGIZE
+  - CF_SWIFT_NAME
+  - NS_SWIFT_NAME
+  - PP_STRINGIZE
+  - STRINGIZE
+WrapNamespaceBodyWithEmptyLines: Always
 ...
diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 00000000..7299225d
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,24 @@
+Checks: '-*,readability-identifier-naming'
+
+CheckOptions:
+  # 1. Non-static data members (any access level, classes and structs): camelBack with a trailing underscore (e.g., memberVariableTest_)
+  - key:             readability-identifier-naming.MemberCase
+    value:           camelBack
+  - key:             readability-identifier-naming.MemberSuffix
+    value:           _
+
+  # 2. Functions (and methods, which fall back to FunctionCase since no MethodCase is set): CamelCase (e.g., CalculateTotal)
+  - key:             readability-identifier-naming.FunctionCase
+    value:           CamelCase
+
+  # 3. Variables/Parameters: camelBack (e.g., totalValue); VariableCase is the fallback for every variable kind, not only locals
+  - key:             readability-identifier-naming.VariableCase
+    value:           camelBack
+  - key:             readability-identifier-naming.ParameterCase
+    value:           camelBack
+
+  # 4. Classes/Structs: CamelCase (e.g., MyClass)
+  - key:             readability-identifier-naming.ClassCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.StructCase
+    value:           CamelCase
\ No newline at end of file
diff --git a/apps/bsp_test_suite.cpp b/apps/bsp_test_suite.cpp
index c42f67fd..f31c1972 100644
--- a/apps/bsp_test_suite.cpp
+++ b/apps/bsp_test_suite.cpp
@@ -26,7 +26,6 @@ limitations under the License.
 using graph_t = osp::computational_dag_edge_idx_vector_impl_def_int_t;
 
 int main(int argc, char *argv[]) {
-
     osp::BspScheduleRecompTestSuiteRunner<graph_t> runner;
     return runner.run(argc, argv);
 
diff --git a/apps/coarser_plotter.cpp b/apps/coarser_plotter.cpp
index d9093b97..93cfae2c 100644
--- a/apps/coarser_plotter.cpp
+++ b/apps/coarser_plotter.cpp
@@ -35,9 +35,7 @@ int main(int argc, char *argv[]) {
     }
 
     std::string graph_file = argv[1];
-    std::string graph_name = graph_file.substr(graph_file.rfind("/") + 1,
-                                                       graph_file.rfind(".") - graph_file.rfind("/") - 1);
-
+    std::string graph_name = graph_file.substr(graph_file.rfind("/") + 1, graph_file.rfind(".") - graph_file.rfind("/") - 1);
 
     Graph_t graph;
     bool status = file_reader::readGraph(graph_file, graph);
@@ -46,8 +44,7 @@ int main(int argc, char *argv[]) {
         return 1;
     }
 
-
-    SarkarParams::MulParameters< v_workw_t<Graph_t> > params;
+    SarkarParams::MulParameters<v_workw_t<Graph_t>> params;
     params.commCostVec = std::vector<v_workw_t<Graph_t>>({1, 2, 5, 10, 20, 50, 100, 200, 500, 1000});
     params.max_num_iteration_without_changes = 3;
     params.leniency = 0.005;
@@ -63,7 +60,7 @@ int main(int argc, char *argv[]) {
 
     Graph_t graph_copy = graph;
     bool ignore_vertex_types = false;
-    
+
     if (ignore_vertex_types) {
         for (const auto &vert : graph_copy.vertices()) {
             graph_copy.set_vertex_type(vert, 0);
@@ -74,7 +71,7 @@ int main(int argc, char *argv[]) {
 
     std::vector<unsigned> colours(contraction_map.size());
     for (std::size_t i = 0; i < contraction_map.size(); ++i) {
-        colours[i] = static_cast<unsigned>( contraction_map[i] );
+        colours[i] = static_cast<unsigned>(contraction_map[i]);
     }
 
     std::ofstream out_dot(argv[2]);
@@ -86,7 +83,7 @@ int main(int argc, char *argv[]) {
     DotFileWriter writer;
     writer.write_colored_graph(out_dot, graph, colours);
 
-    if (argc >=4 ) {
+    if (argc >= 4) {
         std::ofstream coarse_out_dot(argv[3]);
         if (!coarse_out_dot.is_open()) {
             std::cout << "Unable to write/open output file.\n";
@@ -100,4 +97,4 @@ int main(int argc, char *argv[]) {
     }
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/graph_analyser.cpp b/apps/graph_analyser.cpp
index 25106519..48e4f3ee 100644
--- a/apps/graph_analyser.cpp
+++ b/apps/graph_analyser.cpp
@@ -23,11 +23,11 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
+#include "osp/auxiliary/io/general_file_reader.hpp"
 #include "osp/auxiliary/misc.hpp"
 #include "osp/graph_algorithms/directed_graph_path_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
 
 using namespace osp;
 
@@ -151,22 +151,24 @@ int main(int argc, char *argv[]) {
                        << std::endl;
 
     for (const auto &dirEntry : std::filesystem::recursive_directory_iterator(graph_dir)) {
-        if (std::filesystem::is_directory(dirEntry))
+        if (std::filesystem::is_directory(dirEntry)) {
             continue;
+        }
 
         std::cout << "Processing: " << dirEntry << std::endl;
 
         std::string path_str = dirEntry.path();
-        
+
         ComputationalDag graph;
         bool status = file_reader::readGraph(dirEntry.path(), graph);
         if (!status) {
             std::cout << "Failed to read graph\n";
             return 1;
-        }     
+        }
 
-        if (!status)
+        if (!status) {
             continue;
+        }
 
         std::string graph_name = path_str.substr(path_str.rfind("/") + 1);
         graph_name = graph_name.substr(0, graph_name.rfind("."));
@@ -177,4 +179,4 @@ int main(int argc, char *argv[]) {
     }
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/graph_converter.cpp b/apps/graph_converter.cpp
index 6c0d50db..3ffedd4f 100644
--- a/apps/graph_converter.cpp
+++ b/apps/graph_converter.cpp
@@ -33,23 +33,19 @@ void print_usage(const char *prog_name) {
     std::cerr << "Graph Format Converter" << std::endl;
     std::cerr << "----------------------" << std::endl;
     std::cerr << "This tool converts a directed graph from one file format to another. The desired output" << std::endl;
-    std::cerr << "format is determined by the file extension of the output file." << std::endl
-              << std::endl;
+    std::cerr << "format is determined by the file extension of the output file." << std::endl << std::endl;
     std::cerr << "Usage: " << prog_name << " <input_file> <output_file>" << std::endl << std::endl;
     std::cerr << "Arguments:" << std::endl;
-    std::cerr << "  <input_file>   Path to the input graph file." << std::endl
-              << std::endl;
+    std::cerr << "  <input_file>   Path to the input graph file." << std::endl << std::endl;
     std::cerr << "  <output_file>  Path for the output graph file. Special values of '.dot' or '.hdag' can be" << std::endl;
     std::cerr << "                 used to automatically generate the output filename by replacing the input" << std::endl;
     std::cerr << "                 file's extension with the specified one." << std::endl;
     std::cerr << std::endl;
     std::cerr << "Supported Formats:" << std::endl;
     std::cerr << "  Input (by extension):  .hdag, .mtx, .dot" << std::endl;
-    std::cerr << "  Output (by extension): .hdag, .dot" << std::endl
-              << std::endl;
+    std::cerr << "  Output (by extension): .hdag, .dot" << std::endl << std::endl;
     std::cerr << "The .hdag format is the HyperdagDB format. A detailed description can be found at:" << std::endl;
-    std::cerr << "https://github.com/Algebraic-Programming/HyperDAG_DB" << std::endl
-              << std::endl;
+    std::cerr << "https://github.com/Algebraic-Programming/HyperDAG_DB" << std::endl << std::endl;
     std::cerr << "Examples:" << std::endl;
     std::cerr << "  " << prog_name << " my_graph.mtx my_graph.hdag" << std::endl;
     std::cerr << "  " << prog_name << " my_graph.hdag my_graph.dot" << std::endl;
@@ -98,8 +94,8 @@ int main(int argc, char *argv[]) {
         return 1;
     }
 
-    std::cout << "Successfully read graph with " << graph.num_vertices() << " vertices and " << graph.num_edges()
-              << " edges." << std::endl;
+    std::cout << "Successfully read graph with " << graph.num_vertices() << " vertices and " << graph.num_edges() << " edges."
+              << std::endl;
 
     std::filesystem::path output_path(output_filename);
     std::string output_ext = output_path.extension().string();
@@ -118,4 +114,4 @@ int main(int argc, char *argv[]) {
     std::cout << "Successfully wrote graph to " << output_filename << std::endl;
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/graph_generator/gen_Erdos-Renyi_graph.cpp b/apps/graph_generator/gen_Erdos-Renyi_graph.cpp
index 5a429624..7c816302 100644
--- a/apps/graph_generator/gen_Erdos-Renyi_graph.cpp
+++ b/apps/graph_generator/gen_Erdos-Renyi_graph.cpp
@@ -16,15 +16,15 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-#include "osp/auxiliary/misc.hpp"
-#include "osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-
 #include <fstream>
 #include <iostream>
 #include <random>
 #include <string>
 
+#include "osp/auxiliary/misc.hpp"
+#include "osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+
 using namespace osp;
 
 using ComputationalDag = computational_dag_vector_impl_def_int_t;
@@ -32,8 +32,7 @@ using VertexType = vertex_idx_t<ComputationalDag>;
 
 int main(int argc, char *argv[]) {
     if (argc < 3) {
-        std::cerr << "Usage: " << argv[0]
-                  << " <number of vertices> <expected outdegree> (optional:) <number of graphs>\n"
+        std::cerr << "Usage: " << argv[0] << " <number of vertices> <expected outdegree> (optional:) <number of graphs>\n"
                   << std::endl;
         return 1;
     }
@@ -97,19 +96,18 @@ int main(int argc, char *argv[]) {
         std::ofstream graph_write;
         graph_write.open(graph_name);
         graph_write << header;
-        graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " +
-                           std::to_string(graph.num_edges() + graph.num_vertices()) + "\n";
+        graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " "
+                           + std::to_string(graph.num_edges() + graph.num_vertices()) + "\n";
         for (VertexType i = 0; i < num_vert; i++) {
             double val = (1 - 2 * randInt(2)) * std::exp(unif_log(re));
             graph_write << std::to_string(i + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n";
             for (const auto &chld : graph.children(i)) {
                 val = unif(re);
-                graph_write << std::to_string(chld + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) +
-                                   "\n";
+                graph_write << std::to_string(chld + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n";
             }
         }
         graph_write.close();
     }
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/graph_generator/gen_near_diag_random_graph.cpp b/apps/graph_generator/gen_near_diag_random_graph.cpp
index 30e4fb2e..ede87c5c 100644
--- a/apps/graph_generator/gen_near_diag_random_graph.cpp
+++ b/apps/graph_generator/gen_near_diag_random_graph.cpp
@@ -16,15 +16,15 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-#include "osp/auxiliary/misc.hpp"
-#include "osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-
 #include <fstream>
 #include <iostream>
 #include <random>
 #include <string>
 
+#include "osp/auxiliary/misc.hpp"
+#include "osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+
 using namespace osp;
 
 using ComputationalDag = computational_dag_vector_impl_def_int_t;
@@ -32,8 +32,7 @@ using VertexType = vertex_idx_t<ComputationalDag>;
 
 int main(int argc, char *argv[]) {
     if (argc < 4) {
-        std::cerr << "Usage: " << argv[0]
-                  << " <number of vertices> <probability> <bandwidth> (optional:) <number of graphs>\n"
+        std::cerr << "Usage: " << argv[0] << " <number of vertices> <probability> <bandwidth> (optional:) <number of graphs>\n"
                   << std::endl;
         return 1;
     }
@@ -103,19 +102,18 @@ int main(int argc, char *argv[]) {
         std::ofstream graph_write;
         graph_write.open(graph_name);
         graph_write << header;
-        graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " +
-                           std::to_string(graph.num_edges() + graph.num_vertices()) + "\n";
+        graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " "
+                           + std::to_string(graph.num_edges() + graph.num_vertices()) + "\n";
         for (VertexType j = 0; j < num_vert; j++) {
             double val = (1 - 2 * randInt(2)) * std::exp(unif_log(re));
             graph_write << std::to_string(j + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n";
             for (const auto &chld : graph.children(j)) {
                 val = unif(re);
-                graph_write << std::to_string(chld + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) +
-                                   "\n";
+                graph_write << std::to_string(chld + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n";
             }
         }
         graph_write.close();
     }
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/graph_generator/post_incomplete_cholesky.cpp b/apps/graph_generator/post_incomplete_cholesky.cpp
index ccc4f0d2..757b569d 100644
--- a/apps/graph_generator/post_incomplete_cholesky.cpp
+++ b/apps/graph_generator/post_incomplete_cholesky.cpp
@@ -15,16 +15,15 @@ limitations under the License.
 
 @author Christos Matzoros, Toni Boehnlein, Pal Andras Papp, Raphael S. Steiner
 */
+#include <Eigen/Core>
+#include <Eigen/Dense>
+#include <Eigen/IterativeLinearSolvers>
+#include <Eigen/OrderingMethods>
+#include <Eigen/SparseCore>
 #include <filesystem>
 #include <string>
-#include <vector>
-
-#include <Eigen/SparseCore>
-#include <Eigen/IterativeLinearSolvers>
 #include <unsupported/Eigen/SparseExtra>
-#include <Eigen/Dense>
-#include <Eigen/Core>
-#include <Eigen/OrderingMethods>
+#include <vector>
 
 int main(int argc, char *argv[]) {
     if (argc < 2) {
@@ -38,21 +37,22 @@ int main(int argc, char *argv[]) {
     name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
     std::cout << "Graph: " << name_graph << std::endl;
-    
-    using SM_csc = Eigen::SparseMatrix<double, Eigen::ColMajor, int32_t>; // Compressed Sparse Column format
-    using SM_csr = Eigen::SparseMatrix<double, Eigen::RowMajor, int32_t>; // Compressed Sparse Row format
 
-    SM_csc L_csc; // Initialize a sparse matrix in CSC format
+    using SM_csc = Eigen::SparseMatrix<double, Eigen::ColMajor, int32_t>;    // Compressed Sparse Column format
+    using SM_csr = Eigen::SparseMatrix<double, Eigen::RowMajor, int32_t>;    // Compressed Sparse Row format
+
+    SM_csc L_csc;    // Initialize a sparse matrix in CSC format
 
     Eigen::loadMarket(L_csc, filename_graph);
 
-    SM_csr L_csr = L_csc;   // Reformat the sparse matrix from CSC to CSR format
+    SM_csr L_csr = L_csc;    // Reformat the sparse matrix from CSC to CSR format
 
     Eigen::IncompleteCholesky<double, Eigen::Lower, Eigen::AMDOrdering<int>> ichol(L_csc);
 
     SM_csc LChol_csc = ichol.matrixL();
 
-    Eigen::saveMarket(LChol_csc, filename_graph.substr(0, filename_graph.find_last_of(".")) + "_postChol.mtx", Eigen::UpLoType::Symmetric);
+    Eigen::saveMarket(
+        LChol_csc, filename_graph.substr(0, filename_graph.find_last_of(".")) + "_postChol.mtx", Eigen::UpLoType::Symmetric);
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/ilp_bsp_scheduler.cpp b/apps/ilp_bsp_scheduler.cpp
index d0c44e37..90fe30f7 100644
--- a/apps/ilp_bsp_scheduler.cpp
+++ b/apps/ilp_bsp_scheduler.cpp
@@ -23,14 +23,14 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "osp/auxiliary/misc.hpp"
-#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp"
-#include "osp/graph_algorithms/directed_graph_path_util.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
 #include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
 #include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/misc.hpp"
+#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp"
+#include "osp/graph_algorithms/directed_graph_path_util.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
 using namespace osp;
 
@@ -38,8 +38,7 @@ using ComputationalDag = computational_dag_edge_idx_vector_impl_def_int_t;
 
 int main(int argc, char *argv[]) {
     if (argc < 4) {
-        std::cerr << "Usage: " << argv[0] << " <input_file> <machine_file> <max_number_step> <optional:recomp>"
-                  << std::endl;
+        std::cerr << "Usage: " << argv[0] << " <input_file> <machine_file> <max_number_step> <optional:recomp>" << std::endl;
         return 1;
     }
 
@@ -78,7 +77,6 @@ int main(int argc, char *argv[]) {
     // instance.getArchitecture().setProcessorsWithTypes({0,0,1,1,1,1});
 
     if (!status_graph || !status_arch) {
-
         std::cout << "Reading files failed." << std::endl;
         return 1;
     }
@@ -90,23 +88,20 @@ int main(int argc, char *argv[]) {
 
     CoptFullScheduler<ComputationalDag> scheduler;
     scheduler.setMaxNumberOfSupersteps(steps);
-    
-    if (recomp) {
 
+    if (recomp) {
         BspScheduleRecomp<ComputationalDag> schedule(instance);
 
         auto status_schedule = scheduler.computeScheduleRecomp(schedule);
 
         if (status_schedule == RETURN_STATUS::OSP_SUCCESS || status_schedule == RETURN_STATUS::BEST_FOUND) {
-
             DotFileWriter dot_writer;
-            dot_writer.write_schedule_recomp(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" +
-                                                 scheduler.getScheduleName() + "_recomp_schedule.dot",
+            dot_writer.write_schedule_recomp(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_"
+                                                 + scheduler.getScheduleName() + "_recomp_schedule.dot",
                                              schedule);
 
-            dot_writer.write_schedule_recomp_duplicate(name_graph + "_" + name_machine + "_maxS_" +
-                                                           std::to_string(steps) + "_" + scheduler.getScheduleName() +
-                                                           "_duplicate_recomp_schedule.dot",
+            dot_writer.write_schedule_recomp_duplicate(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_"
+                                                           + scheduler.getScheduleName() + "_duplicate_recomp_schedule.dot",
                                                        schedule);
 
             std::cout << "Recomp Schedule computed with costs: " << schedule.computeCosts() << std::endl;
@@ -117,16 +112,14 @@ int main(int argc, char *argv[]) {
         }
 
     } else {
-
         BspSchedule<ComputationalDag> schedule(instance);
 
         auto status_schedule = scheduler.computeSchedule(schedule);
 
         if (status_schedule == RETURN_STATUS::OSP_SUCCESS || status_schedule == RETURN_STATUS::BEST_FOUND) {
-
             DotFileWriter dot_writer;
-            dot_writer.write_schedule(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" +
-                                          scheduler.getScheduleName() + "_schedule.dot",
+            dot_writer.write_schedule(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_"
+                                          + scheduler.getScheduleName() + "_schedule.dot",
                                       schedule);
 
             std::cout << "Schedule computed with costs: " << schedule.computeCosts() << std::endl;
diff --git a/apps/ilp_hypergraph_partitioner.cpp b/apps/ilp_hypergraph_partitioner.cpp
index fd184358..78b02a47 100644
--- a/apps/ilp_hypergraph_partitioner.cpp
+++ b/apps/ilp_hypergraph_partitioner.cpp
@@ -23,18 +23,17 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
+#include "osp/auxiliary/io/mtx_hypergraph_file_reader.hpp"
+#include "osp/auxiliary/io/partitioning_file_writer.hpp"
 #include "osp/auxiliary/misc.hpp"
 #include "osp/graph_algorithms/directed_graph_path_util.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/partitioning/model/hypergraph_utility.hpp"
 #include "osp/partitioning/partitioners/generic_FM.hpp"
 #include "osp/partitioning/partitioners/partitioning_ILP.hpp"
 #include "osp/partitioning/partitioners/partitioning_ILP_replication.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "osp/auxiliary/io/mtx_hypergraph_file_reader.hpp"
-#include "osp/auxiliary/io/partitioning_file_writer.hpp"
-    
 
 using namespace osp;
 
@@ -43,8 +42,7 @@ using hypergraph = Hypergraph_def_t;
 
 int main(int argc, char *argv[]) {
     if (argc < 4) {
-        std::cerr << "Usage: " << argv[0] << " <input_file> <nr_parts> <imbalance> <optional:part_repl|full_repl>"
-                  << std::endl;
+        std::cerr << "Usage: " << argv[0] << " <input_file> <nr_parts> <imbalance> <optional:part_repl|full_repl>" << std::endl;
         return 1;
     }
 
@@ -83,12 +81,13 @@ int main(int argc, char *argv[]) {
 
     PartitioningProblem<hypergraph> instance;
 
-    bool file_status = true;    
+    bool file_status = true;
     if (file_ending == "hdag") {
         graph dag;
         file_status = file_reader::readComputationalDagHyperdagFormatDB(filename_hgraph, dag);
-        if(file_status)
+        if (file_status) {
             instance.getHypergraph() = convert_from_cdag_as_hyperdag<hypergraph, graph>(dag);
+        }
     } else if (file_ending == "mtx") {
         file_status = file_reader::readHypergraphMartixMarketFormat(filename_hgraph, instance.getHypergraph());
     } else {
@@ -96,7 +95,6 @@ int main(int argc, char *argv[]) {
         return 1;
     }
     if (!file_status) {
-
         std::cout << "Reading input file failed." << std::endl;
         return 1;
     }
@@ -106,55 +104,65 @@ int main(int argc, char *argv[]) {
 
     Partitioning<hypergraph> initial_partition(instance);
     GenericFM<hypergraph> fm;
-    for(size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+    for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
         initial_partition.setAssignedPartition(node, static_cast<unsigned>(node % static_cast<size_t>(nr_parts)));
-    if(nr_parts == 2)
+    }
+    if (nr_parts == 2) {
         fm.ImprovePartitioning(initial_partition);
-    if(nr_parts == 4 || nr_parts == 8 || nr_parts == 16 || nr_parts == 32)
+    }
+    if (nr_parts == 4 || nr_parts == 8 || nr_parts == 16 || nr_parts == 32) {
         fm.RecursiveFM(initial_partition);
+    }
 
     if (replicate > 0) {
-
         PartitioningWithReplication<hypergraph> partition(instance);
         HypergraphPartitioningILPWithReplication<hypergraph> partitioner;
 
-        for(size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+        for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
             partition.setAssignedPartitions(node, {initial_partition.assignedPartition(node)});
-        if(partition.satisfiesBalanceConstraint())
+        }
+        if (partition.satisfiesBalanceConstraint()) {
             partitioner.setUseInitialSolution(true);
+        }
 
         partitioner.setTimeLimitSeconds(600);
-        if(replicate == 2)
-            partitioner.setReplicationModel(HypergraphPartitioningILPWithReplication<hypergraph>::REPLICATION_MODEL_IN_ILP::GENERAL);
+        if (replicate == 2) {
+            partitioner.setReplicationModel(
+                HypergraphPartitioningILPWithReplication<hypergraph>::REPLICATION_MODEL_IN_ILP::GENERAL);
+        }
 
         auto solve_status = partitioner.computePartitioning(partition);
 
         if (solve_status == RETURN_STATUS::OSP_SUCCESS || solve_status == RETURN_STATUS::BEST_FOUND) {
-            file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) +
-                "_ILP_rep" + std::to_string(replicate) + ".txt", partition);
-            std::cout << "Partitioning (with replicaiton) computed with costs: " << partition.computeConnectivityCost() << std::endl;
+            file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + "_ILP_rep"
+                                       + std::to_string(replicate) + ".txt",
+                                   partition);
+            std::cout << "Partitioning (with replicaiton) computed with costs: " << partition.computeConnectivityCost()
+                      << std::endl;
         } else {
             std::cout << "Computing partition failed." << std::endl;
             return 1;
         }
 
     } else {
-
         Partitioning<hypergraph> partition(instance);
         HypergraphPartitioningILP<hypergraph> partitioner;
 
-        for(size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+        for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
             partition.setAssignedPartition(node, initial_partition.assignedPartition(node));
-        if(partition.satisfiesBalanceConstraint())
+        }
+        if (partition.satisfiesBalanceConstraint()) {
             partitioner.setUseInitialSolution(true);
+        }
 
         partitioner.setTimeLimitSeconds(600);
 
         auto solve_status = partitioner.computePartitioning(partition);
 
         if (solve_status == RETURN_STATUS::OSP_SUCCESS || solve_status == RETURN_STATUS::BEST_FOUND) {
-            file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) +
-                "_ILP_rep" + std::to_string(replicate) + ".txt", partition);
+            file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + "_ILP_rep"
+                                       + std::to_string(replicate) + ".txt",
+                                   partition);
             std::cout << "Partitioning computed with costs: " << partition.computeConnectivityCost() << std::endl;
         } else {
             std::cout << "Computing partition failed." << std::endl;
diff --git a/apps/osp.cpp b/apps/osp.cpp
index 7ea2b0de..7c66224b 100644
--- a/apps/osp.cpp
+++ b/apps/osp.cpp
@@ -16,7 +16,6 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-#include "boost/log/utility/setup.hpp"
 #include <boost/graph/graphviz.hpp>
 #include <boost/log/trivial.hpp>
 #include <boost/property_tree/json_parser.hpp>
@@ -26,13 +25,14 @@ limitations under the License.
 #include <string>
 #include <tuple>
 
-#include "osp/auxiliary/misc.hpp"
-#include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "boost/log/utility/setup.hpp"
 #include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
 #include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
+#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/misc.hpp"
+#include "osp/bsp/model/BspSchedule.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "test_suite_runner/ConfigParser.hpp"
 #include "test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp"
 
@@ -45,7 +45,6 @@ std::filesystem::path getExecutablePath() { return std::filesystem::canonical("/
 
 // invoked upon program call
 int main(int argc, char *argv[]) {
-
     ConfigParser parser(getExecutablePath().remove_filename().string() += "osp_config.json");
 
     try {
@@ -56,17 +55,16 @@ int main(int argc, char *argv[]) {
     }
 
     for (auto &instance : parser.instances) {
-
         BspInstance<graph_t> bsp_instance;
 
         std::string filename_graph = instance.second.get_child("graphFile").get_value<std::string>();
-        std::string name_graph = filename_graph.substr(filename_graph.rfind("/") + 1,
-                                                       filename_graph.rfind(".") - filename_graph.rfind("/") - 1);
+        std::string name_graph
+            = filename_graph.substr(filename_graph.rfind("/") + 1, filename_graph.rfind(".") - filename_graph.rfind("/") - 1);
 
         std::string filename_machine = instance.second.get_child("machineParamsFile").get_value<std::string>();
 
-        std::string name_machine = filename_machine.substr(
-            filename_machine.rfind("/") + 1, filename_machine.rfind(".") - filename_machine.rfind("/") - 1);
+        std::string name_machine = filename_machine.substr(filename_machine.rfind("/") + 1,
+                                                           filename_machine.rfind(".") - filename_machine.rfind("/") - 1);
 
         bool status_architecture = file_reader::readBspArchitecture(filename_machine, bsp_instance.getArchitecture());
 
@@ -75,7 +73,7 @@ int main(int argc, char *argv[]) {
             continue;
         }
 
-        bool status_graph = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); 
+        bool status_graph = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag());
         if (!status_graph) {
             std::cerr << "Reading graph files " + filename_graph << " failed." << std::endl;
             continue;
@@ -93,7 +91,6 @@ int main(int argc, char *argv[]) {
 
         size_t algorithm_counter = 0;
         for (auto &algorithm : parser.scheduler) {
-
             schedulers_name[algorithm_counter] = algorithm.second.get_child("name").get_value<std::string>();
 
             const auto start_time = std::chrono::high_resolution_clock::now();
@@ -105,48 +102,45 @@ int main(int argc, char *argv[]) {
                 return_status = run_bsp_scheduler(parser, algorithm.second, schedule);
             } catch (...) {
                 schedulers_failed[algorithm_counter] = true;
-                std::cerr << "Error during execution of Scheduler " +
-                                 algorithm.second.get_child("name").get_value<std::string>() + "."
+                std::cerr << "Error during execution of Scheduler " + algorithm.second.get_child("name").get_value<std::string>()
+                                 + "."
                           << std::endl;
                 continue;
             }
 
             const auto finish_time = std::chrono::high_resolution_clock::now();
 
-            schedulers_compute_time[algorithm_counter] =
-                std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count();
+            schedulers_compute_time[algorithm_counter]
+                = std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count();
 
             if (return_status != RETURN_STATUS::OSP_SUCCESS && return_status != RETURN_STATUS::BEST_FOUND) {
-
                 schedulers_failed[algorithm_counter] = true;
                 if (return_status == RETURN_STATUS::ERROR) {
-                    std::cerr << "Error while computing schedule " +
-                                     algorithm.second.get_child("name").get_value<std::string>() + "."
+                    std::cerr << "Error while computing schedule " + algorithm.second.get_child("name").get_value<std::string>()
+                                     + "."
                               << std::endl;
                 } else if (return_status == RETURN_STATUS::TIMEOUT) {
-                    std::cerr << "Timeout while computing schedule " +
-                                     algorithm.second.get_child("name").get_value<std::string>() + "."
+                    std::cerr << "Timeout while computing schedule " + algorithm.second.get_child("name").get_value<std::string>()
+                                     + "."
                               << std::endl;
                 } else {
-                    std::cerr << "Unknown return status while computing schedule " +
-                                     algorithm.second.get_child("name").get_value<std::string>() + "."
+                    std::cerr << "Unknown return status while computing schedule "
+                                     + algorithm.second.get_child("name").get_value<std::string>() + "."
                               << std::endl;
                 }
             } else {
-
                 schedulers_costs[algorithm_counter] = BspScheduleCS<graph_t>(schedule).computeCosts();
                 schedulers_work_costs[algorithm_counter] = schedule.computeWorkCosts();
                 schedulers_supersteps[algorithm_counter] = schedule.numberOfSupersteps();
 
                 if (parser.global_params.get_child("outputSchedule").get_value<bool>()) {
                     try {
-
-                        file_writer::write_txt(name_graph + "_" + name_machine + "_" +
-                                               algorithm.second.get_child("name").get_value<std::string>() +
-                                               "_schedule.txt", schedule);
+                        file_writer::write_txt(name_graph + "_" + name_machine + "_"
+                                                   + algorithm.second.get_child("name").get_value<std::string>() + "_schedule.txt",
+                                               schedule);
                     } catch (std::exception &e) {
-                        std::cerr << "Writing schedule file for " + name_graph + ", " + name_machine + ", " +
-                                         schedulers_name[algorithm_counter] + " has failed."
+                        std::cerr << "Writing schedule file for " + name_graph + ", " + name_machine + ", "
+                                         + schedulers_name[algorithm_counter] + " has failed."
                                   << std::endl;
                         std::cerr << e.what() << std::endl;
                     }
@@ -154,12 +148,13 @@ int main(int argc, char *argv[]) {
 
                 if (parser.global_params.get_child("outputSankeySchedule").get_value<bool>()) {
                     try {
-                       file_writer::write_sankey(name_graph + "_" + name_machine + "_" +
-                                                  algorithm.second.get_child("name").get_value<std::string>() +
-                                                  "_sankey.sankey", BspScheduleCS<graph_t>(schedule));
+                        file_writer::write_sankey(name_graph + "_" + name_machine + "_"
+                                                      + algorithm.second.get_child("name").get_value<std::string>()
+                                                      + "_sankey.sankey",
+                                                  BspScheduleCS<graph_t>(schedule));
                     } catch (std::exception &e) {
-                        std::cerr << "Writing sankey file for " + name_graph + ", " + name_machine + ", " +
-                                         schedulers_name[algorithm_counter] + " has failed."
+                        std::cerr << "Writing sankey file for " + name_graph + ", " + name_machine + ", "
+                                         + schedulers_name[algorithm_counter] + " has failed."
                                   << std::endl;
                         std::cerr << e.what() << std::endl;
                     }
@@ -167,16 +162,15 @@ int main(int argc, char *argv[]) {
 
                 if (parser.global_params.get_child("outputDotSchedule").get_value<bool>()) {
                     try {
-
                         DotFileWriter sched_writer;
-                        sched_writer.write_schedule(name_graph + "_" + name_machine + "_" +
-                                                        algorithm.second.get_child("name").get_value<std::string>() +
-                                                        "_schedule.dot",
+                        sched_writer.write_schedule(name_graph + "_" + name_machine + "_"
+                                                        + algorithm.second.get_child("name").get_value<std::string>()
+                                                        + "_schedule.dot",
                                                     schedule);
 
                     } catch (std::exception &e) {
-                        std::cerr << "Writing dot file for " + name_graph + ", " + name_machine + ", " +
-                                         schedulers_name[algorithm_counter] + " has failed."
+                        std::cerr << "Writing dot file for " + name_graph + ", " + name_machine + ", "
+                                         + schedulers_name[algorithm_counter] + " has failed."
                                   << std::endl;
                         std::cerr << e.what() << std::endl;
                     }
@@ -188,8 +182,9 @@ int main(int argc, char *argv[]) {
 
         int tw = 1, ww = 1, cw = 1, nsw = 1, ct = 1;
         for (size_t i = 0; i < parser.scheduler.size(); i++) {
-            if (schedulers_failed[i])
+            if (schedulers_failed[i]) {
                 continue;
+            }
             tw = std::max(tw, 1 + int(std::log10(schedulers_costs[i])));
             ww = std::max(ww, 1 + int(std::log10(schedulers_work_costs[i])));
             cw = std::max(cw, 1 + int(std::log10(schedulers_costs[i] - schedulers_work_costs[i])));
@@ -200,8 +195,8 @@ int main(int argc, char *argv[]) {
         std::vector<size_t> ordering = sorting_arrangement(schedulers_costs);
 
         std::cout << std::endl << name_graph << " - " << name_machine << std::endl;
-        std::cout << "Number of Vertices: " + std::to_string(bsp_instance.getComputationalDag().num_vertices()) +
-                         "  Number of Edges: " + std::to_string(bsp_instance.getComputationalDag().num_edges())
+        std::cout << "Number of Vertices: " + std::to_string(bsp_instance.getComputationalDag().num_vertices())
+                         + "  Number of Edges: " + std::to_string(bsp_instance.getComputationalDag().num_edges())
                   << std::endl;
         for (size_t j = 0; j < parser.scheduler.size(); j++) {
             size_t i = j;
@@ -213,8 +208,7 @@ int main(int argc, char *argv[]) {
             } else {
                 std::cout << "total costs:  " << std::right << std::setw(tw) << schedulers_costs[i]
                           << "     work costs:  " << std::right << std::setw(ww) << schedulers_work_costs[i]
-                          << "     comm costs:  " << std::right << std::setw(cw)
-                          << schedulers_costs[i] - schedulers_work_costs[i]
+                          << "     comm costs:  " << std::right << std::setw(cw) << schedulers_costs[i] - schedulers_work_costs[i]
                           << "     number of supersteps:  " << std::right << std::setw(nsw) << schedulers_supersteps[i]
                           << "     compute time:  " << std::right << std::setw(ct) << schedulers_compute_time[i] << "ms"
                           << "     scheduler:  " << schedulers_name[i] << std::endl;
diff --git a/apps/osp_turnus.cpp b/apps/osp_turnus.cpp
index c8ba01f2..ea5f114d 100644
--- a/apps/osp_turnus.cpp
+++ b/apps/osp_turnus.cpp
@@ -20,12 +20,12 @@ limitations under the License.
 #include <iostream>
 #include <string>
 
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
 #include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
+#include "osp/auxiliary/io/general_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
 using namespace osp;
 
@@ -34,7 +34,6 @@ using mem_constr = persistent_transient_memory_constraint<graph_t>;
 
 // invoked upon program call
 int main(int argc, char *argv[]) {
-
     if (argc != 5) {
         std::cout << "Usage: " << argv[0] << " <input_file_dag> <num_proc> <memory_bound> <algorithm>" << std::endl;
         std::cout << "Available algorithms: bsp, etf, variance" << std::endl;
@@ -63,25 +62,21 @@ int main(int argc, char *argv[]) {
         return 1;
     }
 
-    boost::algorithm::to_lower(algorithm_name); // modifies str
+    boost::algorithm::to_lower(algorithm_name);    // modifies str
 
     BspSchedule<graph_t> bsp_schedule(bsp_instance);
     Scheduler<graph_t> *scheduler = nullptr;
 
     if (algorithm_name == "bsp") {
-
         float max_percent_idle_processors = 0.2f;
         bool increase_parallelism_in_new_superstep = true;
 
-        scheduler = new GreedyBspScheduler<graph_t, mem_constr>(
-            max_percent_idle_processors, increase_parallelism_in_new_superstep);
+        scheduler = new GreedyBspScheduler<graph_t, mem_constr>(max_percent_idle_processors, increase_parallelism_in_new_superstep);
 
     } else if (algorithm_name == "etf") {
-
         scheduler = new EtfScheduler<graph_t, mem_constr>(BL_EST);
 
     } else if (algorithm_name == "variance") {
-
         const double max_percent_idle_processors = 0.0;
         const bool increase_parallelism_in_new_superstep = true;
         const double variance_power = 6.0;
@@ -91,10 +86,15 @@ int main(int argc, char *argv[]) {
         const float bound_component_weight_percent = 4.0f;
         const float slack = 0.0f;
 
-        scheduler = new LightEdgeVariancePartitioner<graph_t, flat_spline_interpolation,mem_constr>(
-            max_percent_idle_processors, variance_power, heavy_is_x_times_median, min_percent_components_retained,
-            bound_component_weight_percent, increase_parallelism_in_new_superstep, 
-            max_priority_difference_percent, slack);
+        scheduler = new LightEdgeVariancePartitioner<graph_t, flat_spline_interpolation, mem_constr>(
+            max_percent_idle_processors,
+            variance_power,
+            heavy_is_x_times_median,
+            min_percent_components_retained,
+            bound_component_weight_percent,
+            increase_parallelism_in_new_superstep,
+            max_priority_difference_percent,
+            slack);
 
     } else {
         std::cout << "Unknown algorithm: " << algorithm_name << std::endl;
diff --git a/apps/sptrsv_test_suite.cpp b/apps/sptrsv_test_suite.cpp
index c065e32b..47643a9b 100644
--- a/apps/sptrsv_test_suite.cpp
+++ b/apps/sptrsv_test_suite.cpp
@@ -19,6 +19,7 @@ limitations under the License.
 #include <filesystem>
 #include <iostream>
 #include <string>
+
 #include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
 #include "test_suite_runner/BspScheduleTestSuiteRunner.hpp"
 
@@ -27,11 +28,10 @@ limitations under the License.
 using graph_t = osp::sparse_matrix_graph_int32_t;
 
 int main(int argc, char *argv[]) {
-
     osp::BspScheduleTestSuiteRunner<graph_t> runner;
     return runner.run(argc, argv);
 
     return 0;
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
index f023f937..01d07714 100644
--- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
+++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 #pragma once
 
+#include <boost/property_tree/json_parser.hpp>
+#include <boost/property_tree/ptree.hpp>
 #include <chrono>
 #include <filesystem>
 #include <fstream>
@@ -34,23 +36,21 @@ limitations under the License.
 #include "osp/auxiliary/io/general_file_reader.hpp"
 #include "osp/auxiliary/return_status.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
-#include <boost/property_tree/json_parser.hpp>
-#include <boost/property_tree/ptree.hpp>
 
 // #define EIGEN_FOUND 1
 
 #ifdef EIGEN_FOUND
-#include <Eigen/Sparse>
-#include <unsupported/Eigen/SparseExtra>
+#    include <Eigen/Sparse>
+#    include <unsupported/Eigen/SparseExtra>
 
-#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
+#    include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
 #endif
 
 namespace osp {
 
 namespace pt = boost::property_tree;
 
-template<typename TargetObjectType, typename GraphType>
+template <typename TargetObjectType, typename GraphType>
 class AbstractTestSuiteRunner {
   protected:
     std::string executable_dir;
@@ -60,8 +60,7 @@ class AbstractTestSuiteRunner {
     std::vector<std::string> all_csv_headers;
     std::vector<std::unique_ptr<IStatisticModule<TargetObjectType>>> active_stats_modules;
 
-    std::string graph_dir_path, machine_dir_path, output_target_object_dir_path, log_file_path,
-        statistics_output_file_path;
+    std::string graph_dir_path, machine_dir_path, output_target_object_dir_path, log_file_path, statistics_output_file_path;
     bool write_target_object_to_file = false;
     unsigned time_limit_seconds = 0;
 
@@ -71,35 +70,38 @@ class AbstractTestSuiteRunner {
         try {
             executable_dir = getExecutablePath().remove_filename().string();
             time_limit_seconds = parser.global_params.get_child("timeLimit").get_value<unsigned>();
-            write_target_object_to_file =
-                parser.global_params.get_child("outputSchedule").get_value_optional<bool>().value_or(false);
+            write_target_object_to_file
+                = parser.global_params.get_child("outputSchedule").get_value_optional<bool>().value_or(false);
 
             graph_dir_path = parser.global_params.get_child("graphDirectory").get_value<std::string>();
-            if (graph_dir_path.substr(0, 1) != "/")
+            if (graph_dir_path.substr(0, 1) != "/") {
                 graph_dir_path = executable_dir + graph_dir_path;
+            }
 
             machine_dir_path = parser.global_params.get_child("archDirectory").get_value<std::string>();
-            if (machine_dir_path.substr(0, 1) != "/")
+            if (machine_dir_path.substr(0, 1) != "/") {
                 machine_dir_path = executable_dir + machine_dir_path;
+            }
 
             if (write_target_object_to_file) {
-                output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory")
-                                                    .get_value<std::string>();
-                if (output_target_object_dir_path.substr(0, 1) != "/")
+                output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory").get_value<std::string>();
+                if (output_target_object_dir_path.substr(0, 1) != "/") {
                     output_target_object_dir_path = executable_dir + output_target_object_dir_path;
+                }
                 if (!output_target_object_dir_path.empty() && !std::filesystem::exists(output_target_object_dir_path)) {
                     std::filesystem::create_directories(output_target_object_dir_path);
                 }
             }
 
             log_file_path = parser.global_params.get_child("outputLogFile").get_value<std::string>();
-            if (log_file_path.substr(0, 1) != "/")
+            if (log_file_path.substr(0, 1) != "/") {
                 log_file_path = executable_dir + log_file_path;
+            }
 
-            statistics_output_file_path =
-                parser.global_params.get_child("outputStatsFile").get_value<std::string>();
-            if (statistics_output_file_path.substr(0, 1) != "/")
+            statistics_output_file_path = parser.global_params.get_child("outputStatsFile").get_value<std::string>();
+            if (statistics_output_file_path.substr(0, 1) != "/") {
                 statistics_output_file_path = executable_dir + statistics_output_file_path;
+            }
 
             return true;
         } catch (const std::exception &e) {
@@ -130,8 +132,7 @@ class AbstractTestSuiteRunner {
             }
         }
 
-        all_csv_headers.insert(all_csv_headers.end(), unique_module_metric_headers.begin(),
-                               unique_module_metric_headers.end());
+        all_csv_headers.insert(all_csv_headers.end(), unique_module_metric_headers.begin(), unique_module_metric_headers.end());
 
         std::filesystem::path stats_p(statistics_output_file_path);
         if (stats_p.has_parent_path() && !std::filesystem::exists(stats_p.parent_path())) {
@@ -155,10 +156,8 @@ class AbstractTestSuiteRunner {
 
         stats_out_stream.open(statistics_output_file_path, std::ios_base::app);
         if (!stats_out_stream.is_open()) {
-            log_stream << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path
-                       << std::endl;
-            std::cerr << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path
-                      << std::endl;
+            log_stream << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path << std::endl;
+            std::cerr << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path << std::endl;
         } else if (!file_exists_and_has_header) {
             for (size_t i = 0; i < all_csv_headers.size(); ++i) {
                 stats_out_stream << all_csv_headers[i] << (i == all_csv_headers.size() - 1 ? "" : ",");
@@ -168,28 +167,30 @@ class AbstractTestSuiteRunner {
         }
     }
 
-    virtual RETURN_STATUS compute_target_object_impl(const BspInstance<GraphType> &instance, std::unique_ptr<TargetObjectType> &target_object,
+    virtual RETURN_STATUS compute_target_object_impl(const BspInstance<GraphType> &instance,
+                                                     std::unique_ptr<TargetObjectType> &target_object,
                                                      const pt::ptree &algo_config,
-                                                     long long &computation_time_ms) = 0;
+                                                     long long &computation_time_ms)
+        = 0;
 
     virtual void create_and_register_statistic_modules(const std::string &module_name) = 0;
 
-    virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &,
-                                          const std::string &) {
-    } // default in case TargetObjectType cannot be written to file
+    virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &, const std::string &) {
+    }    // default in case TargetObjectType cannot be written to file
 
   public:
     AbstractTestSuiteRunner() {}
 
     virtual ~AbstractTestSuiteRunner() {
-        if (log_stream.is_open())
+        if (log_stream.is_open()) {
             log_stream.close();
-        if (stats_out_stream.is_open())
+        }
+        if (stats_out_stream.is_open()) {
             stats_out_stream.close();
+        }
     }
 
     int run(int argc, char *argv[]) {
-
         try {
             parser.parse_args(argc, argv);
         } catch (const std::exception &e) {
@@ -197,8 +198,9 @@ class AbstractTestSuiteRunner {
             return 1;
         }
 
-        if (!parse_common_config())
+        if (!parse_common_config()) {
             return 1;
+        }
 
         setup_log_file();
 
@@ -229,8 +231,9 @@ class AbstractTestSuiteRunner {
             }
             std::string filename_machine = machine_entry.path().string();
             std::string name_machine = filename_machine.substr(filename_machine.rfind('/') + 1);
-            if (name_machine.rfind('.') != std::string::npos)
+            if (name_machine.rfind('.') != std::string::npos) {
                 name_machine = name_machine.substr(0, name_machine.rfind('.'));
+            }
 
             BspArchitecture<GraphType> arch;
             if (!file_reader::readBspArchitecture(filename_machine, arch)) {
@@ -246,16 +249,18 @@ class AbstractTestSuiteRunner {
                 }
                 std::string filename_graph = graph_entry.path().string();
                 std::string name_graph = filename_graph.substr(filename_graph.rfind('/') + 1);
-                if (name_graph.rfind('.') != std::string::npos)
+                if (name_graph.rfind('.') != std::string::npos) {
                     name_graph = name_graph.substr(0, name_graph.rfind('.'));
+                }
                 log_stream << "Start Graph: " + filename_graph + "\n";
 
                 BspInstance<GraphType> bsp_instance;
                 bsp_instance.getArchitecture() = arch;
                 bool graph_status = false;
                 std::string ext;
-                if (filename_graph.rfind('.') != std::string::npos)
+                if (filename_graph.rfind('.') != std::string::npos) {
                     ext = filename_graph.substr(filename_graph.rfind('.') + 1);
+                }
 
 #ifdef EIGEN_FOUND
 
@@ -268,7 +273,8 @@ class AbstractTestSuiteRunner {
                 SM_csc_int32 L_csc_int32{};
                 SM_csc_int64 L_csc_int64{};
 
-                if constexpr (std::is_same_v<GraphType, sparse_matrix_graph_int32_t> || std::is_same_v<GraphType, sparse_matrix_graph_int64_t>) {
+                if constexpr (std::is_same_v<GraphType, sparse_matrix_graph_int32_t>
+                              || std::is_same_v<GraphType, sparse_matrix_graph_int64_t>) {
                     if (ext != "mtx") {
                         log_stream << "Error: Only .mtx file is accepted for SpTRSV" << std::endl;
                         return 0;
@@ -285,7 +291,6 @@ class AbstractTestSuiteRunner {
                         L_csc_int32 = L_csr_int32;
                         bsp_instance.getComputationalDag().setCSC(&L_csc_int32);
                     } else {
-
                         graph_status = Eigen::loadMarket(L_csr_int64, filename_graph);
                         if (!graph_status) {
                             std::cerr << "Failed to read matrix from " << filename_graph << std::endl;
@@ -317,13 +322,15 @@ class AbstractTestSuiteRunner {
                     long long computation_time_ms;
                     std::unique_ptr<TargetObjectType> target_object;
 
-                    RETURN_STATUS exec_status = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms);
+                    RETURN_STATUS exec_status
+                        = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms);
 
                     if (exec_status != RETURN_STATUS::OSP_SUCCESS && exec_status != RETURN_STATUS::BEST_FOUND) {
-                        if (exec_status == RETURN_STATUS::ERROR)
+                        if (exec_status == RETURN_STATUS::ERROR) {
                             log_stream << "Error computing with " << current_algo_name << "." << std::endl;
-                        else if (exec_status == RETURN_STATUS::TIMEOUT)
+                        } else if (exec_status == RETURN_STATUS::TIMEOUT) {
                             log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl;
+                        }
                         continue;
                     }
 
@@ -331,8 +338,8 @@ class AbstractTestSuiteRunner {
                         try {
                             write_target_object_hook(*target_object, name_graph, name_machine, current_algo_name);
                         } catch (const std::exception &e) {
-                            log_stream << "Writing target object file for " << name_graph << ", " << name_machine
-                                       << ", " << current_algo_name << " has failed: " << e.what() << std::endl;
+                            log_stream << "Writing target object file for " << name_graph << ", " << name_machine << ", "
+                                       << current_algo_name << " has failed: " << e.what() << std::endl;
                         }
                     }
 
@@ -361,4 +368,4 @@ class AbstractTestSuiteRunner {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp b/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp
index 16afc890..dd161798 100644
--- a/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp
+++ b/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp
@@ -25,29 +25,28 @@ limitations under the License.
 #include "StatsModules/GraphStatsModule.hpp"
 #include "StringToScheduler/run_bsp_recomp_scheduler.hpp"
 #include "StringToScheduler/run_bsp_scheduler.hpp"
+#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
 #include "osp/bsp/model/IBspScheduleEval.hpp"
-#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
 
 namespace osp {
 
-template<typename concrete_graph_t>
-class BspScheduleRecompTestSuiteRunner
-    : public AbstractTestSuiteRunner<IBspScheduleEval<concrete_graph_t>, concrete_graph_t> {
+template <typename concrete_graph_t>
+class BspScheduleRecompTestSuiteRunner : public AbstractTestSuiteRunner<IBspScheduleEval<concrete_graph_t>, concrete_graph_t> {
   private:
     bool use_memory_constraint_for_bsp;
 
   protected:
-    RETURN_STATUS compute_target_object_impl(const BspInstance<concrete_graph_t> &instance, std::unique_ptr<IBspScheduleEval<concrete_graph_t>>& schedule, const pt::ptree &algo_config,
+    RETURN_STATUS compute_target_object_impl(const BspInstance<concrete_graph_t> &instance,
+                                             std::unique_ptr<IBspScheduleEval<concrete_graph_t>> &schedule,
+                                             const pt::ptree &algo_config,
                                              long long &computation_time_ms) override {
-
         std::string algo_name = algo_config.get_child("id").get_value<std::string>();
         const std::set<std::string> scheduler_names = get_available_bsp_scheduler_names();
         const std::set<std::string> scheduler_recomp_names = get_available_bsp_recomp_scheduler_names();
 
         if (scheduler_names.find(algo_name) != scheduler_names.end()) {
-
             auto bsp_schedule = std::make_unique<BspSchedule<concrete_graph_t>>(instance);
 
             const auto start_time = std::chrono::high_resolution_clock::now();
@@ -55,15 +54,13 @@ class BspScheduleRecompTestSuiteRunner
             RETURN_STATUS status = run_bsp_scheduler(this->parser, algo_config, *bsp_schedule);
 
             const auto finish_time = std::chrono::high_resolution_clock::now();
-            computation_time_ms =
-                std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count();
+            computation_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count();
 
             schedule = std::move(bsp_schedule);
 
             return status;
 
         } else if (scheduler_recomp_names.find(algo_name) != scheduler_recomp_names.end()) {
-
             auto bsp_recomp_schedule = std::make_unique<BspScheduleRecomp<concrete_graph_t>>(instance);
 
             const auto start_time = std::chrono::high_resolution_clock::now();
@@ -71,14 +68,12 @@ class BspScheduleRecompTestSuiteRunner
             RETURN_STATUS status = run_bsp_recomp_scheduler(this->parser, algo_config, *bsp_recomp_schedule);
 
             const auto finish_time = std::chrono::high_resolution_clock::now();
-            computation_time_ms =
-                std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count();
+            computation_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count();
 
             schedule = std::move(bsp_recomp_schedule);
 
             return status;
         } else {
-
             std::cerr << "No matching category found for algorithm" << std::endl;
             return RETURN_STATUS::ERROR;
         }
@@ -86,11 +81,9 @@ class BspScheduleRecompTestSuiteRunner
 
     void create_and_register_statistic_modules(const std::string &module_name) override {
         if (module_name == "BasicBspStats") {
-            this->active_stats_modules.push_back(
-                std::make_unique<BasicBspStatsModule<IBspScheduleEval<concrete_graph_t>>>());
+            this->active_stats_modules.push_back(std::make_unique<BasicBspStatsModule<IBspScheduleEval<concrete_graph_t>>>());
         } else if (module_name == "GraphStats") {
-            this->active_stats_modules.push_back(
-                std::make_unique<GraphStatsModule<IBspScheduleEval<concrete_graph_t>>>());
+            this->active_stats_modules.push_back(std::make_unique<GraphStatsModule<IBspScheduleEval<concrete_graph_t>>>());
         }
     }
 
@@ -107,4 +100,4 @@ class BspScheduleRecompTestSuiteRunner
     BspScheduleRecompTestSuiteRunner() : AbstractTestSuiteRunner<IBspScheduleEval<concrete_graph_t>, concrete_graph_t>() {}
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp b/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp
index ba9bac6e..d1338e4c 100644
--- a/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp
+++ b/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp
@@ -19,25 +19,24 @@ limitations under the License.
 #pragma once
 
 #include "AbstractTestSuiteRunner.hpp"
-#include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
-#include "StringToScheduler/run_bsp_scheduler.hpp"
 #include "StatsModules/BasicBspStatsModule.hpp"
 #include "StatsModules/BspCommStatsModule.hpp"
 #include "StatsModules/BspSptrsvStatsModule.hpp"
 #include "StatsModules/GraphStatsModule.hpp"
+#include "StringToScheduler/run_bsp_scheduler.hpp"
+#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp"
+#include "osp/bsp/model/BspSchedule.hpp"
 
 namespace osp {
 
-template<typename concrete_graph_t>
+template <typename concrete_graph_t>
 class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner<BspSchedule<concrete_graph_t>, concrete_graph_t> {
   private:
-  
   protected:
-    RETURN_STATUS compute_target_object_impl(const BspInstance<concrete_graph_t> &instance, std::unique_ptr<BspSchedule<concrete_graph_t>>& schedule,
-                                                             const pt::ptree &algo_config,  
-                                                             long long &computation_time_ms) override {
-        
+    RETURN_STATUS compute_target_object_impl(const BspInstance<concrete_graph_t> &instance,
+                                             std::unique_ptr<BspSchedule<concrete_graph_t>> &schedule,
+                                             const pt::ptree &algo_config,
+                                             long long &computation_time_ms) override {
         schedule = std::make_unique<BspSchedule<concrete_graph_t>>(instance);
 
         const auto start_time = std::chrono::high_resolution_clock::now();
@@ -59,13 +58,14 @@ class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner<BspSchedule<co
         } else if (module_name == "BspSptrsvStats") {
             this->active_stats_modules.push_back(std::make_unique<BspSptrsvStatsModule<BspSchedule<concrete_graph_t>>>(NO_PERMUTE));
         } else if (module_name == "BspSptrsvPermLoopProcessorsStats") {
-            this->active_stats_modules.push_back(std::make_unique<BspSptrsvStatsModule<BspSchedule<concrete_graph_t>>>(LOOP_PROCESSORS));
+            this->active_stats_modules.push_back(
+                std::make_unique<BspSptrsvStatsModule<BspSchedule<concrete_graph_t>>>(LOOP_PROCESSORS));
         } else if (module_name == "BspSptrsvPermSnakeProcessorsStats") {
-            this->active_stats_modules.push_back(std::make_unique<BspSptrsvStatsModule<BspSchedule<concrete_graph_t>>>(SNAKE_PROCESSORS));
+            this->active_stats_modules.push_back(
+                std::make_unique<BspSptrsvStatsModule<BspSchedule<concrete_graph_t>>>(SNAKE_PROCESSORS));
 #endif
         } else if (module_name == "GraphStats") {
-            this->active_stats_modules.push_back(
-                std::make_unique<GraphStatsModule<BspSchedule<concrete_graph_t>>>());
+            this->active_stats_modules.push_back(std::make_unique<GraphStatsModule<BspSchedule<concrete_graph_t>>>());
         }
     }
 
@@ -78,8 +78,7 @@ class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner<BspSchedule<co
     // }
 
   public:
-    BspScheduleTestSuiteRunner()
-        : AbstractTestSuiteRunner<BspSchedule<concrete_graph_t>, concrete_graph_t>() {}
+    BspScheduleTestSuiteRunner() : AbstractTestSuiteRunner<BspSchedule<concrete_graph_t>, concrete_graph_t>() {}
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/ConfigParser.hpp b/apps/test_suite_runner/ConfigParser.hpp
index f362e424..1750ff9f 100644
--- a/apps/test_suite_runner/ConfigParser.hpp
+++ b/apps/test_suite_runner/ConfigParser.hpp
@@ -66,7 +66,6 @@ struct ConfigParser {
     }
 
     void add_algorithm(std::string name) {
-
         bool algorithm_found = false;
         std::string algorithm_identifier = name;
 
@@ -75,7 +74,6 @@ struct ConfigParser {
         }
 
         for (auto &algorithm : scheduler_config) {
-
             std::string alg_name = algorithm.second.get_child("name").get_value<std::string>();
 
             if (alg_name == algorithm_identifier) {
@@ -90,21 +88,17 @@ struct ConfigParser {
     }
 
     void parse_config_file(std::string filename) {
-
         pt::ptree loadPtreeRoot;
         pt::read_json(filename, loadPtreeRoot);
 
         global_params = loadPtreeRoot.get_child("globalParameters");
-        
+
         try {
             instances = loadPtreeRoot.get_child("inputInstances");
-        } catch (const pt::ptree_bad_path &e) {
-            
-        }      
+        } catch (const pt::ptree_bad_path &e) {}
 
         pt::ptree scheduler_config_parse = loadPtreeRoot.get_child("algorithms");
         for (auto &algorithm : scheduler_config_parse) {
-
             if (algorithm.second.get_child("run").get_value<bool>()) {
                 scheduler.push_back(algorithm);
             }
@@ -113,17 +107,15 @@ struct ConfigParser {
 
   public:
     ConfigParser() = default;
+
     ConfigParser(std::string main_config_file_) : main_config_file(main_config_file_), has_config_file(true) {}
 
     void parse_args(const int argc, const char *const argv[]) {
-
         if (has_config_file) {
-
             if (argc < 3) {
                 usage();
                 throw std::invalid_argument("Parameter error: not enough parameters specified.\n");
             } else if (std::string(argv[1]) == "--config") {
-
                 std::string config_file = argv[2];
                 if (config_file.empty() || config_file.substr(config_file.size() - 5) != ".json") {
                     throw std::invalid_argument("Parameter error: config file ending is not \".json\".\n");
@@ -140,10 +132,19 @@ struct ConfigParser {
                     throw std::invalid_argument("Parameter error: config file does not specify global parameters!\n");
                 }
             } else {
-
-                const std::set<std::string> parameters_requiring_value(
-                    {"--config", "--inputDag", "--g", "-inputDag", "-g", "--timeLimit", "--t", "-timeLimit", "-t",
-                     "--inputMachine", "--m", "-inputMachine", "-m"});
+                const std::set<std::string> parameters_requiring_value({"--config",
+                                                                        "--inputDag",
+                                                                        "--g",
+                                                                        "-inputDag",
+                                                                        "-g",
+                                                                        "--timeLimit",
+                                                                        "--t",
+                                                                        "-timeLimit",
+                                                                        "-t",
+                                                                        "--inputMachine",
+                                                                        "--m",
+                                                                        "-inputMachine",
+                                                                        "-m"});
 
                 pt::ptree loadPtreeRoot;
                 pt::read_json(main_config_file, loadPtreeRoot);
@@ -159,8 +160,8 @@ struct ConfigParser {
                 for (int i = 1; i < argc; ++i) {
                     // Check parameters that require an argument afterwards
                     if (parameters_requiring_value.count(argv[i]) == 1 && i + 1 >= argc) {
-                        throw std::invalid_argument("Parameter error: no parameter value after the \"" +
-                                                    std::string(argv[i]) + "\" option.\n");
+                        throw std::invalid_argument("Parameter error: no parameter value after the \"" + std::string(argv[i])
+                                                    + "\" option.\n");
                     }
 
                     std::string flag = argv[i];
@@ -169,30 +170,30 @@ struct ConfigParser {
                         usage();
                         throw std::invalid_argument("Parameter error: usage \"" + std::string(argv[i]) + "\".\n");
 
-                    } else if (std::string(flag) == "--timelimit" || std::string(flag) == "--t" ||
-                               std::string(flag) == "-t" || std::string(flag) == "-timelimit") {
+                    } else if (std::string(flag) == "--timelimit" || std::string(flag) == "--t" || std::string(flag) == "-t"
+                               || std::string(flag) == "-timelimit") {
                         global_params.put("timeLimit", std::stoi(argv[++i]));
 
-                    } else if (std::string(flag) == "--sankey" || std::string(flag) == "--s" ||
-                               std::string(flag) == "-s" || std::string(flag) == "-sankey") {
+                    } else if (std::string(flag) == "--sankey" || std::string(flag) == "--s" || std::string(flag) == "-s"
+                               || std::string(flag) == "-sankey") {
                         global_params.put("outputSankeySchedule", true);
 
-                    } else if (std::string(flag) == "--dot" || std::string(flag) == "--d" ||
-                               std::string(flag) == "-d" || std::string(flag) == "-dot") {
+                    } else if (std::string(flag) == "--dot" || std::string(flag) == "--d" || std::string(flag) == "-d"
+                               || std::string(flag) == "-dot") {
                         global_params.put("outputDotSchedule", true);
 
-                    } else if (std::string(flag) == "--inputDag" || std::string(flag) == "--g" ||
-                               std::string(flag) == "-inputDag" || std::string(flag) == "-g") {
+                    } else if (std::string(flag) == "--inputDag" || std::string(flag) == "--g" || std::string(flag) == "-inputDag"
+                               || std::string(flag) == "-g") {
                         instance.put("graphFile", argv[++i]);
                         graph_specified = true;
 
-                    } else if (std::string(flag) == "--inputMachine" || std::string(flag) == "--m" ||
-                               std::string(flag) == "-inputMachine" || std::string(flag) == "-m") {
+                    } else if (std::string(flag) == "--inputMachine" || std::string(flag) == "--m"
+                               || std::string(flag) == "-inputMachine" || std::string(flag) == "-m") {
                         instance.put("machineParamsFile", argv[++i]);
                         machine_specified = true;
 
-                    } else if (std::string(flag) == "--output" || std::string(flag) == "--o" ||
-                               std::string(flag) == "-output" || std::string(flag) == "-o") {
+                    } else if (std::string(flag) == "--output" || std::string(flag) == "--o" || std::string(flag) == "-output"
+                               || std::string(flag) == "-o") {
                         global_params.put("outputSchedule", true);
                     } else {
                         add_algorithm(flag);
@@ -210,16 +211,13 @@ struct ConfigParser {
                 instances.push_back(std::make_pair("", instance));
             }
         } else {
-
             if (argc < 3 || std::string(argv[1]) != "--config") {
-
                 std::cout << "Usage: read config file: \n"
                           << "     --config *.json          \t\tSpecify config .json file.\n";
 
                 throw std::invalid_argument("Parameter error: not enough parameters specified.\n");
 
             } else {
-
                 std::string config_file = argv[2];
                 if (config_file.empty() || config_file.substr(config_file.size() - 5) != ".json") {
                     throw std::invalid_argument("Parameter error: config file ending is not \".json\".\n");
diff --git a/apps/test_suite_runner/PebblingTestSuiteRunner.hpp b/apps/test_suite_runner/PebblingTestSuiteRunner.hpp
index abbfe998..2cbcfc5d 100644
--- a/apps/test_suite_runner/PebblingTestSuiteRunner.hpp
+++ b/apps/test_suite_runner/PebblingTestSuiteRunner.hpp
@@ -19,13 +19,13 @@ limitations under the License.
 #pragma once
 
 #include "AbstractTestSuiteRunner.hpp"
-#include "osp/pebbling/PebblingSchedule.hpp"
-#include "StringToScheduler/run_pebbler.hpp" 
 #include "StatsModules/IStatsModule.hpp"
+#include "StringToScheduler/run_pebbler.hpp"
+#include "osp/pebbling/PebblingSchedule.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class BasicPebblingStatsModule : public IStatisticModule<PebblingSchedule<Graph_t>> {
   public:
   private:
@@ -44,17 +44,18 @@ class BasicPebblingStatsModule : public IStatisticModule<PebblingSchedule<Graph_
     }
 };
 
-template<typename concrete_graph_t>
+template <typename concrete_graph_t>
 class PebblingTestSuiteRunner : public AbstractTestSuiteRunner<PebblingSchedule<concrete_graph_t>, concrete_graph_t> {
   private:
     bool use_memory_constraint;
 
   protected:
-        RETURN_STATUS compute_target_object_impl(const BspInstance<concrete_graph_t> &instance, std::unique_ptr<PebblingSchedule<concrete_graph_t>>& schedule, const pt::ptree &algo_config,
+    RETURN_STATUS compute_target_object_impl(const BspInstance<concrete_graph_t> &instance,
+                                             std::unique_ptr<PebblingSchedule<concrete_graph_t>> &schedule,
+                                             const pt::ptree &algo_config,
                                              long long &computation_time_ms) override {
-        
         schedule = std::make_unique<PebblingSchedule<concrete_graph_t>>(instance);
-        
+
         const auto start_time = std::chrono::high_resolution_clock::now();
 
         RETURN_STATUS status = run_pebbler(this->parser, algo_config, *schedule);
@@ -84,4 +85,4 @@ class PebblingTestSuiteRunner : public AbstractTestSuiteRunner<PebblingSchedule<
     PebblingTestSuiteRunner() : AbstractTestSuiteRunner<PebblingSchedule<concrete_graph_t>, concrete_graph_t>() {}
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp b/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp
index 59d6d457..21229567 100644
--- a/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp
+++ b/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp
@@ -18,43 +18,36 @@ limitations under the License.
 
 #pragma once
 
+#include <map>
 #include <string>
 #include <vector>
-#include <map>
 
 #include "IStatsModule.hpp"
 #include "osp/bsp/model/IBspScheduleEval.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t
+#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"    // For graph_t
 
 namespace osp {
 
-template<typename TargetObjectType>
-class BasicBspStatsModule : public IStatisticModule<TargetObjectType> { 
-public:
-
-private:
-    const std::vector<std::string> metric_headers = {
-        "BspCost", "WorkCost", "CommCost", "Supersteps"
-    };
+template <typename TargetObjectType>
+class BasicBspStatsModule : public IStatisticModule<TargetObjectType> {
+  public:
+  private:
+    const std::vector<std::string> metric_headers = {"BspCost", "WorkCost", "CommCost", "Supersteps"};
 
-public:
-
-    std::vector<std::string> get_metric_headers() const override {
-        return metric_headers;
-    }
+  public:
+    std::vector<std::string> get_metric_headers() const override { return metric_headers; }
 
-    std::map<std::string, std::string> record_statistics(
-                            const TargetObjectType& schedule, 
-                            std::ofstream& /*log_stream*/) const override { 
+    std::map<std::string, std::string> record_statistics(const TargetObjectType &schedule,
+                                                         std::ofstream & /*log_stream*/) const override {
         std::map<std::string, std::string> stats;
         const auto bsp_cost = schedule.computeCosts();
         const auto work_cost = schedule.computeWorkCosts();
         stats["BspCost"] = std::to_string(bsp_cost);
         stats["WorkCost"] = std::to_string(work_cost);
-        stats["CommCost"] = std::to_string(bsp_cost - work_cost); 
+        stats["CommCost"] = std::to_string(bsp_cost - work_cost);
         stats["Supersteps"] = std::to_string(schedule.numberOfSupersteps());
         return stats;
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp b/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp
index 7f1066ee..83f6f1b9 100644
--- a/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp
+++ b/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp
@@ -18,32 +18,29 @@ limitations under the License.
 
 #pragma once
 
+#include <map>
+#include <string>
+#include <vector>
+
 #include "IStatsModule.hpp"
-#include "osp/bsp/model/BspSchedule.hpp" // Still needed
+#include "osp/bsp/model/BspSchedule.hpp"    // Still needed
 #include "osp/bsp/model/cost/BufferedSendingCost.hpp"
 #include "osp/bsp/model/cost/TotalCommunicationCost.hpp"
 #include "osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp"
-#include <map>
-#include <string>
-#include <vector>
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class BspCommStatsModule : public IStatisticModule<BspSchedule<Graph_t>> {
   public:
   private:
-    const std::vector<std::string> metric_headers = {
-        "TotalCommCost", "TotalLambdaCommCost", "BufferedSendingCosts"};
+    const std::vector<std::string> metric_headers = {"TotalCommCost", "TotalLambdaCommCost", "BufferedSendingCosts"};
 
   public:
-    std::vector<std::string> get_metric_headers() const override {
-        return metric_headers;
-    }
+    std::vector<std::string> get_metric_headers() const override { return metric_headers; }
 
-    std::map<std::string, std::string> record_statistics(
-        const BspSchedule<Graph_t> &schedule,
-        std::ofstream & /*log_stream*/) const override {
+    std::map<std::string, std::string> record_statistics(const BspSchedule<Graph_t> &schedule,
+                                                         std::ofstream & /*log_stream*/) const override {
         std::map<std::string, std::string> stats;
         stats["TotalCommCost"] = std::to_string(TotalCommunicationCost<Graph_t>()(schedule));
         stats["TotalLambdaCommCost"] = std::to_string(TotalLambdaCommunicationCost<Graph_t>()(schedule));
@@ -52,4 +49,4 @@ class BspCommStatsModule : public IStatisticModule<BspSchedule<Graph_t>> {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp b/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp
index 8839ba39..e2b650d2 100644
--- a/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp
+++ b/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp
@@ -19,33 +19,38 @@ limitations under the License.
 #pragma once
 #ifdef EIGEN_FOUND
 
-#include <Eigen/Core>
-#include <string>
-#include <vector>
-#include <map>
-#include <typeinfo>
-#include <cxxabi.h>
-#include <numeric>
-#include <algorithm>
-#include <cmath>
-#include <sstream>
-
-#include "IStatsModule.hpp"
-#include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t
-#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
-#include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp"
-#include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp"
+#    include <cxxabi.h>
+
+#    include <Eigen/Core>
+#    include <algorithm>
+#    include <cmath>
+#    include <map>
+#    include <numeric>
+#    include <sstream>
+#    include <string>
+#    include <typeinfo>
+#    include <vector>
+
+#    include "IStatsModule.hpp"
+#    include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp"
+#    include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp"
+#    include "osp/bsp/model/BspSchedule.hpp"
+#    include "osp/graph_implementations/boost_graphs/boost_graph.hpp"    // For graph_t
+#    include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
 
 namespace osp {
 
 // Turn permutation mode into a human-readable prefix used in metric names
-inline const char* mode_tag(SCHEDULE_NODE_PERMUTATION_MODES m) {
+inline const char *mode_tag(SCHEDULE_NODE_PERMUTATION_MODES m) {
     switch (m) {
-        case NO_PERMUTE:       return "NoPermute_";
-        case LOOP_PROCESSORS:  return "LoopProc_";
-        case SNAKE_PROCESSORS: return "SnakeProc_";
-        default:               return "Unknown_";
+        case NO_PERMUTE:
+            return "NoPermute_";
+        case LOOP_PROCESSORS:
+            return "LoopProc_";
+        case SNAKE_PROCESSORS:
+            return "SnakeProc_";
+        default:
+            return "Unknown_";
     }
 }
 
@@ -56,9 +61,9 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) {
     assert(v1.size() == v2.size());
     bool same = true;
     const double epsilon = 1e-10;
-    for (long long int i=0; i < v1.size(); ++i){
-        //std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n";  
-        if( std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon ){
+    for (long long int i = 0; i < v1.size(); ++i) {
+        // std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n";
+        if (std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon) {
             std::cout << "We have differences in the matrix in position: " << i << std::endl;
             std::cout << v1[i] << " , " << v2[i] << std::endl;
             same = false;
@@ -68,49 +73,40 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) {
     return same;
 }
 
-template<typename TargetObjectType>
+template <typename TargetObjectType>
 class BspSptrsvStatsModule : public IStatisticModule<TargetObjectType> {
-public:
-    explicit BspSptrsvStatsModule(SCHEDULE_NODE_PERMUTATION_MODES _mode = NO_PERMUTE)
-        : mode(_mode) {}
+  public:
+    explicit BspSptrsvStatsModule(SCHEDULE_NODE_PERMUTATION_MODES _mode = NO_PERMUTE) : mode(_mode) {}
 
-    
     std::vector<std::string> get_metric_headers() const override {
         const std::string prefix = mode_tag(mode);
-        return {
-            prefix + "SpTrSV_Runtime_Geomean(ns)",
-            prefix + "SpTrSV_Runtime_Stddev",
-            prefix + "SpTrSV_Runtime_Q25(ns)",
-            prefix + "SpTrSV_Runtime_Q75(ns)"
-        };
+        return {prefix + "SpTrSV_Runtime_Geomean(ns)",
+                prefix + "SpTrSV_Runtime_Stddev",
+                prefix + "SpTrSV_Runtime_Q25(ns)",
+                prefix + "SpTrSV_Runtime_Q75(ns)"};
     }
-    std::map<std::string, std::string> record_statistics(
-        const TargetObjectType& schedule,
-        std::ofstream&) const override {
 
+    std::map<std::string, std::string> record_statistics(const TargetObjectType &schedule, std::ofstream &) const override {
         std::map<std::string, std::string> stats;
 
-        if constexpr (
-            std::is_same_v<TargetObjectType, osp::BspSchedule<osp::SparseMatrixImp<int32_t>>> ||
-            std::is_same_v<TargetObjectType, osp::BspSchedule<osp::SparseMatrixImp<int64_t>>>
-        ) {
-            using index_t = std::conditional_t<
-                std::is_same_v<TargetObjectType, osp::BspSchedule<osp::SparseMatrixImp<int32_t>>>,
-                int32_t, int64_t>;
+        if constexpr (std::is_same_v<TargetObjectType, osp::BspSchedule<osp::SparseMatrixImp<int32_t>>>
+                      || std::is_same_v<TargetObjectType, osp::BspSchedule<osp::SparseMatrixImp<int64_t>>>) {
+            using index_t
+                = std::conditional_t<std::is_same_v<TargetObjectType, osp::BspSchedule<osp::SparseMatrixImp<int32_t>>>, int32_t, int64_t>;
 
             auto instance = schedule.getInstance();
             Sptrsv<index_t> sim{instance};
 
             std::vector<size_t> perm;
 
-            if (mode == NO_PERMUTE){ 
+            if (mode == NO_PERMUTE) {
                 sim.setup_csr_no_permutation(schedule);
             } else if (mode == LOOP_PROCESSORS) {
                 perm = schedule_node_permuter_basic(schedule, LOOP_PROCESSORS);
-                sim.setup_csr_with_permutation (schedule, perm);
+                sim.setup_csr_with_permutation(schedule, perm);
             } else if (mode == SNAKE_PROCESSORS) {
                 perm = schedule_node_permuter_basic(schedule, SNAKE_PROCESSORS);
-                sim.setup_csr_with_permutation (schedule, perm);
+                sim.setup_csr_with_permutation(schedule, perm);
             } else {
                 std::cout << "Wrong type of permutation provided" << std::endl;
             }
@@ -132,13 +128,13 @@ class BspSptrsvStatsModule : public IStatisticModule<TargetObjectType> {
                 L_x_osp.setZero();
                 sim.x = &L_x_osp[0];
                 sim.b = &L_b_osp[0];
-                std::chrono::_V2::system_clock::time_point start,end;
-                
-                if (mode == NO_PERMUTE){ 
+                std::chrono::_V2::system_clock::time_point start, end;
+
+                if (mode == NO_PERMUTE) {
                     start = std::chrono::high_resolution_clock::now();
                     sim.lsolve_no_permutation();
                     end = std::chrono::high_resolution_clock::now();
-                } else{
+                } else {
                     start = std::chrono::high_resolution_clock::now();
                     sim.lsolve_with_permutation();
                     end = std::chrono::high_resolution_clock::now();
@@ -149,17 +145,17 @@ class BspSptrsvStatsModule : public IStatisticModule<TargetObjectType> {
             }
 
             // Geometric mean (requires conversion to double)
-            double total_log = std::accumulate(times_ns.begin(), times_ns.end(), 0.0,
-                                            [](double sum, long long val) { return sum + std::log(static_cast<double>(val)); });
+            double total_log = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, [](double sum, long long val) {
+                return sum + std::log(static_cast<double>(val));
+            });
             long long geom_mean = static_cast<long long>(std::exp(total_log / runs));
 
             // Standard deviation
             double mean = std::accumulate(times_ns.begin(), times_ns.end(), 0.0) / runs;
-            double sq_sum = std::accumulate(times_ns.begin(), times_ns.end(), 0.0,
-                                            [mean](double acc, long long val) {
-                                                double diff = static_cast<double>(val) - mean;
-                                                return acc + diff * diff;
-                                            });
+            double sq_sum = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, [mean](double acc, long long val) {
+                double diff = static_cast<double>(val) - mean;
+                return acc + diff * diff;
+            });
             long long stddev = static_cast<long long>(std::sqrt(sq_sum / runs));
 
             // Quartiles
@@ -168,25 +164,23 @@ class BspSptrsvStatsModule : public IStatisticModule<TargetObjectType> {
             long long q75 = times_ns[3 * runs / 4];
 
             auto to_str = [](long long value) {
-                return std::to_string(value);  // no decimal points
+                return std::to_string(value);    // no decimal points
             };
 
             // Permute back if needed
             if (mode != NO_PERMUTE) {
                 sim.permute_x_vector(perm);
-            } 
-
+            }
 
             if (!compare_vectors(L_x_ref, L_x_osp)) {
                 std::cout << "Output is not equal" << std::endl;
             }
 
-
             const std::string prefix = mode_tag(mode);
             stats[prefix + "SpTrSV_Runtime_Geomean(ns)"] = to_str(geom_mean);
-            stats[prefix + "SpTrSV_Runtime_Stddev"]     = to_str(stddev);
-            stats[prefix + "SpTrSV_Runtime_Q25(ns)"]    = to_str(q25);
-            stats[prefix + "SpTrSV_Runtime_Q75(ns)"]    = to_str(q75);
+            stats[prefix + "SpTrSV_Runtime_Stddev"] = to_str(stddev);
+            stats[prefix + "SpTrSV_Runtime_Q25(ns)"] = to_str(q25);
+            stats[prefix + "SpTrSV_Runtime_Q75(ns)"] = to_str(q75);
 
         } else {
             std::cout << "Simulation is not available without the SparseMatrix type" << std::endl;
@@ -195,11 +189,11 @@ class BspSptrsvStatsModule : public IStatisticModule<TargetObjectType> {
         return stats;
     }
 
-    private:
+  private:
     SCHEDULE_NODE_PERMUTATION_MODES mode;
-    static constexpr int runs = 100;  // number of runs for benchmarking
+    static constexpr int runs = 100;    // number of runs for benchmarking
 };
 
-} // namespace osp
+}    // namespace osp
 
-#endif
\ No newline at end of file
+#endif
diff --git a/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp b/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp
index 6ed263de..09a3953c 100644
--- a/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp
+++ b/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp
@@ -18,42 +18,37 @@ limitations under the License.
 
 #pragma once
 
-#include "IStatsModule.hpp"
-#include "osp/bsp/model/IBspScheduleEval.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t
+#include <map>
 #include <string>
 #include <vector>
-#include <map>
-
-namespace osp {
 
-template<typename TargetObjectType>
-class GraphStatsModule : public IStatisticModule<TargetObjectType> { 
-public:
+#include "IStatsModule.hpp"
+#include "osp/bsp/model/IBspScheduleEval.hpp"
+#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"    // For graph_t
 
-private:
-    const std::vector<std::string> metric_headers = {
-        "num_vertices", "num_edges", "avg_wavefront_size"
-    };
+namespace osp {
 
-public:
+template <typename TargetObjectType>
+class GraphStatsModule : public IStatisticModule<TargetObjectType> {
+  public:
+  private:
+    const std::vector<std::string> metric_headers = {"num_vertices", "num_edges", "avg_wavefront_size"};
 
-    std::vector<std::string> get_metric_headers() const override {
-        return metric_headers;
-    }
+  public:
+    std::vector<std::string> get_metric_headers() const override { return metric_headers; }
 
-    std::map<std::string, std::string> record_statistics(
-                            const TargetObjectType& schedule, 
-                            std::ofstream& /*log_stream*/) const override { 
+    std::map<std::string, std::string> record_statistics(const TargetObjectType &schedule,
+                                                         std::ofstream & /*log_stream*/) const override {
         std::map<std::string, std::string> stats;
 
         const auto &graph = schedule.getInstance().getComputationalDag();
 
         stats["num_vertices"] = std::to_string(graph.num_vertices());
         stats["num_edges"] = std::to_string(graph.num_edges());
-        stats["avg_wavefront_size"] = std::to_string(static_cast<double>(graph.num_vertices()) / static_cast<double>(longestPath(graph)));                            
+        stats["avg_wavefront_size"]
+            = std::to_string(static_cast<double>(graph.num_vertices()) / static_cast<double>(longestPath(graph)));
         return stats;
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/StatsModules/IStatsModule.hpp b/apps/test_suite_runner/StatsModules/IStatsModule.hpp
index dfef9049..46544af7 100644
--- a/apps/test_suite_runner/StatsModules/IStatsModule.hpp
+++ b/apps/test_suite_runner/StatsModules/IStatsModule.hpp
@@ -20,18 +20,19 @@ limitations under the License.
 
 #include <boost/property_tree/ptree.hpp>
 #include <fstream>
+#include <map>
 #include <string>
 #include <vector>
- #include <map>
+
 // #include "osp/bsp/model/BspSchedule.hpp" // TargetObject will be passed, no specific include here
 
 // Forward declarations to avoid circular dependencies
-namespace osp { // Ensure this is within the osp namespace
+namespace osp {    // Ensure this is within the osp namespace
 
 namespace pt = boost::property_tree;
 
-template<typename TargetObjectType>
-class IStatisticModule { // Changed from Graph_t_ to TargetObjectType
+template <typename TargetObjectType>
+class IStatisticModule {    // Changed from Graph_t_ to TargetObjectType
   public:
     virtual ~IStatisticModule() = default;
 
@@ -40,9 +41,9 @@ class IStatisticModule { // Changed from Graph_t_ to TargetObjectType
 
     // Called for each generated target_object.
     // Returns a map of {header_name: value_string}.
-    virtual std::map<std::string, std::string> record_statistics( 
-                                   const TargetObjectType &target_object, // Changed parameter
-                                   std::ofstream &log_stream) const = 0;
+    virtual std::map<std::string, std::string> record_statistics(const TargetObjectType &target_object,    // Changed parameter
+                                                                 std::ofstream &log_stream) const
+        = 0;
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/apps/test_suite_runner/StringToScheduler/get_coarser.hpp b/apps/test_suite_runner/StringToScheduler/get_coarser.hpp
index 7c438833..4020a08e 100644
--- a/apps/test_suite_runner/StringToScheduler/get_coarser.hpp
+++ b/apps/test_suite_runner/StringToScheduler/get_coarser.hpp
@@ -28,35 +28,33 @@ limitations under the License.
 #include "../ConfigParser.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
-#include "osp/coarser/Coarser.hpp"
-#include "osp/coarser/funnel/FunnelBfs.hpp"
 #include "osp/coarser/BspScheduleCoarser.hpp"
-#include "osp/coarser/hdagg/hdagg_coarser.hpp"
+#include "osp/coarser/Coarser.hpp"
 #include "osp/coarser/MultilevelCoarser.hpp"
 #include "osp/coarser/Sarkar/Sarkar.hpp"
 #include "osp/coarser/Sarkar/SarkarMul.hpp"
-#include "osp/coarser/top_order/top_order_coarser.hpp"
-#include "osp/graph_algorithms/cuthill_mckee.hpp"
 #include "osp/coarser/SquashA/SquashA.hpp"
 #include "osp/coarser/SquashA/SquashAMul.hpp"
+#include "osp/coarser/funnel/FunnelBfs.hpp"
+#include "osp/coarser/hdagg/hdagg_coarser.hpp"
+#include "osp/coarser/top_order/top_order_coarser.hpp"
+#include "osp/graph_algorithms/cuthill_mckee.hpp"
 
 namespace osp {
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::unique_ptr<Coarser<Graph_t_in, Graph_t_out>>
-get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) {
-
+template <typename Graph_t_in, typename Graph_t_out>
+std::unique_ptr<Coarser<Graph_t_in, Graph_t_out>> get_coarser_by_name(const ConfigParser &,
+                                                                      const boost::property_tree::ptree &coarser_algorithm) {
     const std::string coarser_name = coarser_algorithm.get_child("name").get_value<std::string>();
 
     if (coarser_name == "funnel") {
         typename FunnelBfs<Graph_t_in, Graph_t_out>::FunnelBfs_parameters funnel_parameters;
         if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) {
             const auto &params_pt = params_opt.get();
-            funnel_parameters.funnel_incoming =
-                params_pt.get_optional<bool>("funnel_incoming").value_or(funnel_parameters.funnel_incoming);
-            funnel_parameters.use_approx_transitive_reduction =
-                params_pt.get_optional<bool>("use_approx_transitive_reduction")
-                    .value_or(funnel_parameters.use_approx_transitive_reduction);
+            funnel_parameters.funnel_incoming
+                = params_pt.get_optional<bool>("funnel_incoming").value_or(funnel_parameters.funnel_incoming);
+            funnel_parameters.use_approx_transitive_reduction = params_pt.get_optional<bool>("use_approx_transitive_reduction")
+                                                                    .value_or(funnel_parameters.use_approx_transitive_reduction);
         }
         return std::make_unique<FunnelBfs<Graph_t_in, Graph_t_out>>(funnel_parameters);
 
@@ -68,11 +66,10 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa
                                             .value_or(std::numeric_limits<v_workw_t<Graph_t_in>>::max()));
             coarser->set_memory_threshold(params_pt.get_optional<v_memw_t<Graph_t_in>>("max_memory_weight")
                                               .value_or(std::numeric_limits<v_memw_t<Graph_t_in>>::max()));
-            coarser->set_communication_threshold(
-                params_pt.get_optional<v_commw_t<Graph_t_in>>("max_communication_weight")
-                    .value_or(std::numeric_limits<v_commw_t<Graph_t_in>>::max()));
-            coarser->set_super_node_size_threshold(params_pt.get_optional<std::size_t>("max_super_node_size")
-                                                       .value_or(std::numeric_limits<std::size_t>::max()));
+            coarser->set_communication_threshold(params_pt.get_optional<v_commw_t<Graph_t_in>>("max_communication_weight")
+                                                     .value_or(std::numeric_limits<v_commw_t<Graph_t_in>>::max()));
+            coarser->set_super_node_size_threshold(
+                params_pt.get_optional<std::size_t>("max_super_node_size").value_or(std::numeric_limits<std::size_t>::max()));
         }
         return coarser;
 
@@ -89,15 +86,11 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa
                                                     .value_or(std::numeric_limits<v_workw_t<Graph_t_in>>::max()));
                 coarser_ptr->set_memory_threshold(params_pt.get_optional<v_memw_t<Graph_t_in>>("memory_threshold")
                                                       .value_or(std::numeric_limits<v_memw_t<Graph_t_in>>::max()));
-                coarser_ptr->set_communication_threshold(
-                    params_pt.get_optional<v_commw_t<Graph_t_in>>("communication_threshold")
-                        .value_or(std::numeric_limits<v_commw_t<Graph_t_in>>::max()));
+                coarser_ptr->set_communication_threshold(params_pt.get_optional<v_commw_t<Graph_t_in>>("communication_threshold")
+                                                             .value_or(std::numeric_limits<v_commw_t<Graph_t_in>>::max()));
                 coarser_ptr->set_super_node_size_threshold(
-                    params_pt.get_optional<std::size_t>("super_node_size_threshold")
-                        .value_or(10));
-                coarser_ptr->set_node_dist_threshold(
-                    params_pt.get_optional<unsigned>("node_dist_threshold").value_or(10));
-                
+                    params_pt.get_optional<std::size_t>("super_node_size_threshold").value_or(10));
+                coarser_ptr->set_node_dist_threshold(params_pt.get_optional<unsigned>("node_dist_threshold").value_or(10));
             }
         };
 
@@ -126,18 +119,16 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa
             set_params(coarser);
             return coarser;
         } else if (top_order_strategy == "cuthill_mckee_wavefront") {
-            auto coarser =
-                std::make_unique<top_order_coarser<Graph_t_in, Graph_t_out, GetTopOrderCuthillMcKeeWavefront>>();
+            auto coarser = std::make_unique<top_order_coarser<Graph_t_in, Graph_t_out, GetTopOrderCuthillMcKeeWavefront>>();
             set_params(coarser);
             return coarser;
         } else if (top_order_strategy == "cuthill_mckee_undirected") {
-            auto coarser =
-                std::make_unique<top_order_coarser<Graph_t_in, Graph_t_out, GetTopOrderCuthillMcKeeUndirected>>();
+            auto coarser = std::make_unique<top_order_coarser<Graph_t_in, Graph_t_out, GetTopOrderCuthillMcKeeUndirected>>();
             set_params(coarser);
             return coarser;
         } else {
-            std::cerr << "Warning: Unknown top_order strategy '" << top_order_strategy
-                      << "'. Falling back to default (bfs)." << std::endl;
+            std::cerr << "Warning: Unknown top_order strategy '" << top_order_strategy << "'. Falling back to default (bfs)."
+                      << std::endl;
             auto coarser = std::make_unique<top_order_coarser<Graph_t_in, Graph_t_out, GetTopOrder>>();
             set_params(coarser);
             return coarser;
@@ -149,25 +140,40 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa
             const auto &params_pt = params_opt.get();
             params.commCost = params_pt.get_optional<v_workw_t<Graph_t_in>>("commCost").value_or(params.commCost);
             params.maxWeight = params_pt.get_optional<v_workw_t<Graph_t_in>>("maxWeight").value_or(params.maxWeight);
-            params.smallWeightThreshold = params_pt.get_optional<v_workw_t<Graph_t_in>>("smallWeightThreshold").value_or(params.smallWeightThreshold);
+            params.smallWeightThreshold
+                = params_pt.get_optional<v_workw_t<Graph_t_in>>("smallWeightThreshold").value_or(params.smallWeightThreshold);
             params.useTopPoset = params_pt.get_optional<bool>("useTopPoset").value_or(params.useTopPoset);
             params.geomDecay = params_pt.get_optional<double>("geomDecay").value_or(params.geomDecay);
             params.leniency = params_pt.get_optional<double>("leniency").value_or(params.leniency);
 
             if (auto mode_str_opt = params_pt.get_optional<std::string>("mode")) {
                 const std::string &mode_str = mode_str_opt.get();
-                if (mode_str == "LINES") params.mode = SarkarParams::Mode::LINES;
-                else if (mode_str == "FAN_IN_FULL") params.mode = SarkarParams::Mode::FAN_IN_FULL;
-                else if (mode_str == "FAN_IN_PARTIAL") params.mode = SarkarParams::Mode::FAN_IN_PARTIAL;
-                else if (mode_str == "FAN_OUT_FULL") params.mode = SarkarParams::Mode::FAN_OUT_FULL;
-                else if (mode_str == "FAN_OUT_PARTIAL") params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL;
-                else if (mode_str == "LEVEL_EVEN") params.mode = SarkarParams::Mode::LEVEL_EVEN;
-                else if (mode_str == "LEVEL_ODD") params.mode = SarkarParams::Mode::LEVEL_ODD;
-                else if (mode_str == "FAN_IN_BUFFER") params.mode = SarkarParams::Mode::FAN_IN_BUFFER;
-                else if (mode_str == "FAN_OUT_BUFFER") params.mode = SarkarParams::Mode::FAN_OUT_BUFFER;
-                else if (mode_str == "HOMOGENEOUS_BUFFER") params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER;
-                else throw std::invalid_argument("Invalid Sarkar mode: " + mode_str
-                    + "!\nChoose from: LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER.");
+                if (mode_str == "LINES") {
+                    params.mode = SarkarParams::Mode::LINES;
+                } else if (mode_str == "FAN_IN_FULL") {
+                    params.mode = SarkarParams::Mode::FAN_IN_FULL;
+                } else if (mode_str == "FAN_IN_PARTIAL") {
+                    params.mode = SarkarParams::Mode::FAN_IN_PARTIAL;
+                } else if (mode_str == "FAN_OUT_FULL") {
+                    params.mode = SarkarParams::Mode::FAN_OUT_FULL;
+                } else if (mode_str == "FAN_OUT_PARTIAL") {
+                    params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL;
+                } else if (mode_str == "LEVEL_EVEN") {
+                    params.mode = SarkarParams::Mode::LEVEL_EVEN;
+                } else if (mode_str == "LEVEL_ODD") {
+                    params.mode = SarkarParams::Mode::LEVEL_ODD;
+                } else if (mode_str == "FAN_IN_BUFFER") {
+                    params.mode = SarkarParams::Mode::FAN_IN_BUFFER;
+                } else if (mode_str == "FAN_OUT_BUFFER") {
+                    params.mode = SarkarParams::Mode::FAN_OUT_BUFFER;
+                } else if (mode_str == "HOMOGENEOUS_BUFFER") {
+                    params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER;
+                } else {
+                    throw std::invalid_argument(
+                        "Invalid Sarkar mode: " + mode_str
+                        + "!\nChoose from: LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, "
+                          "LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER.");
+                }
             }
         }
         return std::make_unique<Sarkar<Graph_t_in, Graph_t_out>>(params);
@@ -177,14 +183,18 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa
         auto coarser = std::make_unique<SquashA<Graph_t_in, Graph_t_out>>(params);
         if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) {
             const auto &params_pt = params_opt.get();
-            params.use_structured_poset =
-                params_pt.get_optional<bool>("use_structured_poset").value_or(params.use_structured_poset);
+            params.use_structured_poset
+                = params_pt.get_optional<bool>("use_structured_poset").value_or(params.use_structured_poset);
             params.use_top_poset = params_pt.get_optional<bool>("use_top_poset").value_or(params.use_top_poset);
             if (auto mode_str_opt = params_pt.get_optional<std::string>("mode")) {
-                if (mode_str_opt.get() == "EDGE_WEIGHT") params.mode = SquashAParams::Mode::EDGE_WEIGHT;
-                else if (mode_str_opt.get() == "TRIANGLES") params.mode = SquashAParams::Mode::TRIANGLES;
-                else throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get()
-                    + "!\nChoose from: EDGE_WEIGHT, TRIANGLES.");
+                if (mode_str_opt.get() == "EDGE_WEIGHT") {
+                    params.mode = SquashAParams::Mode::EDGE_WEIGHT;
+                } else if (mode_str_opt.get() == "TRIANGLES") {
+                    params.mode = SquashAParams::Mode::TRIANGLES;
+                } else {
+                    throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get()
+                                                + "!\nChoose from: EDGE_WEIGHT, TRIANGLES.");
+                }
             }
         }
         coarser->setParams(params);
@@ -198,9 +208,9 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa
     throw std::invalid_argument("Invalid coarser name: " + coarser_name);
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::unique_ptr<MultilevelCoarser<Graph_t_in, Graph_t_out>>
-get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) {
+template <typename Graph_t_in, typename Graph_t_out>
+std::unique_ptr<MultilevelCoarser<Graph_t_in, Graph_t_out>> get_multilevel_coarser_by_name(
+    const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) {
     const std::string coarser_name = coarser_algorithm.get_child("name").get_value<std::string>();
 
     if (coarser_name == "Sarkar") {
@@ -219,23 +229,28 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree:
                 }
                 std::sort(ml_params.commCostVec.begin(), ml_params.commCostVec.end());
             }
-            ml_params.maxWeight =
-                params_pt.get_optional<v_workw_t<Graph_t_in>>("maxWeight").value_or(ml_params.maxWeight);
-            ml_params.smallWeightThreshold =
-                params_pt.get_optional<v_workw_t<Graph_t_in>>("smallWeightThreshold").value_or(ml_params.smallWeightThreshold);
-            ml_params.max_num_iteration_without_changes =
-                params_pt.get_optional<unsigned>("max_num_iteration_without_changes")
-                    .value_or(ml_params.max_num_iteration_without_changes);
+            ml_params.maxWeight = params_pt.get_optional<v_workw_t<Graph_t_in>>("maxWeight").value_or(ml_params.maxWeight);
+            ml_params.smallWeightThreshold
+                = params_pt.get_optional<v_workw_t<Graph_t_in>>("smallWeightThreshold").value_or(ml_params.smallWeightThreshold);
+            ml_params.max_num_iteration_without_changes = params_pt.get_optional<unsigned>("max_num_iteration_without_changes")
+                                                              .value_or(ml_params.max_num_iteration_without_changes);
 
             if (auto mode_str_opt = params_pt.get_optional<std::string>("buffer_merge_mode")) {
                 const std::string &mode_str = mode_str_opt.get();
-                if (mode_str == "OFF") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::OFF;
-                else if (mode_str == "FAN_IN") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_IN;
-                else if (mode_str == "FAN_OUT") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_OUT;
-                else if (mode_str == "HOMOGENEOUS") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::HOMOGENEOUS;
-                else if (mode_str == "FULL") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL;
-                else throw std::invalid_argument("Invalid Sarkar Buffer Merge mode: " + mode_str
-                    + "!\nChoose from: OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL.");
+                if (mode_str == "OFF") {
+                    ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::OFF;
+                } else if (mode_str == "FAN_IN") {
+                    ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_IN;
+                } else if (mode_str == "FAN_OUT") {
+                    ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_OUT;
+                } else if (mode_str == "HOMOGENEOUS") {
+                    ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::HOMOGENEOUS;
+                } else if (mode_str == "FULL") {
+                    ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL;
+                } else {
+                    throw std::invalid_argument("Invalid Sarkar Buffer Merge mode: " + mode_str
+                                                + "!\nChoose from: OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL.");
+                }
             }
         }
 
@@ -248,18 +263,16 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree:
 
         if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) {
             const auto &params_pt = params_opt.get();
-            params.geom_decay_num_nodes =
-                params_pt.get_optional<double>("geom_decay_num_nodes").value_or(params.geom_decay_num_nodes);
+            params.geom_decay_num_nodes
+                = params_pt.get_optional<double>("geom_decay_num_nodes").value_or(params.geom_decay_num_nodes);
             params.poisson_par = params_pt.get_optional<double>("poisson_par").value_or(params.poisson_par);
             params.noise = params_pt.get_optional<unsigned>("noise").value_or(params.noise);
-            params.num_rep_without_node_decrease =
-                params_pt.get_optional<unsigned>("num_rep_without_node_decrease")
-                    .value_or(params.num_rep_without_node_decrease);
-            params.temperature_multiplier =
-                params_pt.get_optional<double>("temperature_multiplier").value_or(params.temperature_multiplier);
-            params.number_of_temperature_increases =
-                params_pt.get_optional<unsigned>("number_of_temperature_increases")
-                    .value_or(params.number_of_temperature_increases);
+            params.num_rep_without_node_decrease
+                = params_pt.get_optional<unsigned>("num_rep_without_node_decrease").value_or(params.num_rep_without_node_decrease);
+            params.temperature_multiplier
+                = params_pt.get_optional<double>("temperature_multiplier").value_or(params.temperature_multiplier);
+            params.number_of_temperature_increases = params_pt.get_optional<unsigned>("number_of_temperature_increases")
+                                                         .value_or(params.number_of_temperature_increases);
 
             if (auto mode_str_opt = params_pt.get_optional<std::string>("mode")) {
                 if (mode_str_opt.get() == "EDGE_WEIGHT") {
@@ -268,7 +281,7 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree:
                     params.mode = SquashAParams::Mode::TRIANGLES;
                 } else {
                     throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get()
-                    + "!\nChoose from: EDGE_WEIGHT, TRIANGLES.");
+                                                + "!\nChoose from: EDGE_WEIGHT, TRIANGLES.");
                 }
             }
 
@@ -282,4 +295,4 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree:
     throw std::invalid_argument("Invalid multilevel coarser name: " + coarser_name);
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp
index 369c9fa6..3ce4f75a 100644
--- a/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp
+++ b/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp
@@ -25,46 +25,44 @@ limitations under the License.
 #include <string>
 #include <tuple>
 
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "../ConfigParser.hpp"
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
 #include "run_bsp_scheduler.hpp"
 
-
 namespace osp {
 
 const std::set<std::string> get_available_bsp_recomp_scheduler_names() { return {"GreedyRecomputer"}; }
 
-template<typename Graph_t>
-RETURN_STATUS run_bsp_recomp_scheduler(const ConfigParser &parser, const boost::property_tree::ptree &algorithm,
-                                BspScheduleRecomp<Graph_t> &schedule) {
-
-    //const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value<unsigned>();
-    // const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value<bool>();
+template <typename Graph_t>
+RETURN_STATUS run_bsp_recomp_scheduler(const ConfigParser &parser,
+                                       const boost::property_tree::ptree &algorithm,
+                                       BspScheduleRecomp<Graph_t> &schedule) {
+    // const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value<unsigned>();
+    //  const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value<bool>();
 
     std::cout << "Running algorithm: " << algorithm.get_child("name").get_value<std::string>() << std::endl;
 
     if (algorithm.get_child("name").get_value<std::string>() == "GreedyRecomputer") {
-
         BspSchedule<Graph_t> bsp_schedule(schedule.getInstance());
 
         RETURN_STATUS status = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), bsp_schedule);
 
         BspScheduleCS<Graph_t> initial_schedule(std::move(bsp_schedule));
 
-        if (status == RETURN_STATUS::ERROR)    
+        if (status == RETURN_STATUS::ERROR) {
             return RETURN_STATUS::ERROR;
+        }
 
         GreedyRecomputer<Graph_t> scheduler;
 
         return scheduler.computeRecompSchedule(initial_schedule, schedule);
 
     } else {
-
         throw std::invalid_argument("Parameter error: Unknown algorithm.\n");
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp
index 08209efd..996faa08 100644
--- a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp
+++ b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp
@@ -50,18 +50,30 @@ limitations under the License.
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 #ifdef COPT
-#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp"
+#    include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp"
 // #include "osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp"
 #endif
 
 namespace osp {
 
 const std::set<std::string> get_available_bsp_scheduler_names() {
-    return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom",
-            "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"};
+    return {"Serial",
+            "GreedyBsp",
+            "GrowLocal",
+            "BspLocking",
+            "Cilk",
+            "Etf",
+            "GreedyRandom",
+            "GreedyChildren",
+            "Variance",
+            "MultiHC",
+            "LocalSearch",
+            "Coarser",
+            "FullILP",
+            "MultiLevel"};
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::unique_ptr<ImprovementScheduler<Graph_t>> get_bsp_improver_by_name(const ConfigParser &,
                                                                         const boost::property_tree::ptree &algorithm) {
     const std::string improver_name = algorithm.get_child("name").get_value<std::string>();
@@ -77,10 +89,9 @@ std::unique_ptr<ImprovementScheduler<Graph_t>> get_bsp_improver_by_name(const Co
     throw std::invalid_argument("Invalid improver name: " + improver_name);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::unique_ptr<Scheduler<Graph_t>> get_base_bsp_scheduler_by_name(const ConfigParser &parser,
                                                                    const boost::property_tree::ptree &algorithm) {
-
     const std::string id = algorithm.get_child("id").get_value<std::string>();
 
     if (id == "Serial") {
@@ -88,42 +99,38 @@ std::unique_ptr<Scheduler<Graph_t>> get_base_bsp_scheduler_by_name(const ConfigP
         return scheduler;
 
     } else if (id == "GreedyBsp") {
-        float max_percent_idle_processors =
-            algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value<float>();
-        bool increase_parallelism_in_new_superstep =
-            algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value<bool>();
-        auto scheduler = std::make_unique<GreedyBspScheduler<Graph_t>>(max_percent_idle_processors,
-                                                                       increase_parallelism_in_new_superstep);
+        float max_percent_idle_processors
+            = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value<float>();
+        bool increase_parallelism_in_new_superstep
+            = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value<bool>();
+        auto scheduler
+            = std::make_unique<GreedyBspScheduler<Graph_t>>(max_percent_idle_processors, increase_parallelism_in_new_superstep);
 
         return scheduler;
 
     } else if (id == "GrowLocal") {
         GrowLocalAutoCores_Params<v_workw_t<Graph_t>> params;
         params.minSuperstepSize = algorithm.get_child("parameters").get_child("minSuperstepSize").get_value<unsigned>();
-        params.syncCostMultiplierMinSuperstepWeight = algorithm.get_child("parameters")
-                                                          .get_child("syncCostMultiplierMinSuperstepWeight")
-                                                          .get_value<v_workw_t<Graph_t>>();
-        params.syncCostMultiplierParallelCheck = algorithm.get_child("parameters")
-                                                     .get_child("syncCostMultiplierParallelCheck")
-                                                     .get_value<v_workw_t<Graph_t>>();
+        params.syncCostMultiplierMinSuperstepWeight
+            = algorithm.get_child("parameters").get_child("syncCostMultiplierMinSuperstepWeight").get_value<v_workw_t<Graph_t>>();
+        params.syncCostMultiplierParallelCheck
+            = algorithm.get_child("parameters").get_child("syncCostMultiplierParallelCheck").get_value<v_workw_t<Graph_t>>();
 
         return std::make_unique<GrowLocalAutoCores<Graph_t>>(params);
 
     } else if (id == "BspLocking") {
-        float max_percent_idle_processors =
-            algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value<float>();
-        bool increase_parallelism_in_new_superstep =
-            algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value<bool>();
-        auto scheduler =
-            std::make_unique<BspLocking<Graph_t>>(max_percent_idle_processors, increase_parallelism_in_new_superstep);
+        float max_percent_idle_processors
+            = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value<float>();
+        bool increase_parallelism_in_new_superstep
+            = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value<bool>();
+        auto scheduler = std::make_unique<BspLocking<Graph_t>>(max_percent_idle_processors, increase_parallelism_in_new_superstep);
 
         return scheduler;
 
     } else if (id == "Cilk") {
         auto scheduler = std::make_unique<CilkScheduler<Graph_t>>();
-        algorithm.get_child("parameters").get_child("mode").get_value<std::string>() == "SJF"
-            ? scheduler->setMode(CilkMode::SJF)
-            : scheduler->setMode(CilkMode::CILK);
+        algorithm.get_child("parameters").get_child("mode").get_value<std::string>() == "SJF" ? scheduler->setMode(CilkMode::SJF)
+                                                                                              : scheduler->setMode(CilkMode::CILK);
         return scheduler;
 
     } else if (id == "Etf") {
@@ -142,12 +149,12 @@ std::unique_ptr<Scheduler<Graph_t>> get_base_bsp_scheduler_by_name(const ConfigP
         return scheduler;
 
     } else if (id == "Variance") {
-        float max_percent_idle_processors =
-            algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value<float>();
-        bool increase_parallelism_in_new_superstep =
-            algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value<bool>();
-        auto scheduler = std::make_unique<VarianceFillup<Graph_t>>(max_percent_idle_processors,
-                                                                   increase_parallelism_in_new_superstep);
+        float max_percent_idle_processors
+            = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value<float>();
+        bool increase_parallelism_in_new_superstep
+            = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value<bool>();
+        auto scheduler
+            = std::make_unique<VarianceFillup<Graph_t>>(max_percent_idle_processors, increase_parallelism_in_new_superstep);
 
         return scheduler;
     }
@@ -160,8 +167,7 @@ std::unique_ptr<Scheduler<Graph_t>> get_base_bsp_scheduler_by_name(const ConfigP
             unsigned step = algorithm.get_child("parameters").get_child("hill_climbing_steps").get_value<unsigned>();
             scheduler->setNumberOfHcSteps(step);
 
-            const double contraction_rate =
-                algorithm.get_child("parameters").get_child("contraction_rate").get_value<double>();
+            const double contraction_rate = algorithm.get_child("parameters").get_child("contraction_rate").get_value<double>();
             scheduler->setContractionRate(contraction_rate);
             scheduler->useLinearRefinementSteps(20U);
             scheduler->setMinTargetNrOfNodes(100U);
@@ -172,29 +178,28 @@ std::unique_ptr<Scheduler<Graph_t>> get_base_bsp_scheduler_by_name(const ConfigP
     throw std::invalid_argument("Invalid base scheduler name: " + id);
 }
 
-template<typename Graph_t>
-RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::property_tree::ptree &algorithm,
+template <typename Graph_t>
+RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser,
+                                const boost::property_tree::ptree &algorithm,
                                 BspSchedule<Graph_t> &schedule) {
-
-    using vertex_type_t_or_default =
-        std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
-    using edge_commw_t_or_default =
-        std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
-    using boost_graph_t = boost_graph<v_workw_t<Graph_t>, v_commw_t<Graph_t>, v_memw_t<Graph_t>,
-                                      vertex_type_t_or_default, edge_commw_t_or_default>;
+    using vertex_type_t_or_default
+        = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
+    using edge_commw_t_or_default = std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
+    using boost_graph_t
+        = boost_graph<v_workw_t<Graph_t>, v_commw_t<Graph_t>, v_memw_t<Graph_t>, vertex_type_t_or_default, edge_commw_t_or_default>;
 
     const std::string id = algorithm.get_child("id").get_value<std::string>();
 
     std::cout << "Running algorithm: " << id << std::endl;
 
     if (id == "LocalSearch") {
-        RETURN_STATUS status =
-            run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule);
-        if (status == RETURN_STATUS::ERROR)
+        RETURN_STATUS status = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule);
+        if (status == RETURN_STATUS::ERROR) {
             return RETURN_STATUS::ERROR;
+        }
 
-        std::unique_ptr<ImprovementScheduler<Graph_t>> improver =
-            get_bsp_improver_by_name<Graph_t>(parser, algorithm.get_child("parameters").get_child("improver"));
+        std::unique_ptr<ImprovementScheduler<Graph_t>> improver
+            = get_bsp_improver_by_name<Graph_t>(parser, algorithm.get_child("parameters").get_child("improver"));
         return improver->improveSchedule(schedule);
 #ifdef COPT
     } else if (id == "FullILP") {
@@ -207,8 +212,8 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert
 
         // initial solution
         if (algorithm.get_child("parameters").get_child("use_initial_solution").get_value<bool>()) {
-            std::string init_sched =
-                algorithm.get_child("parameters").get_child("initial_solution_scheduler").get_value<std::string>();
+            std::string init_sched
+                = algorithm.get_child("parameters").get_child("initial_solution_scheduler").get_value<std::string>();
             if (init_sched == "FullILP") {
                 throw std::invalid_argument("Parameter error: Initial solution cannot be FullILP.\n");
             }
@@ -228,48 +233,49 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert
         // intermediate solutions
         if (algorithm.get_child("parameters").get_child("write_intermediate_solutions").get_value<bool>()) {
             scheduler.enableWriteIntermediateSol(
-                algorithm.get_child("parameters")
-                    .get_child("intermediate_solutions_directory")
-                    .get_value<std::string>(),
+                algorithm.get_child("parameters").get_child("intermediate_solutions_directory").get_value<std::string>(),
                 algorithm.get_child("parameters").get_child("intermediate_solutions_prefix").get_value<std::string>());
         }
 
         return scheduler.computeScheduleWithTimeLimit(schedule, timeLimit);
 #endif
     } else if (id == "Coarser") {
-        std::unique_ptr<Coarser<Graph_t, boost_graph_t>> coarser =
-            get_coarser_by_name<Graph_t, boost_graph_t>(parser, algorithm.get_child("parameters").get_child("coarser"));
+        std::unique_ptr<Coarser<Graph_t, boost_graph_t>> coarser
+            = get_coarser_by_name<Graph_t, boost_graph_t>(parser, algorithm.get_child("parameters").get_child("coarser"));
         const auto &instance = schedule.getInstance();
         BspInstance<boost_graph_t> instance_coarse;
         std::vector<vertex_idx_t<boost_graph_t>> reverse_vertex_map;
-        bool status = coarser->coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(),
-                                          reverse_vertex_map);
-        if (!status)
+        bool status
+            = coarser->coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), reverse_vertex_map);
+        if (!status) {
             return RETURN_STATUS::ERROR;
+        }
 
         instance_coarse.getArchitecture() = instance.getArchitecture();
         instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix());
         BspSchedule<boost_graph_t> schedule_coarse(instance_coarse);
 
-        const auto status_coarse =
-            run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule_coarse);
-        if (status_coarse != RETURN_STATUS::OSP_SUCCESS and status_coarse != RETURN_STATUS::BEST_FOUND)
+        const auto status_coarse
+            = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule_coarse);
+        if (status_coarse != RETURN_STATUS::OSP_SUCCESS and status_coarse != RETURN_STATUS::BEST_FOUND) {
             return status_coarse;
+        }
 
         status = coarser_util::pull_back_schedule(schedule_coarse, reverse_vertex_map, schedule);
-        if (!status)
+        if (!status) {
             return RETURN_STATUS::ERROR;
+        }
 
         return RETURN_STATUS::OSP_SUCCESS;
 
     } else if (id == "MultiLevel") {
-        std::unique_ptr<MultilevelCoarser<Graph_t, boost_graph_t>> ml_coarser =
-            get_multilevel_coarser_by_name<Graph_t, boost_graph_t>(
-                parser, algorithm.get_child("parameters").get_child("coarser"));
-        std::unique_ptr<ImprovementScheduler<boost_graph_t>> improver =
-            get_bsp_improver_by_name<boost_graph_t>(parser, algorithm.get_child("parameters").get_child("improver"));
-        std::unique_ptr<Scheduler<boost_graph_t>> scheduler = get_base_bsp_scheduler_by_name<boost_graph_t>(
-            parser, algorithm.get_child("parameters").get_child("scheduler"));
+        std::unique_ptr<MultilevelCoarser<Graph_t, boost_graph_t>> ml_coarser
+            = get_multilevel_coarser_by_name<Graph_t, boost_graph_t>(parser,
+                                                                     algorithm.get_child("parameters").get_child("coarser"));
+        std::unique_ptr<ImprovementScheduler<boost_graph_t>> improver
+            = get_bsp_improver_by_name<boost_graph_t>(parser, algorithm.get_child("parameters").get_child("improver"));
+        std::unique_ptr<Scheduler<boost_graph_t>> scheduler
+            = get_base_bsp_scheduler_by_name<boost_graph_t>(parser, algorithm.get_child("parameters").get_child("scheduler"));
 
         MultilevelCoarseAndSchedule<Graph_t, boost_graph_t> coarse_and_schedule(*scheduler, *improver, *ml_coarser);
         return coarse_and_schedule.computeSchedule(schedule);
@@ -279,4 +285,4 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp b/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp
index 3dd05318..a03bcc67 100644
--- a/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp
+++ b/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp
@@ -32,21 +32,19 @@ namespace osp {
 
 const std::set<std::string> get_available_pebbler_names() { return {"Pebbler"}; }
 
-template<typename Graph_t>
-RETURN_STATUS run_pebbler(const ConfigParser &parser, const boost::property_tree::ptree &algorithm,
+template <typename Graph_t>
+RETURN_STATUS run_pebbler(const ConfigParser &parser,
+                          const boost::property_tree::ptree &algorithm,
                           PebblingSchedule<Graph_t> &schedule) {
-
     // const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value<unsigned>();
     //  const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value<bool>();
 
     std::cout << "Running algorithm: " << algorithm.get_child("name").get_value<std::string>() << std::endl;
 
     if (algorithm.get_child("name").get_value<std::string>() == "Pebbler") {
-
     } else {
-
         throw std::invalid_argument("Parameter error: Unknown algorithm.\n");
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/Balanced_Coin_Flips.hpp b/include/osp/auxiliary/Balanced_Coin_Flips.hpp
index 7443719b..44676800 100644
--- a/include/osp/auxiliary/Balanced_Coin_Flips.hpp
+++ b/include/osp/auxiliary/Balanced_Coin_Flips.hpp
@@ -126,8 +126,7 @@ class Biased_Random_with_side_bias : public BalancedCoinFlips {
             throw std::runtime_error("true_bias is too large!");
         }
 
-        unsigned die_size =
-            (side_ratio.first + side_ratio.second) * genuine_random_size + static_cast<unsigned>(abs_true_bias);
+        unsigned die_size = (side_ratio.first + side_ratio.second) * genuine_random_size + static_cast<unsigned>(abs_true_bias);
 
         if (die_size > static_cast<unsigned>(std::numeric_limits<int>::max())) {
             throw std::runtime_error("die_size is too large!");
@@ -164,4 +163,4 @@ class Biased_Random_with_side_bias : public BalancedCoinFlips {
     const std::pair<unsigned, unsigned> side_ratio;
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/datastructures/bit_mask.hpp b/include/osp/auxiliary/datastructures/bit_mask.hpp
index 225e16b8..a60c44b9 100644
--- a/include/osp/auxiliary/datastructures/bit_mask.hpp
+++ b/include/osp/auxiliary/datastructures/bit_mask.hpp
@@ -25,9 +25,9 @@ namespace osp {
 struct BitMask {
     std::vector<bool> mask;
 
-    BitMask(std::size_t n) : mask(std::vector<bool>(n, false)) { };
+    BitMask(std::size_t n) : mask(std::vector<bool>(n, false)) {};
 
-    BitMask& operator++() {
+    BitMask &operator++() {
         for (std::size_t i = 0; i < mask.size(); ++i) {
             if (mask[i]) {
                 mask[i] = false;
@@ -46,4 +46,4 @@ struct BitMask {
     };
 };
 
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp b/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp
index 2f73f6e0..212c28ab 100644
--- a/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp
+++ b/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp
@@ -39,7 +39,8 @@ namespace osp {
  * @tparam D The number of children for each node (the 'd' in d-ary). Must be >= 2.
  * @tparam Compare The comparison function object type.
  */
-template <typename Key, typename Value, unsigned int D, typename Compare> class DaryHeap {
+template <typename Key, typename Value, unsigned int D, typename Compare>
+class DaryHeap {
     static_assert(D >= 2, "D-ary heap must have at least 2 children per node.");
 
   private:
@@ -169,8 +170,9 @@ template <typename Key, typename Value, unsigned int D, typename Compare> class
     }
 
     void sift_up(size_t index) {
-        if (index == 0)
+        if (index == 0) {
             return;
+        }
 
         Key key_to_sift = std::move(heap[index]);
         const Value &value_to_sift = node_info.at(key_to_sift).value;
@@ -238,4 +240,4 @@ using MaxIndexedHeap = IndexedHeap<Key, Value, std::greater<Value>>;
 template <typename Key, typename Value>
 using MinIndexedHeap = IndexedHeap<Key, Value, std::less<Value>>;
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp b/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp
index 39bb79f4..fe505ab1 100644
--- a/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp
+++ b/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp
@@ -18,22 +18,23 @@ limitations under the License.
 
 #pragma once
 
-#include <functional>
 #include <algorithm>
+#include <functional>
 #include <stdexcept>
 #include <unordered_map>
 #include <vector>
 
 namespace osp {
 
-template <typename Key, typename Value, typename Compare> class PairingHeap {
+template <typename Key, typename Value, typename Compare>
+class PairingHeap {
   private:
     struct Node {
         Key key;
         Value value;
-        Node *child = nullptr;          // Leftmost child
-        Node *next_sibling = nullptr;   // Sibling to the right
-        Node *prev_or_parent = nullptr; // If leftmost child, parent; otherwise, left sibling.
+        Node *child = nullptr;             // Leftmost child
+        Node *next_sibling = nullptr;      // Sibling to the right
+        Node *prev_or_parent = nullptr;    // If leftmost child, parent; otherwise, left sibling.
     };
 
     Node *root = nullptr;
@@ -43,10 +44,12 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
 
     // Melds two heaps together.
     Node *meld(Node *heap1, Node *heap2) {
-        if (!heap1)
+        if (!heap1) {
             return heap2;
-        if (!heap2)
+        }
+        if (!heap2) {
             return heap1;
+        }
 
         if (comp(heap2->value, heap1->value)) {
             std::swap(heap1, heap2);
@@ -108,9 +111,9 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
             return;
         }
 
-        if (node->prev_or_parent->child == node) { // is leftmost child
+        if (node->prev_or_parent->child == node) {    // is leftmost child
             node->prev_or_parent->child = node->next_sibling;
-        } else { // is not leftmost child
+        } else {    // is not leftmost child
             node->prev_or_parent->next_sibling = node->next_sibling;
         }
         if (node->next_sibling) {
@@ -122,18 +125,19 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
 
   public:
     PairingHeap() = default;
+
     ~PairingHeap() { clear(); }
-    
-    PairingHeap(const PairingHeap& other) : num_elements(other.num_elements), comp(other.comp) {
+
+    PairingHeap(const PairingHeap &other) : num_elements(other.num_elements), comp(other.comp) {
         root = nullptr;
         if (!other.root) {
             return;
         }
 
-        std::unordered_map<const Node*, Node*> old_to_new;
-        std::vector<const Node*> q;
+        std::unordered_map<const Node *, Node *> old_to_new;
+        std::vector<const Node *> q;
         q.reserve(other.num_elements);
-        
+
         // Create root
         root = new Node{other.root->key, other.root->value};
         node_map[root->key] = root;
@@ -141,15 +145,15 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
         q.push_back(other.root);
 
         size_t head = 0;
-        while(head < q.size()) {
-            const Node* old_parent = q[head++];
-            Node* new_parent = old_to_new[old_parent];
+        while (head < q.size()) {
+            const Node *old_parent = q[head++];
+            Node *new_parent = old_to_new[old_parent];
 
             if (old_parent->child) {
-                const Node* old_child = old_parent->child;
-                
+                const Node *old_child = old_parent->child;
+
                 // First child
-                Node* new_child = new Node{old_child->key, old_child->value};
+                Node *new_child = new Node{old_child->key, old_child->value};
                 new_parent->child = new_child;
                 new_child->prev_or_parent = new_parent;
                 node_map[new_child->key] = new_child;
@@ -157,11 +161,11 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
                 q.push_back(old_child);
 
                 // Siblings
-                Node* prev_new_sibling = new_child;
-                while(old_child->next_sibling) {
+                Node *prev_new_sibling = new_child;
+                while (old_child->next_sibling) {
                     old_child = old_child->next_sibling;
                     new_child = new Node{old_child->key, old_child->value};
-                    
+
                     prev_new_sibling->next_sibling = new_child;
                     new_child->prev_or_parent = prev_new_sibling;
 
@@ -175,7 +179,7 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
         }
     }
 
-    PairingHeap& operator=(const PairingHeap& other) {
+    PairingHeap &operator=(const PairingHeap &other) {
         if (this != &other) {
             PairingHeap temp(other);
             std::swap(root, temp.root);
@@ -205,7 +209,7 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
         const auto pair = node_map.emplace(key, new_node);
         const bool &success = pair.second;
         if (!success) {
-            delete new_node; // Avoid memory leak if key already exists
+            delete new_node;    // Avoid memory leak if key already exists
             throw std::invalid_argument("Key already exists in the heap.");
         }
         root = meld(root, new_node);
@@ -248,13 +252,13 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
         Node *node = it->second;
         const Value old_value = node->value;
 
-        if (comp(new_value, old_value)) { // Decrease key
+        if (comp(new_value, old_value)) {    // Decrease key
             node->value = new_value;
             if (node != root) {
                 cut(node);
                 root = meld(root, node);
             }
-        } else if (comp(old_value, new_value)) { // Increase key
+        } else if (comp(old_value, new_value)) {    // Increase key
             node->value = new_value;
             if (node != root) {
                 cut(node);
@@ -354,13 +358,13 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
             top_keys.reserve(limit);
         }
 
-        const Value& top_value = root->value;
-        std::vector<const Node*> q;
+        const Value &top_value = root->value;
+        std::vector<const Node *> q;
         q.push_back(root);
         size_t head = 0;
 
         while (head < q.size()) {
-            const Node* current = q[head++];
+            const Node *current = q[head++];
 
             if (comp(top_value, current->value)) {
                 continue;
@@ -371,7 +375,7 @@ template <typename Key, typename Value, typename Compare> class PairingHeap {
                 return top_keys;
             }
 
-            Node* child = current->child;
+            Node *child = current->child;
             while (child) {
                 q.push_back(child);
                 child = child->next_sibling;
@@ -387,4 +391,4 @@ using MaxPairingHeap = PairingHeap<Key, Value, std::greater<Value>>;
 template <typename Key, typename Value>
 using MinPairingHeap = PairingHeap<Key, Value, std::less<Value>>;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/datastructures/union_find.hpp b/include/osp/auxiliary/datastructures/union_find.hpp
index 559d5b6b..f955eddc 100644
--- a/include/osp/auxiliary/datastructures/union_find.hpp
+++ b/include/osp/auxiliary/datastructures/union_find.hpp
@@ -29,9 +29,9 @@ limitations under the License.
 namespace osp {
 
 /// @brief Structure to execute a union-find algorithm
-template<typename T, typename index_t, typename workw_t, typename memw_t>
+template <typename T, typename index_t, typename workw_t, typename memw_t>
 struct union_find_object {
-    const T name; // unique identifier
+    const T name;    // unique identifier
     index_t parent_index;
     unsigned rank;
     workw_t weight;
@@ -43,11 +43,11 @@ struct union_find_object {
     }
 
     union_find_object(const union_find_object &other) = default;
-    union_find_object& operator=(const union_find_object &other) = default;
+    union_find_object &operator=(const union_find_object &other) = default;
 };
 
 /// @brief Class to execute a union-find algorithm
-template<typename T, typename index_t, typename workw_t, typename memw_t>
+template <typename T, typename index_t, typename workw_t, typename memw_t>
 class Union_Find_Universe {
   private:
     std::vector<union_find_object<T, index_t, workw_t, memw_t>> universe;
@@ -91,7 +91,6 @@ class Union_Find_Universe {
     index_t get_index_from_name(const T &name) const { return names_to_indices.at(name); }
 
   public:
-
     void reset() {
         universe.clear();
         names_to_indices.clear();
@@ -108,9 +107,7 @@ class Union_Find_Universe {
     /// @brief Joins two components
     /// @param name of object to join
     /// @param other_name of object to join
-    void join_by_name(const T &name, const T &other_name) {
-        join(names_to_indices.at(name), names_to_indices.at(other_name));
-    }
+    void join_by_name(const T &name, const T &other_name) { join(names_to_indices.at(name), names_to_indices.at(other_name)); }
 
     /// @brief Retrieves the current number of connected components
     std::size_t get_number_of_connected_components() const { return component_indices.size(); }
@@ -140,8 +137,7 @@ class Union_Find_Universe {
         std::vector<std::tuple<T, workw_t, memw_t>> component_names_weights_and_memory;
         component_names_weights_and_memory.reserve(component_indices.size());
         for (auto &indx : component_indices) {
-            component_names_weights_and_memory.emplace_back(
-                {universe[indx].name, universe[indx].weight, universe[indx].memory});
+            component_names_weights_and_memory.emplace_back(universe[indx].name, universe[indx].weight, universe[indx].memory);
         }
         return component_names_weights_and_memory;
     }
@@ -342,8 +338,7 @@ class Union_Find_Universe {
     /// @param names of objects
     /// @param weights of objects
     /// @param memories of objects
-    void add_object(const std::vector<T> &names, const std::vector<unsigned> &weights,
-                    const std::vector<memw_t> &memories) {
+    void add_object(const std::vector<T> &names, const std::vector<unsigned> &weights, const std::vector<memw_t> &memories) {
         if (names.size() != weights.size()) {
             throw std::runtime_error("Vectors of names and weights must be of equal length.");
         }
@@ -379,25 +374,23 @@ class Union_Find_Universe {
     /// @brief Initiates a union-find structure
     /// @param names of objects
     /// @param weights of objects
-    explicit Union_Find_Universe(const std::vector<T> &names, const std::vector<workw_t> &weights) {
-        add_object(names, weights);
-    }
+    explicit Union_Find_Universe(const std::vector<T> &names, const std::vector<workw_t> &weights) { add_object(names, weights); }
 
     /// @brief Initiates a union-find structure
     /// @param names of objects
     /// @param weights of objects
-    explicit Union_Find_Universe(const std::vector<T> &names, const std::vector<workw_t> &weights,
+    explicit Union_Find_Universe(const std::vector<T> &names,
+                                 const std::vector<workw_t> &weights,
                                  const std::vector<memw_t> &memories) {
         add_object(names, weights, memories);
     }
 
     Union_Find_Universe(const Union_Find_Universe &other) = default;
-    Union_Find_Universe& operator=(const Union_Find_Universe &other) = default;
+    Union_Find_Universe &operator=(const Union_Find_Universe &other) = default;
 };
 
-template<typename Graph_t>
-using union_find_universe_t = Union_Find_Universe<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t>, v_workw_t<Graph_t>,
-                                               v_memw_t<Graph_t>>;
-
+template <typename Graph_t>
+using union_find_universe_t
+    = Union_Find_Universe<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t>, v_workw_t<Graph_t>, v_memw_t<Graph_t>>;
 
-} // namespace osp  
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/hash_util.hpp b/include/osp/auxiliary/hash_util.hpp
index 80699cbc..825e5ba5 100644
--- a/include/osp/auxiliary/hash_util.hpp
+++ b/include/osp/auxiliary/hash_util.hpp
@@ -22,29 +22,32 @@ limitations under the License.
 
 namespace osp {
 
-template<typename VertexType, std::size_t defautlVal = 11U>
+template <typename VertexType, std::size_t defautlVal = 11U>
 struct uniform_node_hash_func {
     using result_type = std::size_t;
-    result_type operator()(const VertexType& ) { return defautlVal; }
-};
 
+    result_type operator()(const VertexType &) const { return defautlVal; }    // every vertex maps to the same hash value
+};
 
-template<typename VertexType>
+template <typename VertexType>
 struct vector_node_hash_func {
-    const std::vector<std::size_t>& node_hashes_;
-    vector_node_hash_func(const std::vector<std::size_t>& node_hashes) : node_hashes_(node_hashes) {}
+    const std::vector<std::size_t> &node_hashes_;
+
+    vector_node_hash_func(const std::vector<std::size_t> &node_hashes) : node_hashes_(node_hashes) {}
+
     using result_type = std::size_t;
-    result_type operator()(const VertexType& v) const { return node_hashes_[v]; }
+
+    result_type operator()(const VertexType &v) const { return node_hashes_[v]; }
 };
 
-template<class T>
+template <class T>
 void hash_combine(std::size_t &seed, const T &v) {
     std::hash<T> hasher;
     seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 }
 
 struct pair_hash {
-    template<class T1, class T2>
+    template <class T1, class T2>
     std::size_t operator()(const std::pair<T1, T2> &p) const {
         std::size_t h1 = std::hash<T1>{}(p.first);
 
@@ -57,4 +60,4 @@ struct pair_hash {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/DotFileWriter.hpp b/include/osp/auxiliary/io/DotFileWriter.hpp
index 907fcb06..ef75c39f 100644
--- a/include/osp/auxiliary/io/DotFileWriter.hpp
+++ b/include/osp/auxiliary/io/DotFileWriter.hpp
@@ -18,19 +18,19 @@ limitations under the License.
 
 #pragma once
 
+#include <fstream>
+#include <string>
+
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
-#include <fstream>
-#include <string>
-
 namespace osp {
 
 class DotFileWriter {
   private:
-    template<typename Graph_t>
+    template <typename Graph_t>
 
     struct EdgeWriter_DOT {
         const Graph_t &graph;
@@ -44,9 +44,8 @@ class DotFileWriter {
         }
     };
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     struct VertexWriterSchedule_DOT {
-
         const BspSchedule<Graph_t> &schedule;
 
         VertexWriterSchedule_DOT(const BspSchedule<Graph_t> &schedule_) : schedule(schedule_) {}
@@ -58,20 +57,17 @@ class DotFileWriter {
                 << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";";
 
             if constexpr (has_typed_vertices_v<Graph_t>) {
-
                 out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";";
             }
 
-            out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\""
-                << schedule.assignedSuperstep(i) << "\";";
+            out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" << schedule.assignedSuperstep(i) << "\";";
 
             out << "]";
         }
     };
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     struct VertexWriterScheduleRecomp_DOT {
-
         const BspScheduleRecomp<Graph_t> &schedule;
 
         VertexWriterScheduleRecomp_DOT(const BspScheduleRecomp<Graph_t> &schedule_) : schedule(schedule_) {}
@@ -83,7 +79,6 @@ class DotFileWriter {
                 << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";";
 
             if constexpr (has_typed_vertices_v<Graph_t>) {
-
                 out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";";
             }
 
@@ -101,9 +96,7 @@ class DotFileWriter {
             bool found = false;
 
             for (const auto &[key, val] : schedule.getCommunicationSchedule()) {
-
                 if (std::get<0>(key) == i) {
-
                     if (!found) {
                         out << "cs=\"[";
                         found = true;
@@ -123,7 +116,7 @@ class DotFileWriter {
         }
     };
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     struct VertexWriterDuplicateRecompSchedule_DOT {
         const Graph_t &graph;
         const std::vector<std::string> name;
@@ -136,21 +129,18 @@ class DotFileWriter {
                                                 std::vector<unsigned> &node_to_superstep_)
             : graph(graph_), name(name_), node_to_proc(node_to_proc_), node_to_superstep(node_to_superstep_) {}
 
-        template<class VertexOrEdge>
+        template <class VertexOrEdge>
         void operator()(std::ostream &out, const VertexOrEdge &i) const {
-            out << i << " [" << "label=\"" << name[i] << "\";" << "work_weight=\""
-                << graph.vertex_work_weight(i) << "\";" << "comm_weight=\""
-                << graph.vertex_comm_weight(i) << "\";" << "mem_weight=\""
-                << graph.vertex_mem_weight(i) << "\";" << "proc=\""
-                << node_to_proc[i] << "\";" << "superstep=\"" << node_to_superstep[i] << "\";";
+            out << i << " [" << "label=\"" << name[i] << "\";" << "work_weight=\"" << graph.vertex_work_weight(i) << "\";"
+                << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";" << "mem_weight=\"" << graph.vertex_mem_weight(i)
+                << "\";" << "proc=\"" << node_to_proc[i] << "\";" << "superstep=\"" << node_to_superstep[i] << "\";";
 
             out << "]";
         }
     };
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     struct VertexWriterScheduleCS_DOT {
-
         const BspScheduleCS<Graph_t> &schedule;
 
         VertexWriterScheduleCS_DOT(const BspScheduleCS<Graph_t> &schedule_) : schedule(schedule_) {}
@@ -162,19 +152,15 @@ class DotFileWriter {
                 << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";";
 
             if constexpr (has_typed_vertices_v<Graph_t>) {
-
                 out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";";
             }
 
-            out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\""
-                << schedule.assignedSuperstep(i) << "\";";
+            out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" << schedule.assignedSuperstep(i) << "\";";
 
             bool found = false;
 
             for (const auto &[key, val] : schedule.getCommunicationSchedule()) {
-
                 if (std::get<0>(key) == i) {
-
                     if (!found) {
                         out << "cs=\"[";
                         found = true;
@@ -194,10 +180,8 @@ class DotFileWriter {
         }
     };
 
-
-    template<typename Graph_t>
+    template <typename Graph_t>
     struct VertexWriterGraph_DOT {
-
         const Graph_t &graph;
 
         VertexWriterGraph_DOT(const Graph_t &graph_) : graph(graph_) {}
@@ -209,7 +193,6 @@ class DotFileWriter {
                 << "mem_weight=\"" << graph.vertex_mem_weight(i) << "\";";
 
             if constexpr (has_typed_vertices_v<Graph_t>) {
-
                 out << "type=\"" << graph.vertex_type(i) << "\";";
             }
 
@@ -217,60 +200,62 @@ class DotFileWriter {
         }
     };
 
-    template<typename Graph_t, typename color_container_t>
+    template <typename Graph_t, typename color_container_t>
     struct ColoredVertexWriterGraph_DOT {
-
         const Graph_t &graph;
         const color_container_t &colors;
         std::vector<std::string> color_strings;
         std::vector<std::string> shape_strings;
 
         ColoredVertexWriterGraph_DOT(const Graph_t &graph_, const color_container_t &colors_) : graph(graph_), colors(colors_) {
-
-           color_strings = {
-                "lightcoral", "palegreen", "lightblue", "gold", "orchid", "sandybrown", "aquamarine", "burlywood",
-                "hotpink", "yellowgreen", "skyblue", "khaki", "violet", "salmon", "turquoise", "tan",
-                "deeppink", "chartreuse", "deepskyblue", "lemonchiffon", "magenta", "orangered", "cyan", "wheat",
-                "mediumvioletred", "limegreen", "dodgerblue", "lightyellow", "darkviolet", "tomato", "paleturquoise", "bisque",
-                "crimson", "lime", "steelblue", "papayawhip", "purple", "darkorange", "cadetblue", "peachpuff",
-                "indianred", "springgreen", "powderblue", "cornsilk", "mediumorchid", "chocolate", "darkturquoise", "navajowhite",
-                "firebrick", "seagreen", "royalblue", "lightgoldenrodyellow", "darkmagenta", "coral", "teal", "moccasin",
-                "maroon", "forestgreen", "blue", "yellow", "darkorchid", "red", "green", "navy",
-                "darkred", "darkgreen", "mediumblue", "ivory", "indigo", "orange", "darkcyan", "antiquewhite"
-            };
-                
-            shape_strings = {
-                 "oval", "rect", "hexagon", "parallelogram"
-            };
+            color_strings = {"lightcoral",      "palegreen",   "lightblue",     "gold",
+                             "orchid",          "sandybrown",  "aquamarine",    "burlywood",
+                             "hotpink",         "yellowgreen", "skyblue",       "khaki",
+                             "violet",          "salmon",      "turquoise",     "tan",
+                             "deeppink",        "chartreuse",  "deepskyblue",   "lemonchiffon",
+                             "magenta",         "orangered",   "cyan",          "wheat",
+                             "mediumvioletred", "limegreen",   "dodgerblue",    "lightyellow",
+                             "darkviolet",      "tomato",      "paleturquoise", "bisque",
+                             "crimson",         "lime",        "steelblue",     "papayawhip",
+                             "purple",          "darkorange",  "cadetblue",     "peachpuff",
+                             "indianred",       "springgreen", "powderblue",    "cornsilk",
+                             "mediumorchid",    "chocolate",   "darkturquoise", "navajowhite",
+                             "firebrick",       "seagreen",    "royalblue",     "lightgoldenrodyellow",
+                             "darkmagenta",     "coral",       "teal",          "moccasin",
+                             "maroon",          "forestgreen", "blue",          "yellow",
+                             "darkorchid",      "red",         "green",         "navy",
+                             "darkred",         "darkgreen",   "mediumblue",    "ivory",
+                             "indigo",          "orange",      "darkcyan",      "antiquewhite"};
+
+            shape_strings = {"oval", "rect", "hexagon", "parallelogram"};
         }
 
         void operator()(std::ostream &out, const vertex_idx_t<Graph_t> &i) const {
-
             if (i >= static_cast<vertex_idx_t<Graph_t>>(colors.size())) {
-                 // Fallback for safety: print without color if colors vector is mismatched or palette is empty.
-                 out << i << " [";
+                // Fallback for safety: print without color when vertex i has no entry in the colors container.
+                out << i << " [";
             } else {
-                 // Use modulo operator to cycle through the fixed palette if there are more color
-                 // groups than available colors.
-                 const std::string& color = color_strings[colors[i] % color_strings.size()];
-                 out << i << " [style=filled;fillcolor=" << color << ";";
+                // Use modulo operator to cycle through the fixed palette if there are more color
+                // groups than available colors.
+                const std::string &color = color_strings[colors[i] % color_strings.size()];
+                out << i << " [style=filled;fillcolor=" << color << ";";
             }
-          
+
             out << "work_weight=\"" << graph.vertex_work_weight(i) << "\";"
                 << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";"
                 << "mem_weight=\"" << graph.vertex_mem_weight(i) << "\";";
 
             if constexpr (has_typed_vertices_v<Graph_t>) {
-                out << "type=\"" << graph.vertex_type(i) << "\";shape=\"" << shape_strings[graph.vertex_type(i) % shape_strings.size()] << "\";"; 
+                out << "type=\"" << graph.vertex_type(i) << "\";shape=\""
+                    << shape_strings[graph.vertex_type(i) % shape_strings.size()] << "\";";
             }
 
             out << "]";
         }
     };
 
-    template<typename Graph_t, typename vertex_writer_t>
+    template <typename Graph_t, typename vertex_writer_t>
     void write_graph_structure(std::ostream &os, const Graph_t &graph, const vertex_writer_t &vertex_writer) const {
-
         os << "digraph G {\n";
         for (const auto &v : graph.vertices()) {
             vertex_writer(os, v);
@@ -286,7 +271,6 @@ class DotFileWriter {
             }
 
         } else {
-
             for (const auto &v : graph.vertices()) {
                 for (const auto &child : graph.children(v)) {
                     os << v << "->" << child << "\n";
@@ -314,11 +298,9 @@ class DotFileWriter {
      *
      * @param os The output stream to write the DOT representation of the computational DAG.
      */
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule(std::ostream &os, const BspSchedule<Graph_t> &schedule) const {
-
-        write_graph_structure(os, schedule.getInstance().getComputationalDag(),
-                              VertexWriterSchedule_DOT<Graph_t>(schedule));
+        write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterSchedule_DOT<Graph_t>(schedule));
     }
 
     /**
@@ -335,41 +317,36 @@ class DotFileWriter {
      *
      * @param filename The name of the file to write the DOT representation of the computational DAG.
      */
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule(const std::string &filename, const BspSchedule<Graph_t> &schedule) const {
         std::ofstream os(filename);
         write_schedule(os, schedule);
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule_cs(std::ostream &os, const BspScheduleCS<Graph_t> &schedule) const {
-
-        write_graph_structure(os, schedule.getInstance().getComputationalDag(),
-                              VertexWriterScheduleCS_DOT<Graph_t>(schedule));
+        write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterScheduleCS_DOT<Graph_t>(schedule));
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule_cs(const std::string &filename, const BspScheduleCS<Graph_t> &schedule) const {
         std::ofstream os(filename);
         write_schedule_cs(os, schedule);
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule_recomp(std::ostream &os, const BspScheduleRecomp<Graph_t> &schedule) const {
-
-        write_graph_structure(os, schedule.getInstance().getComputationalDag(),
-                              VertexWriterScheduleRecomp_DOT<Graph_t>(schedule));
+        write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterScheduleRecomp_DOT<Graph_t>(schedule));
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule_recomp(const std::string &filename, const BspScheduleRecomp<Graph_t> &schedule) const {
         std::ofstream os(filename);
         write_schedule_recomp(os, schedule);
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_schedule_recomp_duplicate(std::ostream &os, const BspScheduleRecomp<Graph_t> &schedule) const {
-
         const auto &g = schedule.getInstance().getComputationalDag();
 
         using VertexType = vertex_idx_t<Graph_t>;
@@ -380,11 +357,12 @@ class DotFileWriter {
 
         std::unordered_map<VertexType, std::vector<size_t>> vertex_to_idx;
 
-        using vertex_type_t_or_default = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
+        using vertex_type_t_or_default
+            = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
         using edge_commw_t_or_default = std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
 
-        using cdag_vertex_impl_t = cdag_vertex_impl<vertex_idx_t<Graph_t>, v_workw_t<Graph_t>, v_commw_t<Graph_t>,
-                                                    v_memw_t<Graph_t>, vertex_type_t_or_default>;
+        using cdag_vertex_impl_t
+            = cdag_vertex_impl<vertex_idx_t<Graph_t>, v_workw_t<Graph_t>, v_commw_t<Graph_t>, v_memw_t<Graph_t>, vertex_type_t_or_default>;
         using cdag_edge_impl_t = cdag_edge_impl<edge_commw_t_or_default>;
 
         using graph_t = computational_dag_edge_idx_vector_impl<cdag_vertex_impl_t, cdag_edge_impl_t>;
@@ -394,11 +372,9 @@ class DotFileWriter {
         size_t idx_new = 0;
 
         for (const auto &node : g.vertices()) {
-
             if (schedule.assignments(node).size() == 1) {
-
-                g2.add_vertex(g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node),
-                              g.vertex_type(node));
+                g2.add_vertex(
+                    g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), g.vertex_type(node));
 
                 names[idx_new] = std::to_string(node);
                 node_to_proc[idx_new] = schedule.assignments(node)[0].first;
@@ -408,12 +384,10 @@ class DotFileWriter {
                 idx_new++;
 
             } else {
-
                 std::vector<size_t> idxs;
                 for (unsigned i = 0; i < schedule.assignments(node).size(); ++i) {
-
-                    g2.add_vertex(g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node),
-                                  g.vertex_type(node));
+                    g2.add_vertex(
+                        g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), g.vertex_type(node));
 
                     names[idx_new] = std::to_string(node).append("_").append(std::to_string(i));
                     node_to_proc[idx_new] = schedule.assignments(node)[i].first;
@@ -426,34 +400,25 @@ class DotFileWriter {
         }
 
         for (const auto &[key, val] : vertex_to_idx) {
-
             if (val.size() == 1) {
-
                 for (const auto &target : g.children(key)) {
-
                     for (const auto &new_node_target : vertex_to_idx[target]) {
                         g2.add_edge(val[0], new_node_target);
                     }
                 }
 
             } else {
-
                 std::unordered_set<unsigned> assigned_processors;
 
                 for (const auto &assignment : schedule.assignments(key)) {
-
                     assigned_processors.insert(assignment.first);
                 }
 
                 for (unsigned i = 0; i < val.size(); i++) {
-
                     for (const auto &target : g.children(key)) {
-
                         for (size_t j = 0; j < vertex_to_idx[target].size(); j++) {
-
-                            if (assigned_processors.find(node_to_proc[vertex_to_idx[target][j]]) ==
-                                    assigned_processors.end() ||
-                                node_to_proc[val[i]] == node_to_proc[vertex_to_idx[target][j]]) {
+                            if (assigned_processors.find(node_to_proc[vertex_to_idx[target][j]]) == assigned_processors.end()
+                                || node_to_proc[val[i]] == node_to_proc[vertex_to_idx[target][j]]) {
                                 g2.add_edge(val[i], vertex_to_idx[target][j]);
                             }
                         }
@@ -462,45 +427,39 @@ class DotFileWriter {
             }
         }
 
-        write_graph_structure(
-            os, g2, VertexWriterDuplicateRecompSchedule_DOT<graph_t>(g2, names, node_to_proc, node_to_superstep));
+        write_graph_structure(os, g2, VertexWriterDuplicateRecompSchedule_DOT<graph_t>(g2, names, node_to_proc, node_to_superstep));
     }
 
-    template<typename Graph_t>
-    void write_schedule_recomp_duplicate(const std::string &filename,
-                                         const BspScheduleRecomp<Graph_t> &schedule) const {
+    template <typename Graph_t>
+    void write_schedule_recomp_duplicate(const std::string &filename, const BspScheduleRecomp<Graph_t> &schedule) const {
         std::ofstream os(filename);
         write_schedule_recomp_duplicate(os, schedule);
     }
 
-    template<typename Graph_t, typename color_container_t>
-    void write_colored_graph(std::ostream &os, const Graph_t &graph, const color_container_t & colors) const {
-
+    template <typename Graph_t, typename color_container_t>
+    void write_colored_graph(std::ostream &os, const Graph_t &graph, const color_container_t &colors) const {
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must be a computational DAG");
 
         write_graph_structure(os, graph, ColoredVertexWriterGraph_DOT<Graph_t, color_container_t>(graph, colors));
     }
 
-    template<typename Graph_t, typename color_container_t>
-    void write_colored_graph(const std::string &filename, const Graph_t &graph, const color_container_t & colors) const {
-
+    template <typename Graph_t, typename color_container_t>
+    void write_colored_graph(const std::string &filename, const Graph_t &graph, const color_container_t &colors) const {
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must be a computational DAG");
 
         std::ofstream os(filename);
         write_colored_graph(os, graph, colors);
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_graph(std::ostream &os, const Graph_t &graph) const {
-
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must be a computational DAG");
 
         write_graph_structure(os, graph, VertexWriterGraph_DOT<Graph_t>(graph));
     }
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     void write_graph(const std::string &filename, const Graph_t &graph) const {
-
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must be a computational DAG");
 
         std::ofstream os(filename);
@@ -508,4 +467,4 @@ class DotFileWriter {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/arch_file_reader.hpp b/include/osp/auxiliary/io/arch_file_reader.hpp
index 71b0f006..68a269c2 100644
--- a/include/osp/auxiliary/io/arch_file_reader.hpp
+++ b/include/osp/auxiliary/io/arch_file_reader.hpp
@@ -18,21 +18,24 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/model/BspArchitecture.hpp"
 #include <fstream>
 #include <iostream>
 #include <sstream>
 
-namespace osp { namespace file_reader {
+#include "osp/bsp/model/BspArchitecture.hpp"
+
+namespace osp {
+namespace file_reader {
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool readBspArchitecture(std::ifstream &infile, BspArchitecture<Graph_t> &architecture) {
     std::string line;
 
     // Skip comment lines
     while (std::getline(infile, line)) {
-        if (!line.empty() && line[0] != '%')
+        if (!line.empty() && line[0] != '%') {
             break;
+        }
     }
 
     // Parse architecture parameters
@@ -49,7 +52,7 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture<Graph_t> &archit
 
     // Try to read optional mem_type and M
     if (!(iss >> mem_type >> M)) {
-        mem_type = -1; // Memory info not present
+        mem_type = -1;    // Memory info not present
     }
 
     architecture.setNumberOfProcessors(p);
@@ -59,24 +62,24 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture<Graph_t> &archit
     if (0 <= mem_type && mem_type <= 3) {
         using memw_t = v_memw_t<Graph_t>;
         switch (mem_type) {
-        case 0:
-            architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE);
-            break;
-        case 1:
-            architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL);
-            architecture.setMemoryBound(static_cast<memw_t>(M));
-            break;
-        case 2:
-            architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL);
-            architecture.setMemoryBound(static_cast<memw_t>(M));
-            break;
-        case 3:
-            architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT);
-            architecture.setMemoryBound(static_cast<memw_t>(M));
-            break;
-        default:
-            std::cerr << "Invalid memory type.\n";
-            return false;
+            case 0:
+                architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE);
+                break;
+            case 1:
+                architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL);
+                architecture.setMemoryBound(static_cast<memw_t>(M));
+                break;
+            case 2:
+                architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL);
+                architecture.setMemoryBound(static_cast<memw_t>(M));
+                break;
+            case 3:
+                architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT);
+                architecture.setMemoryBound(static_cast<memw_t>(M));
+                break;
+            default:
+                std::cerr << "Invalid memory type.\n";
+                return false;
         }
     } else if (mem_type == -1) {
         std::cout << "No memory type specified. Assuming \"NONE\".\n";
@@ -131,7 +134,7 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture<Graph_t> &archit
     return true;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool readBspArchitecture(const std::string &filename, BspArchitecture<Graph_t> &architecture) {
     std::ifstream infile(filename);
     if (!infile.is_open()) {
@@ -142,4 +145,5 @@ bool readBspArchitecture(const std::string &filename, BspArchitecture<Graph_t> &
     return readBspArchitecture(infile, architecture);
 }
 
-}} // namespace osp::file_reader
+}    // namespace file_reader
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp b/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp
index 11273e49..41062016 100644
--- a/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp
+++ b/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp
@@ -18,36 +18,35 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/bsp/model/BspScheduleCS.hpp"
 #include <fstream>
 #include <iostream>
 
-namespace osp { namespace file_writer {
+#include "osp/bsp/model/BspSchedule.hpp"
+#include "osp/bsp/model/BspScheduleCS.hpp"
+
+namespace osp {
+namespace file_writer {
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_txt(std::ostream &os, const BspSchedule<Graph_t> &schedule) {
-
     os << "%% BspSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and "
        << schedule.numberOfSupersteps() << " supersteps." << std::endl;
     os << schedule.getInstance().numberOfVertices() << " " << schedule.getInstance().numberOfProcessors() << " "
        << schedule.numberOfSupersteps() << std::endl;
 
     for (const auto &vertex : schedule.getInstance().getComputationalDag().vertices()) {
-        os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex)
-           << std::endl;
+        os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) << '\n';
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_txt(const std::string &filename, const BspSchedule<Graph_t> &schedule) {
     std::ofstream os(filename);
     write_txt(os, schedule);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_txt(std::ostream &os, const BspScheduleCS<Graph_t> &schedule) {
-
     os << "%% BspSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and "
        << schedule.numberOfSupersteps() << " supersteps." << std::endl;
     os << schedule.getInstance().numberOfVertices() << " " << schedule.getInstance().numberOfProcessors() << " "
@@ -61,14 +60,12 @@ void write_txt(std::ostream &os, const BspScheduleCS<Graph_t> &schedule) {
     os << std::endl;
 
     for (const auto &vertex : schedule.getInstance().getComputationalDag().vertices()) {
-        os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex)
-           << std::endl;
+        os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) << std::endl;
     }
 
     if (schedule.getCommunicationSchedule().empty()) {
         os << "%% No communication schedule available." << std::endl;
     } else {
-
         os << "%% Communication schedule available." << std::endl;
 
         for (const auto &[key, val] : schedule.getCommunicationSchedule()) {
@@ -77,33 +74,33 @@ void write_txt(std::ostream &os, const BspScheduleCS<Graph_t> &schedule) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_txt(const std::string &filename, const BspScheduleCS<Graph_t> &schedule) {
     std::ofstream os(filename);
     write_txt(os, schedule);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_sankey(std::ostream &os, const BspScheduleCS<Graph_t> &schedule) {
     // Computing workloads
     std::vector<std::vector<v_workw_t<Graph_t>>> proc_workloads(
         schedule.numberOfSupersteps(), std::vector<v_workw_t<Graph_t>>(schedule.getInstance().numberOfProcessors(), 0));
 
     for (size_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
-        proc_workloads[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] +=
-            schedule.getInstance().getComputationalDag().vertex_work_weight(node);
+        proc_workloads[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)]
+            += schedule.getInstance().getComputationalDag().vertex_work_weight(node);
     }
 
     // Computing communicationloads
     std::vector<std::vector<std::vector<v_commw_t<Graph_t>>>> commloads(
-                      schedule.numberOfSupersteps() - 1,
-                      std::vector<std::vector<v_commw_t<Graph_t>>>(
-                          schedule.getInstance().numberOfProcessors(),
-                          std::vector<v_commw_t<Graph_t>>(schedule.getInstance().numberOfProcessors(), 0)));
+        schedule.numberOfSupersteps() - 1,
+        std::vector<std::vector<v_commw_t<Graph_t>>>(
+            schedule.getInstance().numberOfProcessors(),
+            std::vector<v_commw_t<Graph_t>>(schedule.getInstance().numberOfProcessors(), 0)));
 
     for (const auto &[comm_triple, sstep] : schedule.getCommunicationSchedule()) {
-        commloads[sstep][std::get<1>(comm_triple)][std::get<2>(comm_triple)] +=
-            schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(comm_triple));
+        commloads[sstep][std::get<1>(comm_triple)][std::get<2>(comm_triple)]
+            += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(comm_triple));
     }
 
     os << "BspSchedule: Number of Processors, Number of Supersteps" << std::endl;
@@ -132,10 +129,11 @@ void write_sankey(std::ostream &os, const BspScheduleCS<Graph_t> &schedule) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_sankey(const std::string &filename, const BspScheduleCS<Graph_t> &schedule) {
     std::ofstream os(filename);
     write_sankey(os, schedule);
 }
 
-}} // namespace osp::file_writer
\ No newline at end of file
+}    // namespace file_writer
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/dot_graph_file_reader.hpp b/include/osp/auxiliary/io/dot_graph_file_reader.hpp
index a6117dcf..2b4b7b58 100644
--- a/include/osp/auxiliary/io/dot_graph_file_reader.hpp
+++ b/include/osp/auxiliary/io/dot_graph_file_reader.hpp
@@ -18,17 +18,17 @@ limitations under the License.
 
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <iostream>
-#include <string>
-#include <vector>
+#include <limits>
 #include <sstream>
+#include <string>
 #include <utility>
-#include <filesystem>
-#include <limits>
+#include <vector>
 
-#include "osp/concepts/constructable_computational_dag_concept.hpp"
 #include "osp/auxiliary/io/filepath_checker.hpp"
+#include "osp/concepts/constructable_computational_dag_concept.hpp"
 
 namespace osp {
 namespace file_reader {
@@ -44,7 +44,9 @@ std::vector<std::string> split(const std::string &s, char delimiter) {
 }
 
 std::string removeLeadingAndTrailingQuotes(const std::string &str) {
-    if (str.empty()) return str;
+    if (str.empty()) {
+        return str;
+    }
 
     std::size_t start = 0;
     std::size_t end = str.length();
@@ -60,12 +62,16 @@ std::string removeLeadingAndTrailingQuotes(const std::string &str) {
     return str.substr(start, end - start);
 }
 
-template<typename Graph_t>
-void parseDotNode(const std::string& line, Graph_t& G) {
+template <typename Graph_t>
+void parseDotNode(const std::string &line, Graph_t &G) {
     std::size_t pos = line.find('[');
-    if (pos == std::string::npos) return;
+    if (pos == std::string::npos) {
+        return;
+    }
     std::size_t end_pos = line.find(']');
-    if (end_pos == std::string::npos) return;
+    if (end_pos == std::string::npos) {
+        return;
+    }
 
     std::string properties = line.substr(pos + 1, end_pos - pos - 1);
     std::vector<std::string> keyValuePairs = split(properties, ';');
@@ -77,14 +83,18 @@ void parseDotNode(const std::string& line, Graph_t& G) {
 
     for (const std::string &keyValuePair : keyValuePairs) {
         std::vector<std::string> keyValue = split(keyValuePair, '=');
-        if (keyValue.size() != 2) continue;
+        if (keyValue.size() != 2) {
+            continue;
+        }
 
         std::string key = keyValue[0];
         // trim leading/trailing whitespace from key
         key.erase(0, key.find_first_not_of(" \t\n\r\f\v"));
         key.erase(key.find_last_not_of(" \t\n\r\f\v") + 1);
 
-        if (key.empty()) continue;
+        if (key.empty()) {
+            continue;
+        }
 
         std::string value = removeLeadingAndTrailingQuotes(keyValue[1]);
 
@@ -110,12 +120,14 @@ void parseDotNode(const std::string& line, Graph_t& G) {
     }
 }
 
-template<typename Graph_t>
-void parseDotEdge(const std::string& line, Graph_t& G) {
+template <typename Graph_t>
+void parseDotEdge(const std::string &line, Graph_t &G) {
     using edge_commw_t_or_default = std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
 
     std::size_t arrow_pos = line.find("->");
-    if (arrow_pos == std::string::npos) return;
+    if (arrow_pos == std::string::npos) {
+        return;
+    }
 
     std::string source_str = line.substr(0, arrow_pos);
     source_str.erase(source_str.find_last_not_of(" \t\n\r\f\v") + 1);
@@ -144,14 +156,18 @@ void parseDotEdge(const std::string& line, Graph_t& G) {
                     std::string properties = line.substr(bracket_pos + 1, end_bracket_pos - bracket_pos - 1);
                     std::vector<std::string> keyValuePairs = split(properties, ';');
 
-                    for (const auto& keyValuePair : keyValuePairs) {
+                    for (const auto &keyValuePair : keyValuePairs) {
                         std::vector<std::string> keyValue = split(keyValuePair, '=');
-                        if (keyValue.size() != 2) continue;
+                        if (keyValue.size() != 2) {
+                            continue;
+                        }
 
                         std::string key = keyValue[0];
                         key.erase(0, key.find_first_not_of(" \t\n\r\f\v"));
                         key.erase(key.find_last_not_of(" \t\n\r\f\v") + 1);
-                        if (key.empty()) continue;
+                        if (key.empty()) {
+                            continue;
+                        }
 
                         std::string value = removeLeadingAndTrailingQuotes(keyValue[1]);
 
@@ -171,8 +187,8 @@ void parseDotEdge(const std::string& line, Graph_t& G) {
     }
 }
 
-template<typename Graph_t>
-bool readComputationalDagDotFormat(std::ifstream& infile, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagDotFormat(std::ifstream &infile, Graph_t &graph) {
     std::string line;
     while (std::getline(infile, line)) {
         if (line.length() > MAX_LINE_LENGTH) {
@@ -182,7 +198,9 @@ bool readComputationalDagDotFormat(std::ifstream& infile, Graph_t& graph) {
 
         line.erase(0, line.find_first_not_of(" \t\n\r\f\v"));
 
-        if (line.empty() || line.rfind("digraph", 0) == 0 || line.rfind("}", 0) == 0) continue;
+        if (line.empty() || line.rfind("digraph", 0) == 0 || line.rfind("}", 0) == 0) {
+            continue;
+        }
 
         if (line.find("->") != std::string::npos) {
             // This is an edge
@@ -196,8 +214,8 @@ bool readComputationalDagDotFormat(std::ifstream& infile, Graph_t& graph) {
     return true;
 }
 
-template<typename Graph_t>
-bool readComputationalDagDotFormat(const std::string& filename, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagDotFormat(const std::string &filename, Graph_t &graph) {
     if (std::filesystem::path(filename).extension() != ".dot") {
         std::cerr << "Error: Only .dot files are accepted.\n";
         return false;
@@ -217,4 +235,5 @@ bool readComputationalDagDotFormat(const std::string& filename, Graph_t& graph)
     return readComputationalDagDotFormat(infile, graph);
 }
 
-}} // namespace osp::file_reader
\ No newline at end of file
+}    // namespace file_reader
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/filepath_checker.hpp b/include/osp/auxiliary/io/filepath_checker.hpp
index e4014a2b..f3ac03a4 100644
--- a/include/osp/auxiliary/io/filepath_checker.hpp
+++ b/include/osp/auxiliary/io/filepath_checker.hpp
@@ -18,30 +18,37 @@ limitations under the License.
 
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <limits>
-#include <filesystem>
 
 namespace osp {
 namespace file_reader {
-    
-constexpr std::size_t MAX_LINE_LENGTH = 1 << 14; // 16 KB
+
+constexpr std::size_t MAX_LINE_LENGTH = 1 << 14;    // 16 KB
 
 // Path safety to avoid symlink, traversal or malicious file types
-inline bool isPathSafe(const std::string& path) {
+inline bool isPathSafe(const std::string &path) {
     try {
         std::filesystem::path resolved = std::filesystem::weakly_canonical(path);
-        if (std::filesystem::is_symlink(resolved)) return false;
-        if (!std::filesystem::is_regular_file(resolved)) return false;
-        if (resolved.string().find('\0') != std::string::npos) return false;
+        if (std::filesystem::is_symlink(resolved)) {
+            return false;
+        }
+        if (!std::filesystem::is_regular_file(resolved)) {
+            return false;
+        }
+        if (resolved.string().find('\0') != std::string::npos) {
+            return false;
+        }
         return true;
     } catch (...) {
         return false;
     }
 }
 
-}} // namespace osp::file_reader
\ No newline at end of file
+}    // namespace file_reader
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/general_file_reader.hpp b/include/osp/auxiliary/io/general_file_reader.hpp
index 8ed5ad4e..e05e5277 100644
--- a/include/osp/auxiliary/io/general_file_reader.hpp
+++ b/include/osp/auxiliary/io/general_file_reader.hpp
@@ -26,8 +26,8 @@ limitations under the License.
 namespace osp {
 namespace file_reader {
 
-template<typename Graph_t>
-bool readGraph(const std::string& filename, Graph_t& graph) {
+template <typename Graph_t>
+bool readGraph(const std::string &filename, Graph_t &graph) {
     if (!isPathSafe(filename)) {
         std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n";
         return false;
@@ -50,12 +50,12 @@ bool readGraph(const std::string& filename, Graph_t& graph) {
     } else if (file_ending == "dot") {
         status = file_reader::readComputationalDagDotFormat(infile, graph);
     } else {
-        std::cout << "Unknown file ending: ." << file_ending
-                    << " ...assuming hyperDag format." << std::endl;
+        std::cout << "Unknown file ending: ." << file_ending << " ...assuming hyperDag format." << std::endl;
         status = file_reader::readComputationalDagHyperdagFormatDB(infile, graph);
     }
 
     return status;
 }
 
-}} // namespace osp::file_reader
\ No newline at end of file
+}    // namespace file_reader
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/hdag_graph_file_reader.hpp b/include/osp/auxiliary/io/hdag_graph_file_reader.hpp
index a91481a7..b96c86ea 100644
--- a/include/osp/auxiliary/io/hdag_graph_file_reader.hpp
+++ b/include/osp/auxiliary/io/hdag_graph_file_reader.hpp
@@ -18,24 +18,24 @@ limitations under the License.
 
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <limits>
-#include <filesystem>
 
-#include "osp/concepts/computational_dag_concept.hpp"
-#include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/auxiliary/io/filepath_checker.hpp"
+#include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
+#include "osp/graph_algorithms/directed_graph_util.hpp"
 
 namespace osp {
 namespace file_reader {
 
-template<typename Graph_t>
-bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) {
     std::string line;
 
     // Skip comment lines starting with '%'
@@ -84,8 +84,7 @@ bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) {
         if (edgeSource[edgeIdx] == -1) {
             edgeSource[edgeIdx] = node;
         } else {
-            graph.add_edge(static_cast<vertex_idx_t<Graph_t>>(edgeSource[edgeIdx]),
-                        static_cast<vertex_idx_t<Graph_t>>(node));
+            graph.add_edge(static_cast<vertex_idx_t<Graph_t>>(edgeSource[edgeIdx]), static_cast<vertex_idx_t<Graph_t>>(node));
         }
     }
 
@@ -120,7 +119,7 @@ bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) {
         }
     }
     */
-    
+
     if (!is_acyclic(graph)) {
         std::cerr << "Error: DAG is not acyclic.\n";
         return false;
@@ -129,8 +128,8 @@ bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) {
     return true;
 }
 
-template<typename Graph_t>
-bool readComputationalDagHyperdagFormat(const std::string& filename, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagHyperdagFormat(const std::string &filename, Graph_t &graph) {
     if (!isPathSafe(filename)) {
         std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n";
         return false;
@@ -145,9 +144,8 @@ bool readComputationalDagHyperdagFormat(const std::string& filename, Graph_t& gr
     return readComputationalDagHyperdagFormat(infile, graph);
 }
 
-
-template<typename Graph_t>
-bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) {
     std::string line;
 
     // Skip comment lines
@@ -182,7 +180,7 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph)
             std::cerr << "Warning: Could not read hyperedge ID for hyperedge " << i << ".\n";
             continue;
         }
-        edgeStream >> comm_weight >> mem_weight; // optional
+        edgeStream >> comm_weight >> mem_weight;    // optional
 
         if (hEdge < 0 || hEdge >= hEdges) {
             std::cerr << "Error: Hyperedge ID " << hEdge << " is out of range (0 to " << hEdges - 1 << ").\n";
@@ -219,7 +217,7 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph)
 
         if constexpr (has_typed_vertices_v<Graph_t>) {
             graph.set_vertex_type(static_cast<vertex_idx_t<Graph_t>>(node), static_cast<v_type_t<Graph_t>>(type));
-        }  
+        }
     }
 
     // Resize(N);
@@ -254,16 +252,14 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph)
             graph.set_vertex_mem_weight(static_cast<vertex_idx_t<Graph_t>>(node), hyperedge_mem_weights[edgeIdx]);
         } else {
             if constexpr (is_modifiable_cdag_comm_edge_v<Graph_t>) {
-
                 auto edge = graph.add_edge(static_cast<vertex_idx_t<Graph_t>>(edgeSource[edgeIdx]),
-                                    static_cast<vertex_idx_t<Graph_t>>(nodeIdx));
+                                           static_cast<vertex_idx_t<Graph_t>>(nodeIdx));
 
-                graph.set_edge_comm_weight(edge.first,
-                    static_cast<e_commw_t<Graph_t>>(hyperedge_comm_weights[edgeIdx]));
+                graph.set_edge_comm_weight(edge.first, static_cast<e_commw_t<Graph_t>>(hyperedge_comm_weights[edgeIdx]));
 
             } else {
                 graph.add_edge(static_cast<vertex_idx_t<Graph_t>>(edgeSource[edgeIdx]),
-                                    static_cast<vertex_idx_t<Graph_t>>(nodeIdx));
+                               static_cast<vertex_idx_t<Graph_t>>(nodeIdx));
             }
         }
     }
@@ -276,8 +272,8 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph)
     return true;
 }
 
-template<typename Graph_t>
-bool readComputationalDagHyperdagFormatDB(const std::string& filename, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagHyperdagFormatDB(const std::string &filename, Graph_t &graph) {
     // Optional: limit file extension for safety
     if (std::filesystem::path(filename).extension() != ".hdag") {
         std::cerr << "Error: Only .hdag files are accepted.\n";
@@ -298,4 +294,5 @@ bool readComputationalDagHyperdagFormatDB(const std::string& filename, Graph_t&
     return readComputationalDagHyperdagFormatDB(infile, graph);
 }
 
-}} // namespace osp::file_reader
\ No newline at end of file
+}    // namespace file_reader
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/hdag_graph_file_writer.hpp b/include/osp/auxiliary/io/hdag_graph_file_writer.hpp
index e065801c..be0638ac 100644
--- a/include/osp/auxiliary/io/hdag_graph_file_writer.hpp
+++ b/include/osp/auxiliary/io/hdag_graph_file_writer.hpp
@@ -60,14 +60,18 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap
     os << num_hyperedges << " " << num_vertices << " " << num_pins << "\n";
 
     // Hyperedges
-    if (write_comment_lines) os << "%% Hyperedges: ID comm_weight mem_weight\n";
+    if (write_comment_lines) {
+        os << "%% Hyperedges: ID comm_weight mem_weight\n";
+    }
     for (unsigned i = 0; i < num_hyperedges; ++i) {
         const auto u = hyperedge_idx_to_node[i];
         os << i << " " << graph.vertex_comm_weight(u) << " " << graph.vertex_mem_weight(u) << "\n";
     }
 
     // Vertices
-    if (write_comment_lines) os << "%% Vertices: ID work_weight type\n";
+    if (write_comment_lines) {
+        os << "%% Vertices: ID work_weight type\n";
+    }
     for (const auto &u : graph.vertices()) {
         os << u << " " << graph.vertex_work_weight(u);
         if constexpr (has_typed_vertices_v<Graph_t>) {
@@ -79,12 +83,14 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap
     }
 
     // Pins
-    if (write_comment_lines) os << "%% Pins: HyperedgeID NodeID\n";
+    if (write_comment_lines) {
+        os << "%% Pins: HyperedgeID NodeID\n";
+    }
     for (unsigned i = 0; i < num_hyperedges; ++i) {
         const auto u = hyperedge_idx_to_node[i];
-        os << i << " " << u << "\n"; // Source pin
+        os << i << " " << u << "\n";    // Source pin
         for (const auto &v : graph.children(u)) {
-            os << i << " " << v << "\n"; // Target pins
+            os << i << " " << v << "\n";    // Target pins
         }
     }
 }
@@ -98,7 +104,9 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap
  * @return true if writing was successful, false otherwise.
  */
 template <typename Graph_t>
-bool writeComputationalDagHyperdagFormatDB(const std::string &filename, const Graph_t &graph, const bool write_comment_lines = false) {
+bool writeComputationalDagHyperdagFormatDB(const std::string &filename,
+                                           const Graph_t &graph,
+                                           const bool write_comment_lines = false) {
     std::ofstream os(filename);
     if (!os.is_open()) {
         std::cerr << "Error: Failed to open file for writing: " << filename << "\n";
@@ -108,5 +116,5 @@ bool writeComputationalDagHyperdagFormatDB(const std::string &filename, const Gr
     return true;
 }
 
-} // namespace file_writer
-} // namespace osp
\ No newline at end of file
+}    // namespace file_writer
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/mtx_graph_file_reader.hpp b/include/osp/auxiliary/io/mtx_graph_file_reader.hpp
index 45382094..5a98721e 100644
--- a/include/osp/auxiliary/io/mtx_graph_file_reader.hpp
+++ b/include/osp/auxiliary/io/mtx_graph_file_reader.hpp
@@ -18,29 +18,31 @@ limitations under the License.
 
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <limits>
-#include <filesystem>
 
-#include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/auxiliary/io/filepath_checker.hpp"
+#include "osp/concepts/computational_dag_concept.hpp"
 
 namespace osp {
 namespace file_reader {
 
-template<typename Graph_t>
-bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &graph) {
     using vertex_t = vertex_idx_t<Graph_t>;
 
     std::string line;
 
     // Skip comments or empty lines (robustly)
     while (std::getline(infile, line)) {
-        if (line.empty() || line[0] == '%') continue;
+        if (line.empty() || line[0] == '%') {
+            continue;
+        }
 
         // Null byte check
         if (line.find('\0') != std::string::npos) {
@@ -52,7 +54,7 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap
             std::cerr << "Error: Line too long, possible malformed or malicious file.\n";
             return false;
         }
-        break; // We found the actual header line
+        break;    // We found the actual header line
     }
 
     if (infile.eof()) {
@@ -63,8 +65,7 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap
     int M_row = 0, M_col = 0, nEntries = 0;
 
     std::istringstream header_stream(line);
-    if (!(header_stream >> M_row >> M_col >> nEntries) ||
-        M_row <= 0 || M_col <= 0 || M_row != M_col) {
+    if (!(header_stream >> M_row >> M_col >> nEntries) || M_row <= 0 || M_col <= 0 || M_row != M_col) {
         std::cerr << "Error: Invalid header or non-square matrix.\n";
         return false;
     }
@@ -84,7 +85,9 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap
 
     int entries_read = 0;
     while (entries_read < nEntries && std::getline(infile, line)) {
-        if (line.empty() || line[0] == '%') continue;
+        if (line.empty() || line[0] == '%') {
+            continue;
+        }
         if (line.size() > MAX_LINE_LENGTH) {
             std::cerr << "Error: Line too long.\n";
             return false;
@@ -99,7 +102,8 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap
             return false;
         }
 
-        row -= 1; col -= 1; // Convert to 0-based
+        row -= 1;
+        col -= 1;    // Convert to 0-based
 
         if (row < 0 || col < 0 || row >= M_row || col >= M_col) {
             std::cerr << "Error: Matrix entry out of bounds.\n";
@@ -145,8 +149,8 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap
     return true;
 }
 
-template<typename Graph_t>
-bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t& graph) {
+template <typename Graph_t>
+bool readComputationalDagMartixMarketFormat(const std::string &filename, Graph_t &graph) {
     // Ensure the file is .mtx format
     if (std::filesystem::path(filename).extension() != ".mtx") {
         std::cerr << "Error: Only .mtx files are accepted.\n";
@@ -177,13 +181,10 @@ bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t
     return readComputationalDagMartixMarketFormat(infile, graph);
 }
 
-
 // bool readProblem(const std::string &filename, DAG &G, BSPproblem &params, bool NoNUMA = true);
 
 // std::pair<bool, BspInstance> readBspInstance(const std::string &filename);
 
-
-
 // std::pair<bool, ComputationalDag>
 // readComputationalDagMartixMarketFormat(const std::string &filename,
 //                                        std::unordered_map<std::pair<VertexType, VertexType>, double, pair_hash> &mtx);
@@ -206,7 +207,6 @@ bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t
 
 // std::pair<bool, BspArchitecture> readBspArchitecture(std::ifstream &infile);
 
-
 // std::pair<bool, BspSchedule> readBspSchdeuleTxtFormat(const BspInstance &instance, const std::string &filename);
 
 // std::pair<bool, BspSchedule> readBspSchdeuleTxtFormat(const BspInstance &instance, std::ifstream &infile);
@@ -243,6 +243,6 @@ bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t
 //  */
 // std::pair<bool, BspScheduleRecomp> extractBspScheduleRecomp(std::ifstream &infile, const BspInstance &instance);
 
-} // namespace FileReader
+}    // namespace file_reader
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp b/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp
index 7d0bbe8f..d7f64c9b 100644
--- a/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp
+++ b/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp
@@ -18,29 +18,30 @@ limitations under the License.
 
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <limits>
-#include <filesystem>
 
-#include "osp/partitioning/model/hypergraph.hpp"
 #include "osp/auxiliary/io/filepath_checker.hpp"
+#include "osp/partitioning/model/hypergraph.hpp"
 
 namespace osp {
 namespace file_reader {
 
 // reads a matrix into Hypergraph format, where nonzeros are vertices, and rows/columns are hyperedges
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph<index_type, workw_type, memw_type, commw_type>& hgraph) {
-
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph<index_type, workw_type, memw_type, commw_type> &hgraph) {
     std::string line;
 
     // Skip comments or empty lines (robustly)
     while (std::getline(infile, line)) {
-        if (line.empty() || line[0] == '%') continue;
+        if (line.empty() || line[0] == '%') {
+            continue;
+        }
 
         // Null byte check
         if (line.find('\0') != std::string::npos) {
@@ -52,7 +53,7 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph<index_ty
             std::cerr << "Error: Line too long, possible malformed or malicious file.\n";
             return false;
         }
-        break; // We found the actual header line
+        break;    // We found the actual header line
     }
 
     if (infile.eof()) {
@@ -63,8 +64,7 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph<index_ty
     int M_row = 0, M_col = 0, nEntries = 0;
 
     std::istringstream header_stream(line);
-    if (!(header_stream >> M_row >> M_col >> nEntries) ||
-        M_row <= 0 || M_col <= 0) {
+    if (!(header_stream >> M_row >> M_col >> nEntries) || M_row <= 0 || M_col <= 0) {
         std::cerr << "Error: Invalid header.\n";
         return false;
     }
@@ -82,7 +82,9 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph<index_ty
 
     int entries_read = 0;
     while (entries_read < nEntries && std::getline(infile, line)) {
-        if (line.empty() || line[0] == '%') continue;
+        if (line.empty() || line[0] == '%') {
+            continue;
+        }
         if (line.size() > MAX_LINE_LENGTH) {
             std::cerr << "Error: Line too long.\n";
             return false;
@@ -97,7 +99,8 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph<index_ty
             return false;
         }
 
-        row -= 1; col -= 1; // Convert to 0-based
+        row -= 1;
+        col -= 1;    // Convert to 0-based
 
         if (row < 0 || col < 0 || row >= M_row || col >= M_col) {
             std::cerr << "Error: Matrix entry out of bounds.\n";
@@ -127,19 +130,24 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph<index_ty
         }
     }
 
-    for(index_type row = 0; row < static_cast<index_type>(M_row); ++row)
-        if(!row_hyperedges[row].empty())
+    for (index_type row = 0; row < static_cast<index_type>(M_row); ++row) {
+        if (!row_hyperedges[row].empty()) {
             hgraph.add_hyperedge(row_hyperedges[row]);
+        }
+    }
 
-    for(index_type col = 0; col < static_cast<index_type>(M_col); ++col)
-        if(!column_hyperedges[col].empty())
+    for (index_type col = 0; col < static_cast<index_type>(M_col); ++col) {
+        if (!column_hyperedges[col].empty()) {
             hgraph.add_hyperedge(column_hyperedges[col]);
+        }
+    }
 
     return true;
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-bool readHypergraphMartixMarketFormat(const std::string& filename, Hypergraph<index_type, workw_type, memw_type, commw_type>& hgraph) {
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+bool readHypergraphMartixMarketFormat(const std::string &filename,
+                                      Hypergraph<index_type, workw_type, memw_type, commw_type> &hgraph) {
     // Ensure the file is .mtx format
     if (std::filesystem::path(filename).extension() != ".mtx") {
         std::cerr << "Error: Only .mtx files are accepted.\n";
@@ -170,6 +178,6 @@ bool readHypergraphMartixMarketFormat(const std::string& filename, Hypergraph<in
     return readHypergraphMartixMarketFormat(infile, hgraph);
 }
 
-} // namespace FileReader
+}    // namespace file_reader
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/partitioning_file_writer.hpp b/include/osp/auxiliary/io/partitioning_file_writer.hpp
index 70fac29e..b2dd2953 100644
--- a/include/osp/auxiliary/io/partitioning_file_writer.hpp
+++ b/include/osp/auxiliary/io/partitioning_file_writer.hpp
@@ -18,50 +18,52 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/partitioning/model/partitioning.hpp"
-#include "osp/partitioning/model/partitioning_replication.hpp"
 #include <fstream>
 #include <iostream>
 
-namespace osp { namespace file_writer {
+#include "osp/partitioning/model/partitioning.hpp"
+#include "osp/partitioning/model/partitioning_replication.hpp"
+
+namespace osp {
+namespace file_writer {
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 void write_txt(std::ostream &os, const Partitioning<hypergraph_t> &partition) {
-
     using index_type = typename hypergraph_t::vertex_idx;
 
     os << "%% Partitioning for " << partition.getInstance().getNumberOfPartitions() << " parts." << std::endl;
 
-    for(index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node)
+    for (index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) {
         os << node << " " << partition.assignedPartition(node) << std::endl;
+    }
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 void write_txt(const std::string &filename, const Partitioning<hypergraph_t> &partition) {
     std::ofstream os(filename);
     write_txt(os, partition);
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 void write_txt(std::ostream &os, const PartitioningWithReplication<hypergraph_t> &partition) {
-
     using index_type = typename hypergraph_t::vertex_idx;
 
     os << "%% Partitioning for " << partition.getInstance().getNumberOfPartitions() << " parts with replication." << std::endl;
 
-    for(index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node)
-    {
+    for (index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) {
         os << node;
-        for(unsigned part : partition.assignedPartitions(node))
+        for (unsigned part : partition.assignedPartitions(node)) {
             os << " " << part;
+        }
         os << std::endl;
     }
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 void write_txt(const std::string &filename, const PartitioningWithReplication<hypergraph_t> &partition) {
     std::ofstream os(filename);
     write_txt(os, partition);
 }
 
-}} // namespace osp::file_writer
\ No newline at end of file
+}    // namespace file_writer
+}    // namespace osp
diff --git a/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp b/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp
index 217c591a..e3849668 100644
--- a/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp
+++ b/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp
@@ -18,58 +18,57 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/pebbling/PebblingSchedule.hpp"
 #include <fstream>
 #include <iostream>
 
-namespace osp { namespace file_writer {
+#include "osp/pebbling/PebblingSchedule.hpp"
+
+namespace osp {
+namespace file_writer {
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_txt(std::ostream &os, const PebblingSchedule<Graph_t> &schedule) {
-
     using vertex_idx = vertex_idx_t<Graph_t>;
 
     os << "%% PebblingSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and "
        << schedule.numberOfSupersteps() << " supersteps." << std::endl;
 
-    for(unsigned step = 0; step < schedule.numberOfSupersteps(); ++step)
-    {
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
-            const auto& computeSteps = schedule.GetComputeStepsForProcSuperstep(proc, step);
-            for(const auto& computeStep : computeSteps)
-            {
+    for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) {
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+            const auto &computeSteps = schedule.GetComputeStepsForProcSuperstep(proc, step);
+            for (const auto &computeStep : computeSteps) {
                 os << "Compute " << computeStep.node << " on proc " << proc << " in superstep " << step << std::endl;
-                for(vertex_idx to_evict : computeStep.nodes_evicted_after)
+                for (vertex_idx to_evict : computeStep.nodes_evicted_after) {
                     os << "Evict " << to_evict << " from proc " << proc << " in superstep " << step << std::endl;
+                }
             }
         }
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
-            const std::vector<vertex_idx>& nodesSentUp = schedule.GetNodesSentUp(proc, step);
-            for(vertex_idx node : nodesSentUp)
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+            const std::vector<vertex_idx> &nodesSentUp = schedule.GetNodesSentUp(proc, step);
+            for (vertex_idx node : nodesSentUp) {
                 os << "Send up " << node << " from proc " << proc << " in superstep " << step << std::endl;
+            }
         }
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
-            const std::vector<vertex_idx>& nodesEvictedInComm = schedule.GetNodesEvictedInComm(proc, step);
-            for(vertex_idx node : nodesEvictedInComm)
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+            const std::vector<vertex_idx> &nodesEvictedInComm = schedule.GetNodesEvictedInComm(proc, step);
+            for (vertex_idx node : nodesEvictedInComm) {
                 os << "Evict " << node << " from proc " << proc << " in superstep " << step << std::endl;
+            }
         }
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
-            const std::vector<vertex_idx>& nodesSentDown = schedule.GetNodesSentDown(proc, step);
-            for(vertex_idx node : nodesSentDown)
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+            const std::vector<vertex_idx> &nodesSentDown = schedule.GetNodesSentDown(proc, step);
+            for (vertex_idx node : nodesSentDown) {
                 os << "Send down " << node << " to proc " << proc << " in superstep " << step << std::endl;
+            }
         }
     }
-
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void write_txt(const std::string &filename, const PebblingSchedule<Graph_t> &schedule) {
     std::ofstream os(filename);
     write_txt(os, schedule);
 }
 
-}} // namespace osp::file_writer
\ No newline at end of file
+}    // namespace file_writer
+}    // namespace osp
diff --git a/include/osp/auxiliary/math/divisors.hpp b/include/osp/auxiliary/math/divisors.hpp
index 235f0129..fe268506 100644
--- a/include/osp/auxiliary/math/divisors.hpp
+++ b/include/osp/auxiliary/math/divisors.hpp
@@ -25,7 +25,7 @@ limitations under the License.
 
 namespace osp {
 
-template<typename integral_type>
+template <typename integral_type>
 integral_type intSqrtFloor(integral_type num) {
     static_assert(std::is_integral_v<integral_type>);
     assert(num > 0);
@@ -48,8 +48,7 @@ integral_type intSqrtFloor(integral_type num) {
     return sqrt;
 }
 
-
-template<typename integral_type>
+template <typename integral_type>
 std::vector<integral_type> divisorsList(integral_type num) {
     static_assert(std::is_integral_v<integral_type>);
     assert(num > 0);
@@ -62,11 +61,13 @@ std::vector<integral_type> divisorsList(integral_type num) {
             divs.emplace_back(div);
         }
     }
-    for (std::size_t indx = divs.back() * divs.back() == num ? divs.size() - 2U : divs.size() - 1U; indx != std::numeric_limits<std::size_t>::max(); --indx) {
+    for (std::size_t indx = divs.back() * divs.back() == num ? divs.size() - 2U : divs.size() - 1U;
+         indx != std::numeric_limits<std::size_t>::max();
+         --indx) {
         divs.emplace_back(num / divs[indx]);
     }
 
     return divs;
 }
 
-} // end namespace osp
+}    // end namespace osp
diff --git a/include/osp/auxiliary/math/math_helper.hpp b/include/osp/auxiliary/math/math_helper.hpp
index 93ca721c..b5227d43 100644
--- a/include/osp/auxiliary/math/math_helper.hpp
+++ b/include/osp/auxiliary/math/math_helper.hpp
@@ -21,15 +21,15 @@ limitations under the License.
 
 namespace osp {
 
-template<typename float_type>
+template <typename float_type>
 float_type log_sum_exp(float_type lhs, float_type rhs) {
     static_assert(std::is_floating_point_v<float_type>);
 
     const float_type max = std::max(lhs, rhs);
-    
+
     float_type result = max;
-    result += std::log2( std::exp2(lhs - max) + std::exp2(rhs - max) );
+    result += std::log2(std::exp2(lhs - max) + std::exp2(rhs - max));
     return result;
 }
 
-} // end namespace osp
+}    // end namespace osp
diff --git a/include/osp/auxiliary/misc.hpp b/include/osp/auxiliary/misc.hpp
index 56a95955..1b269f27 100644
--- a/include/osp/auxiliary/misc.hpp
+++ b/include/osp/auxiliary/misc.hpp
@@ -23,10 +23,10 @@ limitations under the License.
 #include <cstdlib>
 #include <functional>
 #include <iostream>
+#include <limits>
 #include <numeric>
 #include <set>
 #include <string>
-#include <limits>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
@@ -37,32 +37,32 @@ namespace osp {
 // unbiased random int generator
 inline int randInt(int lim) {
     int rnd = std::rand();
-    while (rnd >= RAND_MAX - RAND_MAX % lim)
+    while (rnd >= RAND_MAX - RAND_MAX % lim) {
         rnd = std::rand();
+    }
 
     return rnd % lim;
 }
 
 // pair of integers
-template<typename T1, typename T2>
+template <typename T1, typename T2>
 struct Pair {
     int a, b;
 
     explicit Pair(const T1 a_ = T1(), const T2 b_ = T2()) : a(a_), b(b_) {}
 
-    template<typename T1_, typename T2_>
+    template <typename T1_, typename T2_>
     bool operator<(const Pair<T1_, T2_> &other) const {
         return (a < other.a || (a == other.a && b < other.b));
     }
 
-    std::ostream &operator<<(std::ostream &os) const {
-        return os << ("(" + std::to_string(a) + ", " + std::to_string(b) + ")");
-    }
+    std::ostream &operator<<(std::ostream &os) const { return os << ("(" + std::to_string(a) + ", " + std::to_string(b) + ")"); }
 };
+
 using intPair = Pair<int, int>;
 
 // triple of integers
-template<typename T1, typename T2, typename T3>
+template <typename T1, typename T2, typename T3>
 struct Triple {
     T1 a;
     T2 b;
@@ -74,20 +74,22 @@ struct Triple {
         return os << "(" << std::to_string(a) << ", " << std::to_string(b) << ", " << std::to_string(c) << ")";
     }
 };
+
 using intTriple = Triple<int, int, int>;
 
 inline bool isDisjoint(std::vector<intPair> &intervals) {
-
     sort(intervals.begin(), intervals.end());
-    for (size_t i = 0; i + 1 < intervals.size(); ++i)
-        if (intervals[i].b > intervals[i + 1].a)
+    for (size_t i = 0; i + 1 < intervals.size(); ++i) {
+        if (intervals[i].b > intervals[i + 1].a) {
             return false;
+        }
+    }
 
     return true;
 }
 
 // computes power of an integer
-template<typename T>
+template <typename T>
 constexpr T intpow(T base, unsigned exp) {
     static_assert(std::is_integral<T>::value);
 
@@ -118,26 +120,25 @@ struct contractionEdge {
     }
 };
 
-
-
 // List of initializaton methods available
-static const std::vector<std::string> possibleModes{"random", "SJF",      "cilk",        "BSPg",  "ETF",
-                                                    "BL-EST", "ETF-NUMA", "BL-EST-NUMA", "Layers"};
+static const std::vector<std::string> possibleModes{
+    "random", "SJF", "cilk", "BSPg", "ETF", "BL-EST", "ETF-NUMA", "BL-EST-NUMA", "Layers"};
 
 // modify problem filename by adding substring at the right place
 inline std::string editFilename(const std::string &filename, const std::string &toInsert) {
     auto pos = filename.find("_coarse");
-    if (pos == std::string::npos)
+    if (pos == std::string::npos) {
         pos = filename.find("_instance");
-    if (pos == std::string::npos)
+    }
+    if (pos == std::string::npos) {
         return toInsert + filename;
+    }
 
     return filename.substr(0, pos) + toInsert + filename.substr(pos, filename.length() - pos);
 }
 
-
 // unordered set intersection
-template<typename T>
+template <typename T>
 std::unordered_set<T> get_intersection(const std::unordered_set<T> &a, const std::unordered_set<T> &b) {
     std::vector<T> result;
     const auto &larger = a.size() > b.size() ? a : b;
@@ -151,7 +152,7 @@ std::unordered_set<T> get_intersection(const std::unordered_set<T> &a, const std
 }
 
 // unordered set union
-template<typename T>
+template <typename T>
 std::unordered_set<T> get_union(const std::unordered_set<T> &a, const std::unordered_set<T> &b) {
     std::unordered_set<T> larger = a.size() > b.size() ? a : b;
     std::unordered_set<T> smaller = a.size() <= b.size() ? a : b;
@@ -162,7 +163,7 @@ std::unordered_set<T> get_union(const std::unordered_set<T> &a, const std::unord
 }
 
 // zip two vectors of equal length
-template<typename S, typename T>
+template <typename S, typename T>
 std::vector<std::pair<S, T>> zip(const std::vector<S> &a, const std::vector<T> &b) {
     assert(a.size() == b.size());
 
@@ -175,7 +176,7 @@ std::vector<std::pair<S, T>> zip(const std::vector<S> &a, const std::vector<T> &
     return result;
 }
 
-template<typename S, typename T>
+template <typename S, typename T>
 void unzip(std::vector<std::pair<S, T>> &zipped, std::vector<S> &a, std::vector<T> &b) {
     a.resize(zipped.size());
     b.resize(zipped.size());
@@ -186,7 +187,7 @@ void unzip(std::vector<std::pair<S, T>> &zipped, std::vector<S> &a, std::vector<
     }
 }
 
-template<typename T>
+template <typename T>
 std::vector<size_t> sort_and_sorting_arrangement(std::vector<T> &a) {
     std::vector<size_t> rearrangement;
     rearrangement.resize(a.size());
@@ -200,7 +201,7 @@ std::vector<size_t> sort_and_sorting_arrangement(std::vector<T> &a) {
     return rearrangement;
 }
 
-template<typename T, typename retT = size_t>
+template <typename T, typename retT = size_t>
 std::vector<retT> sorting_arrangement(const std::vector<T> &a, bool increasing = true) {
     std::vector<retT> rearrangement;
     rearrangement.resize(a.size());
@@ -235,7 +236,7 @@ inline bool check_vector_is_rearrangement_of_0_to_N(const std::vector<size_t> &a
 }
 
 // sorts a vector like the arrangement
-template<typename T>
+template <typename T>
 void sort_like_arrangement(std::vector<T> &a, const std::vector<size_t> &arrangement) {
     assert(a.size() == arrangement.size());
     assert(check_vector_is_rearrangement_of_0_to_N(arrangement));
@@ -254,13 +255,13 @@ void sort_like_arrangement(std::vector<T> &a, const std::vector<size_t> &arrange
             prev_j = j;
             j = arrangement[j];
         }
-        a[prev_j] = i_val; // j == i
+        a[prev_j] = i_val;    // j == i
         moved[prev_j] = true;
     }
 }
 
 // sorts vector according to values in second vector w/o changing second vector
-template<typename S, typename T>
+template <typename S, typename T>
 void sort_like(std::vector<S> &a, const std::vector<T> &b) {
     assert(a.size() == b.size());
 
@@ -276,7 +277,7 @@ void sort_like(std::vector<S> &a, const std::vector<T> &b) {
  * @param ordered_set
  * @return T KeyType of SetType
  */
-template<class SetType, typename T = typename SetType::key_type>
+template <class SetType, typename T = typename SetType::key_type>
 T Get_Median(SetType ordered_set) {
     assert(ordered_set.size() != 0);
     typename SetType::iterator it = ordered_set.begin();
@@ -299,7 +300,7 @@ T Get_Median(SetType ordered_set) {
  * @param ordered_set
  * @return T KeyType of SetType
  */
-template<class SetType, typename T = typename SetType::key_type>
+template <class SetType, typename T = typename SetType::key_type>
 T Get_Lower_Median(SetType ordered_set) {
     assert(ordered_set.size() != 0);
     typename SetType::iterator it = ordered_set.begin();
@@ -316,7 +317,7 @@ T Get_Lower_Median(SetType ordered_set) {
  * @param ordered_set
  * @return T KeyType of SetType
  */
-template<class SetType, typename T = typename SetType::key_type>
+template <class SetType, typename T = typename SetType::key_type>
 T Get_upper_third_percentile(SetType ordered_set) {
     assert(ordered_set.size() != 0);
     typename SetType::iterator it = ordered_set.begin();
@@ -333,7 +334,7 @@ T Get_upper_third_percentile(SetType ordered_set) {
  * @param ordered_set
  * @return T KeyType of SetType
  */
-template<class SetType, typename T = typename SetType::key_type>
+template <class SetType, typename T = typename SetType::key_type>
 T Get_lower_third_percentile(SetType ordered_set) {
     assert(ordered_set.size() != 0);
     typename SetType::iterator it = ordered_set.begin();
@@ -342,4 +343,4 @@ T Get_lower_third_percentile(SetType ordered_set) {
     return *it;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/permute.hpp b/include/osp/auxiliary/permute.hpp
index 5676a4f2..31ae7857 100644
--- a/include/osp/auxiliary/permute.hpp
+++ b/include/osp/auxiliary/permute.hpp
@@ -18,29 +18,36 @@ limitations under the License.
 
 #pragma once
 
+#include <cassert>
 #include <type_traits>
 #include <utility>
 #include <vector>
 
 namespace osp {
 
-template<typename T, typename Ind>
+template <typename T, typename Ind>
 void permute_inplace(std::vector<T> &vec, std::vector<Ind> &perm) {
     static_assert(std::is_integral_v<Ind>);
     static_assert(std::is_unsigned_v<Ind>);
 
     assert(vec.size() == perm.size());
-    assert([&]() -> bool{
+    assert([&]() -> bool {
         std::vector<bool> found(perm.size(), false);
         for (const Ind &val : perm) {
-            if (val < 0) return false;
-            if (val >= perm.size()) return false;
-            if (found[val]) return false;
+            if (val < 0) {
+                return false;
+            }
+            if (val >= perm.size()) {
+                return false;
+            }
+            if (found[val]) {
+                return false;
+            }
             found[val] = true;
         }
         return true;
     }());
-    assert(reinterpret_cast<void*>(&vec) != reinterpret_cast<void*>(&perm));
+    assert(reinterpret_cast<void *>(&vec) != reinterpret_cast<void *>(&perm));
 
     for (Ind i = 0; i < perm.size(); ++i) {
         while (perm[i] != i) {
@@ -50,26 +57,31 @@ void permute_inplace(std::vector<T> &vec, std::vector<Ind> &perm) {
     }
 }
 
-template<typename T, typename Ind>
+template <typename T, typename Ind>
 void inverse_permute_inplace(std::vector<T> &vec, std::vector<Ind> &perm) {
     static_assert(std::is_integral_v<Ind>);
     static_assert(std::is_unsigned_v<Ind>);
 
     assert(vec.size() == perm.size());
-    assert([&]() -> bool{
+    assert([&]() -> bool {
         std::vector<bool> found(perm.size(), false);
         for (const Ind &val : perm) {
-            if (val < 0) return false;
-            if (val >= perm.size()) return false;
-            if (found[val]) return false;
+            if (val < 0) {
+                return false;
+            }
+            if (val >= perm.size()) {
+                return false;
+            }
+            if (found[val]) {
+                return false;
+            }
             found[val] = true;
         }
         return true;
     }());
-    assert(reinterpret_cast<void*>(&vec) != reinterpret_cast<void*>(&perm));
+    assert(reinterpret_cast<void *>(&vec) != reinterpret_cast<void *>(&perm));
 
     for (Ind i = 0; i < perm.size(); ++i) {
-
         Ind j = i;
         while (i != perm[i]) {
             std::swap(vec[j], vec[perm[i]]);
@@ -79,4 +91,4 @@ void inverse_permute_inplace(std::vector<T> &vec, std::vector<Ind> &perm) {
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp b/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp
index 36291b1a..692415c3 100644
--- a/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp
+++ b/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp
@@ -18,10 +18,10 @@ limitations under the License.
 
 #pragma once
 
+#include <numeric>
 #include <random>
-#include <vector>
 #include <unordered_set>
-#include <numeric>
+#include <vector>
 
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
 
@@ -34,9 +34,8 @@ namespace osp {
  * @param chance chance/num_vertices is the probability of edge inclusion
  * @return DAG
  */
-template<typename Graph_t>
-void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t<Graph_t> num_vertices, double chance) {
-
+template <typename Graph_t>
+void erdos_renyi_graph_gen(Graph_t &dag_out, vertex_idx_t<Graph_t> num_vertices, double chance) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG type");
 
     dag_out = Graph_t(num_vertices);
@@ -45,10 +44,8 @@ void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t<Graph_t> num_vertices,
     std::mt19937 gen(rd());
 
     for (const auto &v : dag_out.vertices()) {
-
         const auto one = static_cast<vertex_idx_t<Graph_t>>(1);
-        std::binomial_distribution<vertex_idx_t<Graph_t>> bino_dist(num_vertices - one - v,
-                                                                    chance / double(num_vertices));
+        std::binomial_distribution<vertex_idx_t<Graph_t>> bino_dist(num_vertices - one - v, chance / double(num_vertices));
         auto out_edges_num = bino_dist(gen);
 
         std::unordered_set<vertex_idx_t<Graph_t>> out_edges;
@@ -56,8 +53,9 @@ void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t<Graph_t> num_vertices,
             std::uniform_int_distribution<vertex_idx_t<Graph_t>> dist(0, num_vertices - one - v);
             vertex_idx_t<Graph_t> edge = v + one + dist(gen);
 
-            if (out_edges.find(edge) != out_edges.cend())
+            if (out_edges.find(edge) != out_edges.cend()) {
                 continue;
+            }
 
             out_edges.emplace(edge);
         }
@@ -68,4 +66,4 @@ void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t<Graph_t> num_vertices,
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp b/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp
index dabb04fe..67728ad0 100644
--- a/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp
+++ b/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp
@@ -34,9 +34,8 @@ namespace osp {
  * @param prob probability of an edge immediately off the diagonal to be included
  * @return DAG
  */
-template<typename Graph_t>
+template <typename Graph_t>
 void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t<Graph_t> num_vertices, double bandwidth, double prob) {
-
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG type");
 
     dag_out = Graph_t(num_vertices);
@@ -45,8 +44,8 @@ void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t<Graph_t> num_vertices
     std::mt19937 gen(rd());
 
     for (vertex_idx_t<Graph_t> v = 1; v < num_vertices; ++v) {
-    
-        std::binomial_distribution<vertex_idx_t<Graph_t>> bino_dist(vertex_idx_t<Graph_t>(num_vertices - v), prob * std::exp( 1.0 - static_cast<double>(v) / bandwidth)  );
+        std::binomial_distribution<vertex_idx_t<Graph_t>> bino_dist(vertex_idx_t<Graph_t>(num_vertices - v),
+                                                                    prob * std::exp(1.0 - static_cast<double>(v) / bandwidth));
         vertex_idx_t<Graph_t> off_diag_edges_num = bino_dist(gen);
 
         std::vector<vertex_idx_t<Graph_t>> range(num_vertices - v, 0);
@@ -61,4 +60,4 @@ void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t<Graph_t> num_vertices
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/return_status.hpp b/include/osp/auxiliary/return_status.hpp
index e5f0b870..3aa23889 100644
--- a/include/osp/auxiliary/return_status.hpp
+++ b/include/osp/auxiliary/return_status.hpp
@@ -22,10 +22,7 @@ limitations under the License.
 
 namespace osp {
 
-enum class RETURN_STATUS { OSP_SUCCESS,
-                           BEST_FOUND,
-                           TIMEOUT,
-                           ERROR };
+enum class RETURN_STATUS { OSP_SUCCESS, BEST_FOUND, TIMEOUT, ERROR };
 
 /**
  * @brief Converts the enum to a string literal.
@@ -33,24 +30,22 @@ enum class RETURN_STATUS { OSP_SUCCESS,
  */
 inline const char *to_string(const RETURN_STATUS status) {
     switch (status) {
-    case RETURN_STATUS::OSP_SUCCESS:
-        return "SUCCESS";
-    case RETURN_STATUS::BEST_FOUND:
-        return "BEST FOUND";
-    case RETURN_STATUS::TIMEOUT:
-        return "TIMEOUT";
-    case RETURN_STATUS::ERROR:
-        return "ERROR";
-    default:
-        return "UNKNOWN";
+        case RETURN_STATUS::OSP_SUCCESS:
+            return "SUCCESS";
+        case RETURN_STATUS::BEST_FOUND:
+            return "BEST FOUND";
+        case RETURN_STATUS::TIMEOUT:
+            return "TIMEOUT";
+        case RETURN_STATUS::ERROR:
+            return "ERROR";
+        default:
+            return "UNKNOWN";
     }
 }
 
 /**
  * @brief Stream operator overload using the helper function.
  */
-inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) {
-    return os << to_string(status);
-}
+inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { return os << to_string(status); }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp b/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp
index 556d4b64..2594267f 100644
--- a/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp
+++ b/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp
@@ -13,16 +13,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-
-#include <vector>
 #include <algorithm>
-#include <set>
 #include <queue>
+#include <set>
+#include <vector>
 
 #include "osp/bsp/model/BspSchedule.hpp"
 
@@ -32,46 +31,47 @@ enum SCHEDULE_NODE_PERMUTATION_MODES { LOOP_PROCESSORS, SNAKE_PROCESSORS, PROCES
 
 /**
  * @brief Computes a permutation to improve locality of a schedule, looping through processors
- * 
+ *
  * @param sched BSP Schedule
  * @param mode ordering of processors
  * @return std::vector<size_t> vec[prev_node_name] = new_node_name(location)
  */
-template<typename Graph_t>
-std::vector<size_t> schedule_node_permuter_basic(const BspSchedule<Graph_t>& sched, const SCHEDULE_NODE_PERMUTATION_MODES mode = LOOP_PROCESSORS) {
-// superstep, processor, nodes
-    std::vector<std::vector<std::vector<size_t>>> allocation(sched.numberOfSupersteps(),
-                                                                std::vector<std::vector<size_t>>(sched.getInstance().numberOfProcessors(),
-                                                                    std::vector<size_t>({})));
+template <typename Graph_t>
+std::vector<size_t> schedule_node_permuter_basic(const BspSchedule<Graph_t> &sched,
+                                                 const SCHEDULE_NODE_PERMUTATION_MODES mode = LOOP_PROCESSORS) {
+    // superstep, processor, nodes
+    std::vector<std::vector<std::vector<size_t>>> allocation(
+        sched.numberOfSupersteps(),
+        std::vector<std::vector<size_t>>(sched.getInstance().numberOfProcessors(), std::vector<size_t>({})));
     for (size_t node = 0; node < sched.getInstance().numberOfVertices(); node++) {
-        allocation[ sched.assignedSuperstep(node) ][ sched.assignedProcessor(node) ].emplace_back(node);
+        allocation[sched.assignedSuperstep(node)][sched.assignedProcessor(node)].emplace_back(node);
     }
 
     // reordering and allocating into permutation
     std::vector<size_t> permutation(sched.getInstance().numberOfVertices());
 
-    if(mode == LOOP_PROCESSORS || mode == SNAKE_PROCESSORS) {
+    if (mode == LOOP_PROCESSORS || mode == SNAKE_PROCESSORS) {
         bool forward = true;
         size_t counter = 0;
         for (auto step_it = allocation.begin(); step_it != allocation.cend(); step_it++) {
             if (forward) {
                 for (auto proc_it = step_it->begin(); proc_it != step_it->cend(); proc_it++) {
-                    //topological_sort_for_data_locality_interior_basic(*proc_it, sched);
-                    for (const auto& node : *proc_it) {
+                    // topological_sort_for_data_locality_interior_basic(*proc_it, sched);
+                    for (const auto &node : *proc_it) {
                         permutation[node] = counter;
                         counter++;
                     }
                 }
             } else {
                 for (auto proc_it = step_it->rbegin(); proc_it != step_it->crend(); proc_it++) {
-                    //topological_sort_for_data_locality_interior_basic(*proc_it, sched);
-                    for (const auto& node : *proc_it) {
+                    // topological_sort_for_data_locality_interior_basic(*proc_it, sched);
+                    for (const auto &node : *proc_it) {
                         permutation[node] = counter;
                         counter++;
                     }
                 }
             }
-            
+
             if (mode == SNAKE_PROCESSORS) {
                 forward = !forward;
             }
@@ -83,4 +83,4 @@ std::vector<size_t> schedule_node_permuter_basic(const BspSchedule<Graph_t>& sch
     return permutation;
 }
 
-}
+}    // namespace osp
diff --git a/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp b/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp
index d7f7e77f..86f4e0f3 100644
--- a/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp
+++ b/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp
@@ -20,23 +20,24 @@ limitations under the License.
 
 #ifdef EIGEN_FOUND
 
-#include <Eigen/Core>
-#include <algorithm>
-#include <iostream>
-#include <list>
-#include <map>
-#include <omp.h>
-#include <random>
-#include <stdexcept>
-#include <vector>
-
-#include "osp/bsp/model/BspInstance.hpp"
-#include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
+#    include <omp.h>
+
+#    include <Eigen/Core>
+#    include <algorithm>
+#    include <iostream>
+#    include <list>
+#    include <map>
+#    include <random>
+#    include <stdexcept>
+#    include <vector>
+
+#    include "osp/bsp/model/BspInstance.hpp"
+#    include "osp/bsp/model/BspSchedule.hpp"
+#    include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
 
 namespace osp {
 
-template<typename eigen_idx_type>
+template <typename eigen_idx_type>
 class Sptrsv {
     using uVertType = typename SparseMatrixImp<eigen_idx_type>::vertex_idx;
 
@@ -74,88 +75,88 @@ class Sptrsv {
 
     void setup_csr_no_permutation(const BspSchedule<SparseMatrixImp<eigen_idx_type>> &schedule) {
         vector_step_processor_vertices = std::vector<std::vector<std::vector<eigen_idx_type>>>(
-            schedule.numberOfSupersteps(),
-            std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
+            schedule.numberOfSupersteps(), std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
 
         vector_step_processor_vertices_u = std::vector<std::vector<std::vector<eigen_idx_type>>>(
-            schedule.numberOfSupersteps(),
-            std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
+            schedule.numberOfSupersteps(), std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
 
         bounds_array_l = std::vector<std::vector<std::vector<eigen_idx_type>>>(
-            schedule.numberOfSupersteps(),
-            std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
+            schedule.numberOfSupersteps(), std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
         bounds_array_u = std::vector<std::vector<std::vector<eigen_idx_type>>>(
-            schedule.numberOfSupersteps(),
-            std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
+            schedule.numberOfSupersteps(), std::vector<std::vector<eigen_idx_type>>(schedule.getInstance().numberOfProcessors()));
 
         num_supersteps = schedule.numberOfSupersteps();
         size_t number_of_vertices = instance->getComputationalDag().num_vertices();
 
-#pragma omp parallel num_threads(2)
+#    pragma omp parallel num_threads(2)
         {
             int id = omp_get_thread_num();
             switch (id) {
-            case 0: {
-                for (size_t node = 0; node < number_of_vertices; ++node) {
-                    vector_step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back(static_cast<eigen_idx_type>(node));
-                }
+                case 0: {
+                    for (size_t node = 0; node < number_of_vertices; ++node) {
+                        vector_step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back(
+                            static_cast<eigen_idx_type>(node));
+                    }
 
-                for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) {
-                    for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) {
-                        if (!vector_step_processor_vertices[step][proc].empty()) {
-                            eigen_idx_type start = vector_step_processor_vertices[step][proc][0];
-                            eigen_idx_type prev = vector_step_processor_vertices[step][proc][0];
-
-                            for (size_t i = 1; i < vector_step_processor_vertices[step][proc].size(); ++i) {
-                                if (vector_step_processor_vertices[step][proc][i] != prev + 1) {
-                                    bounds_array_l[step][proc].push_back(start);
-                                    bounds_array_l[step][proc].push_back(prev);
-                                    start = vector_step_processor_vertices[step][proc][i];
+                    for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) {
+                        for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+                            if (!vector_step_processor_vertices[step][proc].empty()) {
+                                eigen_idx_type start = vector_step_processor_vertices[step][proc][0];
+                                eigen_idx_type prev = vector_step_processor_vertices[step][proc][0];
+
+                                for (size_t i = 1; i < vector_step_processor_vertices[step][proc].size(); ++i) {
+                                    if (vector_step_processor_vertices[step][proc][i] != prev + 1) {
+                                        bounds_array_l[step][proc].push_back(start);
+                                        bounds_array_l[step][proc].push_back(prev);
+                                        start = vector_step_processor_vertices[step][proc][i];
+                                    }
+                                    prev = vector_step_processor_vertices[step][proc][i];
                                 }
-                                prev = vector_step_processor_vertices[step][proc][i];
-                            }
 
-                            bounds_array_l[step][proc].push_back(start);
-                            bounds_array_l[step][proc].push_back(prev);
+                                bounds_array_l[step][proc].push_back(start);
+                                bounds_array_l[step][proc].push_back(prev);
+                            }
                         }
                     }
-                }
 
-                break;
-            }
-            case 1: {
-                size_t node = number_of_vertices;
-                do {
-                    node--;
-                    vector_step_processor_vertices_u[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back(static_cast<eigen_idx_type>(node));
-                } while (node > 0);
-
-                for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) {
-                    for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) {
-                        if (!vector_step_processor_vertices_u[step][proc].empty()) {
-                            eigen_idx_type start_u = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][0]);
-                            eigen_idx_type prev_u = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][0]);
-
-                            for (size_t i = 1; i < vector_step_processor_vertices_u[step][proc].size(); ++i) {
-                                if (static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][i]) != prev_u - 1) {
-                                    bounds_array_u[step][proc].push_back(start_u);
-                                    bounds_array_u[step][proc].push_back(prev_u);
-                                    start_u = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][i]);
+                    break;
+                }
+                case 1: {
+                    size_t node = number_of_vertices;
+                    do {
+                        node--;
+                        vector_step_processor_vertices_u[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)]
+                            .push_back(static_cast<eigen_idx_type>(node));
+                    } while (node > 0);
+
+                    for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) {
+                        for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+                            if (!vector_step_processor_vertices_u[step][proc].empty()) {
+                                eigen_idx_type start_u
+                                    = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][0]);
+                                eigen_idx_type prev_u
+                                    = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][0]);
+
+                                for (size_t i = 1; i < vector_step_processor_vertices_u[step][proc].size(); ++i) {
+                                    if (static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][i]) != prev_u - 1) {
+                                        bounds_array_u[step][proc].push_back(start_u);
+                                        bounds_array_u[step][proc].push_back(prev_u);
+                                        start_u = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][i]);
+                                    }
+                                    prev_u = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][i]);
                                 }
-                                prev_u = static_cast<eigen_idx_type>(vector_step_processor_vertices_u[step][proc][i]);
-                            }
 
-                            bounds_array_u[step][proc].push_back(start_u);
-                            bounds_array_u[step][proc].push_back(prev_u);
+                                bounds_array_u[step][proc].push_back(start_u);
+                                bounds_array_u[step][proc].push_back(prev_u);
+                            }
                         }
                     }
-                }
 
-                break;
-            }
-            default: {
-                std::cout << "Unexpected Behaviour" << std::endl;
-            }
+                    break;
+                }
+                default: {
+                    std::cout << "Unexpected Behaviour" << std::endl;
+                }
             }
         }
     }
@@ -177,8 +178,8 @@ class Sptrsv {
         row_ptr.clear();
         row_ptr.reserve(instance->numberOfVertices() + 1);
 
-        step_proc_ptr =
-            std::vector<std::vector<unsigned>>(num_supersteps, std::vector<unsigned>(instance->numberOfProcessors(), 0));
+        step_proc_ptr
+            = std::vector<std::vector<unsigned>>(num_supersteps, std::vector<unsigned>(instance->numberOfProcessors(), 0));
 
         step_proc_num = schedule.numAssignedNodesPerSuperstepProcessor();
 
@@ -188,12 +189,8 @@ class Sptrsv {
         step_proc_ptr[current_step][current_processor] = 0;
 
         for (const uVertType &node : perm_inv) {
-
             if (schedule.assignedProcessor(node) != current_processor || schedule.assignedSuperstep(node) != current_step) {
-
-                while (schedule.assignedProcessor(node) != current_processor ||
-                       schedule.assignedSuperstep(node) != current_step) {
-
+                while (schedule.assignedProcessor(node) != current_processor || schedule.assignedSuperstep(node) != current_step) {
                     if (current_processor < instance->numberOfProcessors() - 1) {
                         current_processor++;
                     } else {
@@ -218,8 +215,9 @@ class Sptrsv {
                 unsigned found = 0;
 
                 const auto *outer = instance->getComputationalDag().getCSR()->outerIndexPtr();
-                for (uVertType par_ind = static_cast<uVertType>(outer[node]); par_ind < static_cast<uVertType>(outer[node + 1] - 1); ++par_ind) {
-
+                for (uVertType par_ind = static_cast<uVertType>(outer[node]);
+                     par_ind < static_cast<uVertType>(outer[node + 1] - 1);
+                     ++par_ind) {
                     if (static_cast<size_t>(instance->getComputationalDag().getCSR()->innerIndexPtr()[par_ind]) == perm_inv[par]) {
                         val.push_back(instance->getComputationalDag().getCSR()->valuePtr()[par_ind]);
                         found++;
@@ -229,7 +227,9 @@ class Sptrsv {
             }
 
             col_idx.push_back(perm[node]);
-            val.push_back(instance->getComputationalDag().getCSR()->valuePtr()[instance->getComputationalDag().getCSR()->outerIndexPtr()[node + 1] - 1]);
+            val.push_back(instance->getComputationalDag()
+                              .getCSR()
+                              ->valuePtr()[instance->getComputationalDag().getCSR()->outerIndexPtr()[node + 1] - 1]);
         }
 
         row_ptr.push_back(col_idx.size());
@@ -239,10 +239,14 @@ class Sptrsv {
         eigen_idx_type number_of_vertices = static_cast<eigen_idx_type>(instance->numberOfVertices());
         for (eigen_idx_type i = 0; i < number_of_vertices; ++i) {
             x[i] = b[i];
-            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; ++j) {
-                x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]];
+            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i];
+                 j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1;
+                 ++j) {
+                x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j]
+                        * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]];
             }
-            x[i] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1];
+            x[i] /= (*(instance->getComputationalDag().getCSR()))
+                        .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1];
         }
     }
 
@@ -253,15 +257,19 @@ class Sptrsv {
         do {
             i--;
             x[i] = b[i];
-            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; ++j) {
-                x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]];
+            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1;
+                 j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1];
+                 ++j) {
+                x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j]
+                        * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]];
             }
-            x[i] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]];
+            x[i] /= (*(instance->getComputationalDag().getCSC()))
+                        .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]];
         } while (i != 0);
     }
 
     void lsolve_no_permutation_in_place() {
-#pragma omp parallel num_threads(instance->numberOfProcessors())
+#    pragma omp parallel num_threads(instance->numberOfProcessors())
         {
             const size_t proc = static_cast<size_t>(omp_get_thread_num());
             for (unsigned step = 0; step < num_supersteps; ++step) {
@@ -272,19 +280,23 @@ class Sptrsv {
                     const eigen_idx_type upper_b = bounds_array_l[step][proc][index + 1];
 
                     for (eigen_idx_type node = lower_b; node <= upper_b; ++node) {
-                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; ++i) {
-                            x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]];
+                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node];
+                             i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1;
+                             ++i) {
+                            x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i]
+                                       * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]];
                         }
-                        x[node] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1];
+                        x[node] /= (*(instance->getComputationalDag().getCSR()))
+                                       .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1];
                     }
                 }
-#pragma omp barrier
+#    pragma omp barrier
             }
         }
     }
 
     void usolve_no_permutation_in_place() {
-#pragma omp parallel num_threads(instance->numberOfProcessors())
+#    pragma omp parallel num_threads(instance->numberOfProcessors())
         {
             // Process each superstep starting from the last one (opposite of lsolve)
             const size_t proc = static_cast<size_t>(omp_get_thread_num());
@@ -298,19 +310,23 @@ class Sptrsv {
 
                     do {
                         node--;
-                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; ++i) {
-                            x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]];
+                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1;
+                             i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1];
+                             ++i) {
+                            x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i]
+                                       * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]];
                         }
-                        x[node] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]];
+                        x[node] /= (*(instance->getComputationalDag().getCSC()))
+                                       .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]];
                     } while (node != lower_b);
                 }
-#pragma omp barrier
+#    pragma omp barrier
             } while (step != 0);
         }
     }
 
     void lsolve_no_permutation() {
-#pragma omp parallel num_threads(instance->numberOfProcessors())
+#    pragma omp parallel num_threads(instance->numberOfProcessors())
         {
             const size_t proc = static_cast<size_t>(omp_get_thread_num());
             for (unsigned step = 0; step < num_supersteps; ++step) {
@@ -322,19 +338,23 @@ class Sptrsv {
 
                     for (eigen_idx_type node = lower_b; node <= upper_b; ++node) {
                         x[node] = b[node];
-                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; ++i) {
-                            x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]];
+                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node];
+                             i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1;
+                             ++i) {
+                            x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i]
+                                       * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]];
                         }
-                        x[node] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1];
+                        x[node] /= (*(instance->getComputationalDag().getCSR()))
+                                       .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1];
                     }
                 }
-#pragma omp barrier
+#    pragma omp barrier
             }
         }
     }
 
     void usolve_no_permutation() {
-#pragma omp parallel num_threads(instance->numberOfProcessors())
+#    pragma omp parallel num_threads(instance->numberOfProcessors())
         {
             // Process each superstep starting from the last one (opposite of lsolve)
             const size_t proc = static_cast<size_t>(omp_get_thread_num());
@@ -349,13 +369,17 @@ class Sptrsv {
                     do {
                         node--;
                         x[node] = b[node];
-                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; ++i) {
-                            x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]];
+                        for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1;
+                             i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1];
+                             ++i) {
+                            x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i]
+                                       * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]];
                         }
-                        x[node] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]];
+                        x[node] /= (*(instance->getComputationalDag().getCSC()))
+                                       .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]];
                     } while (node != lower_b);
                 }
-#pragma omp barrier
+#    pragma omp barrier
             } while (step != 0);
         }
     }
@@ -363,10 +387,14 @@ class Sptrsv {
     void lsolve_serial_in_place() {
         eigen_idx_type number_of_vertices = static_cast<eigen_idx_type>(instance->numberOfVertices());
         for (eigen_idx_type i = 0; i < number_of_vertices; ++i) {
-            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; ++j) {
-                x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]];
+            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i];
+                 j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1;
+                 ++j) {
+                x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j]
+                        * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]];
             }
-            x[i] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1];
+            x[i] /= (*(instance->getComputationalDag().getCSR()))
+                        .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1];
         }
     }
 
@@ -375,22 +403,24 @@ class Sptrsv {
         eigen_idx_type i = number_of_vertices;
         do {
             i--;
-            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; ++j) {
-                x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]];
+            for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1;
+                 j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1];
+                 ++j) {
+                x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j]
+                        * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]];
             }
-            x[i] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]];
+            x[i] /= (*(instance->getComputationalDag().getCSC()))
+                        .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]];
         } while (i != 0);
     }
 
     void lsolve_with_permutation_in_place() {
-#pragma omp parallel num_threads(instance->numberOfProcessors())
+#    pragma omp parallel num_threads(instance->numberOfProcessors())
         {
             for (unsigned step = 0; step < num_supersteps; step++) {
-
                 const size_t proc = static_cast<size_t>(omp_get_thread_num());
                 const uVertType upper_limit = step_proc_ptr[step][proc] + step_proc_num[step][proc];
                 for (uVertType _row_idx = step_proc_ptr[step][proc]; _row_idx < upper_limit; _row_idx++) {
-
                     for (uVertType i = row_ptr[_row_idx]; i < row_ptr[_row_idx + 1] - 1; i++) {
                         x[_row_idx] -= val[i] * x[col_idx[i]];
                     }
@@ -398,16 +428,15 @@ class Sptrsv {
                     x[_row_idx] /= val[row_ptr[_row_idx + 1] - 1];
                 }
 
-#pragma omp barrier
+#    pragma omp barrier
             }
         }
     }
 
     void lsolve_with_permutation() {
-#pragma omp parallel num_threads(instance->numberOfProcessors())
+#    pragma omp parallel num_threads(instance->numberOfProcessors())
         {
             for (unsigned step = 0; step < num_supersteps; step++) {
-
                 const size_t proc = static_cast<size_t>(omp_get_thread_num());
                 const uVertType upper_limit = step_proc_ptr[step][proc] + step_proc_num[step][proc];
                 for (uVertType _row_idx = step_proc_ptr[step][proc]; _row_idx < upper_limit; _row_idx++) {
@@ -419,7 +448,7 @@ class Sptrsv {
                     x[_row_idx] /= val[row_ptr[_row_idx + 1] - 1];
                 }
 
-#pragma omp barrier
+#    pragma omp barrier
             }
         }
     }
@@ -451,13 +480,11 @@ class Sptrsv {
         }
     }
 
-    std::size_t get_number_of_vertices() {
-        return instance->numberOfVertices();
-    }
+    std::size_t get_number_of_vertices() { return instance->numberOfVertices(); }
 
     virtual ~Sptrsv() = default;
 };
 
-} // namespace osp
+}    // namespace osp
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
index 52240fa2..535f0d98 100644
--- a/include/osp/bsp/model/BspArchitecture.hpp
+++ b/include/osp/bsp/model/BspArchitecture.hpp
@@ -37,13 +37,15 @@ namespace osp {
 /**
  * @enum MEMORY_CONSTRAINT_TYPE
  * @brief Enumerates the different types of memory constraints.
- * Memory bounds are set per processor and apply to aggregated memory weights of nodes according to the different types of memory constraints.
+ * Memory bounds are set per processor and apply to aggregated memory weights of nodes according to the different types of memory
+ * constraints.
  */
 enum class MEMORY_CONSTRAINT_TYPE {
-    NONE,                     /** No memory constraints. */
-    LOCAL,                    /** The memory bounds apply to the sum of memory weights of nodes assigned to the same processor and superstep. */
-    GLOBAL,                   /** The memory bounds apply to the sum of memory weights of the nodes assigned to the same processor. */
-    PERSISTENT_AND_TRANSIENT, /** Memory bounds apply to the sum of memory weights of nodes assigned to the same processor plus the maximum communication weight of a node assigned to a processor. */
+    NONE,   /** No memory constraints. */
+    LOCAL,  /** The memory bounds apply to the sum of memory weights of nodes assigned to the same processor and superstep. */
+    GLOBAL, /** The memory bounds apply to the sum of memory weights of the nodes assigned to the same processor. */
+    PERSISTENT_AND_TRANSIENT, /** Memory bounds apply to the sum of memory weights of nodes assigned to the same processor plus
+                                 the maximum communication weight of a node assigned to a processor. */
     LOCAL_IN_OUT,             /** Memory constraints are local in-out. Experimental. */
     LOCAL_INC_EDGES,          /** Memory constraints are local incident edges. Experimental. */
     LOCAL_SOURCES_INC_EDGES   /** Memory constraints are local source incident edges. Experimental. */
@@ -55,31 +57,29 @@ enum class MEMORY_CONSTRAINT_TYPE {
  */
 inline const char *to_string(MEMORY_CONSTRAINT_TYPE type) {
     switch (type) {
-    case MEMORY_CONSTRAINT_TYPE::NONE:
-        return "NONE";
-    case MEMORY_CONSTRAINT_TYPE::LOCAL:
-        return "LOCAL";
-    case MEMORY_CONSTRAINT_TYPE::GLOBAL:
-        return "GLOBAL";
-    case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT:
-        return "PERSISTENT_AND_TRANSIENT";
-    case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT:
-        return "LOCAL_IN_OUT";
-    case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES:
-        return "LOCAL_INC_EDGES";
-    case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES:
-        return "LOCAL_SOURCES_INC_EDGES";
-    default:
-        return "UNKNOWN";
+        case MEMORY_CONSTRAINT_TYPE::NONE:
+            return "NONE";
+        case MEMORY_CONSTRAINT_TYPE::LOCAL:
+            return "LOCAL";
+        case MEMORY_CONSTRAINT_TYPE::GLOBAL:
+            return "GLOBAL";
+        case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT:
+            return "PERSISTENT_AND_TRANSIENT";
+        case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT:
+            return "LOCAL_IN_OUT";
+        case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES:
+            return "LOCAL_INC_EDGES";
+        case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES:
+            return "LOCAL_SOURCES_INC_EDGES";
+        default:
+            return "UNKNOWN";
     }
 }
 
 /**
  * @brief Stream operator overload using the helper function.
  */
-inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) {
-    return os << to_string(type);
-}
+inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { return os << to_string(type); }
 
 /**
  * @class BspArchitecture
@@ -110,7 +110,7 @@ inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) {
  * Each processor has a memory bound. The `MEMORY_CONSTRAINT_TYPE` determines how these bounds are applied
  * (e.g., local per superstep, global per processor).
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class BspArchitecture {
     static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
 
@@ -133,7 +133,8 @@ class BspArchitecture {
     /** @brief Flag to indicate whether the architecture is NUMA , i.e., whether the send costs are different for different pairs of processors. */
     bool isNuma_;
 
-    /** @brief The architecture allows to specify processor types. Processor types are used to express compatabilities, which can be specified in the BspInstance, regarding node types. */
+    /** @brief The architecture allows specifying processor types. Processor types are used to express compatibilities, which
+     * can be specified in the BspInstance, regarding node types. */
     std::vector<unsigned> processorTypes_;
 
     /** @brief A flattened p x p matrix of send costs. Access via index [i * numberOfProcessors_ + j]. */
@@ -148,16 +149,19 @@ class BspArchitecture {
     }
 
     bool AreSendCostsNuma() {
-        if (numberOfProcessors_ == 1U)
+        if (numberOfProcessors_ == 1U) {
             return false;
+        }
 
         const v_commw_t<Graph_t> val = sendCosts_[1U];
         for (unsigned p1 = 0U; p1 < numberOfProcessors_; p1++) {
             for (unsigned p2 = 0U; p2 < numberOfProcessors_; p2++) {
-                if (p1 == p2)
+                if (p1 == p2) {
                     continue;
-                if (sendCosts_[FlatIndex(p1, p2)] != val)
+                }
+                if (sendCosts_[FlatIndex(p1, p2)] != val) {
                     return true;
+                }
             }
         }
         return false;
@@ -193,13 +197,20 @@ class BspArchitecture {
      * @param CommunicationCost The communication cost between processors. Default: 1.
      * @param SynchronisationCost The synchronization cost between processors. Default: 2.
      * @param MemoryBound The memory bound for each processor (default: 100).
-     * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. Default: empty (uniform costs).
-     */
-    BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t<Graph_t> CommunicationCost = 1U, const v_commw_t<Graph_t> SynchronisationCost = 2U,
-                    const v_memw_t<Graph_t> MemoryBound = 100U, const std::vector<std::vector<v_commw_t<Graph_t>>> &SendCosts = {})
-        : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost),
+     * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal
+     * entries are forced to zero. Default: empty (uniform costs).
+     */
+    BspArchitecture(const unsigned NumberOfProcessors = 2U,
+                    const v_commw_t<Graph_t> CommunicationCost = 1U,
+                    const v_commw_t<Graph_t> SynchronisationCost = 2U,
+                    const v_memw_t<Graph_t> MemoryBound = 100U,
+                    const std::vector<std::vector<v_commw_t<Graph_t>>> &SendCosts = {})
+        : numberOfProcessors_(NumberOfProcessors),
+          numberOfProcessorTypes_(1U),
+          communicationCosts_(CommunicationCost),
           synchronisationCosts_(SynchronisationCost),
-          memoryBound_(NumberOfProcessors, MemoryBound), isNuma_(false),
+          memoryBound_(NumberOfProcessors, MemoryBound),
+          isNuma_(false),
           processorTypes_(NumberOfProcessors, 0U) {
         if (NumberOfProcessors == 0U) {
             throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0.");
@@ -211,8 +222,9 @@ class BspArchitecture {
             if (NumberOfProcessors != SendCosts.size()) {
                 throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
             }
-            if (std::any_of(SendCosts.begin(), SendCosts.end(),
-                            [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) {
+            if (std::any_of(SendCosts.begin(), SendCosts.end(), [NumberOfProcessors](const auto &thing) {
+                    return thing.size() != NumberOfProcessors;
+                })) {
                 throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
             }
 
@@ -238,11 +250,15 @@ class BspArchitecture {
      * @tparam Graph_t_other The graph type of the other BspArchitecture.
      * @param other The other BspArchitecture object.
      */
-    template<typename Graph_t_other>
+    template <typename Graph_t_other>
     BspArchitecture(const BspArchitecture<Graph_t_other> &other)
-        : numberOfProcessors_(other.numberOfProcessors()), numberOfProcessorTypes_(other.getNumberOfProcessorTypes()),
-          communicationCosts_(other.communicationCosts()), synchronisationCosts_(other.synchronisationCosts()),
-          memoryBound_(other.memoryBound()), isNuma_(other.isNumaArchitecture()), processorTypes_(other.processorTypes()),
+        : numberOfProcessors_(other.numberOfProcessors()),
+          numberOfProcessorTypes_(other.getNumberOfProcessorTypes()),
+          communicationCosts_(other.communicationCosts()),
+          synchronisationCosts_(other.synchronisationCosts()),
+          memoryBound_(other.memoryBound()),
+          isNuma_(other.isNumaArchitecture()),
+          processorTypes_(other.processorTypes()),
           sendCosts_(other.sendCostsVector()) {
         static_assert(std::is_same_v<v_memw_t<Graph_t>, v_memw_t<Graph_t_other>>,
                       "BspArchitecture: Graph_t and Graph_t_other have the same memory weight type.");
@@ -260,9 +276,12 @@ class BspArchitecture {
      * @param NumberOfProcessors The number of processors. Must be greater than 0.
      * @param CommunicationCost The communication cost.
      * @param SynchronisationCost The synchronization cost.
-     * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero.
+     * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal
+     * entries are forced to zero.
      */
-    BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t<Graph_t> CommunicationCost, const v_commw_t<Graph_t> SynchronisationCost,
+    BspArchitecture(const unsigned NumberOfProcessors,
+                    const v_commw_t<Graph_t> CommunicationCost,
+                    const v_commw_t<Graph_t> SynchronisationCost,
                     const std::vector<std::vector<v_commw_t<Graph_t>>> &SendCosts)
         : BspArchitecture(NumberOfProcessors, CommunicationCost, SynchronisationCost, 100U, SendCosts) {}
 
@@ -296,8 +315,7 @@ class BspArchitecture {
 
         unsigned maxPos = 1;
         constexpr unsigned two = 2;
-        for (; intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) {
-        }
+        for (; intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) {}
 
         for (unsigned i = 0U; i < numberOfProcessors_; ++i) {
             for (unsigned j = i + 1U; j < numberOfProcessors_; ++j) {
@@ -357,8 +375,9 @@ class BspArchitecture {
      * @throws std::invalid_argument if the processor indices are out of bounds.
      */
     void SetSendCosts(const unsigned p1, const unsigned p2, const v_commw_t<Graph_t> cost) {
-        if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) // Fixed condition: p2 >= number_processors
+        if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) {    // Fixed condition: p2 >= number_processors
             throw std::invalid_argument("Invalid Argument: Processor index out of bounds.");
+        }
 
         if (p1 != p2) {
             sendCosts_.at(FlatIndex(p1, p2)) = cost;
@@ -370,9 +389,7 @@ class BspArchitecture {
      * @brief Sets the memory bound for all processors.
      * @param MemoryBound The new memory bound for all processors.
      */
-    void setMemoryBound(const v_memw_t<Graph_t> MemoryBound) {
-        memoryBound_.assign(numberOfProcessors_, MemoryBound);
-    }
+    void setMemoryBound(const v_memw_t<Graph_t> MemoryBound) { memoryBound_.assign(numberOfProcessors_, MemoryBound); }
 
     /**
      * @brief Sets the memory bound for all processors using a vector.
@@ -434,8 +451,8 @@ class BspArchitecture {
     }
 
     /**
-     * @brief Sets the number of processors and their types. Number of processors is set to the size of the processor types vector.
-     * Resets send costs to uniform (1). Resets memory bound to 100 for all processors.
+     * @brief Sets the number of processors and their types. Number of processors is set to the size of the processor types
+     * vector. Resets send costs to uniform (1). Resets memory bound to 100 for all processors.
      * @param processorTypes The types of the respective processors.
      */
     void setProcessorsWithTypes(const std::vector<v_type_t<Graph_t>> &processorTypes) {
@@ -509,7 +526,9 @@ class BspArchitecture {
      * @brief Returns the maximum memory bound over all processors.
      * @return The maximum memory bound.
      */
-    [[nodiscard]] v_memw_t<Graph_t> maxMemoryBound() const { return *(std::max_element(memoryBound_.begin(), memoryBound_.end())); }
+    [[nodiscard]] v_memw_t<Graph_t> maxMemoryBound() const {
+        return *(std::max_element(memoryBound_.begin(), memoryBound_.end()));
+    }
 
     /**
      * @brief Returns the maximum memory bound over all processors of a specific type.
@@ -546,11 +565,13 @@ class BspArchitecture {
     [[nodiscard]] v_commw_t<Graph_t> synchronisationCosts() const { return synchronisationCosts_; }
 
     /**
-     * @brief Returns a the send costs matrix. Internally the matrix is stored as a flattened matrix. The allocates, computes and returns the matrix on the fly.
+     * @brief Returns the send costs matrix. Internally the matrix is stored as a flattened matrix. This method allocates,
+     * computes and returns the matrix on the fly.
      * @return The send costs matrix.
      */
     [[nodiscard]] std::vector<std::vector<v_commw_t<Graph_t>>> sendCost() const {
-        std::vector<std::vector<v_commw_t<Graph_t>>> matrix(numberOfProcessors_, std::vector<v_commw_t<Graph_t>>(numberOfProcessors_));
+        std::vector<std::vector<v_commw_t<Graph_t>>> matrix(numberOfProcessors_,
+                                                            std::vector<v_commw_t<Graph_t>>(numberOfProcessors_));
         for (unsigned i = 0; i < numberOfProcessors_; ++i) {
             for (unsigned j = 0; j < numberOfProcessors_; ++j) {
                 matrix[i][j] = sendCosts_[FlatIndex(i, j)];
@@ -591,7 +612,9 @@ class BspArchitecture {
      * @param p2 The index of the second processor.
      * @return The send costs between the two processors.
      */
-    [[nodiscard]] v_commw_t<Graph_t> sendCosts(const unsigned p1, const unsigned p2) const { return sendCosts_[FlatIndex(p1, p2)]; }
+    [[nodiscard]] v_commw_t<Graph_t> sendCosts(const unsigned p1, const unsigned p2) const {
+        return sendCosts_[FlatIndex(p1, p2)];
+    }
 
     /**
      * @brief Returns the type of a specific processor. Does not perform bounds checking.
@@ -628,9 +651,8 @@ class BspArchitecture {
      */
     void print(std::ostream &os) const {
         os << "Architecture info:  number of processors: " << numberOfProcessors_
-           << ", Number of processor types: " << numberOfProcessorTypes_
-           << ", Communication costs: " << communicationCosts_ << ", Synchronization costs: " << synchronisationCosts_
-           << "\n";
+           << ", Number of processor types: " << numberOfProcessorTypes_ << ", Communication costs: " << communicationCosts_
+           << ", Synchronization costs: " << synchronisationCosts_ << "\n";
         os << std::setw(17) << " Processor: ";
         for (unsigned i = 0U; i < numberOfProcessors_; i++) {
             os << std::right << std::setw(5) << i << " ";
@@ -651,9 +673,10 @@ class BspArchitecture {
     [[nodiscard]] unsigned getNumberOfProcessorTypes() const { return numberOfProcessorTypes_; };
 
     [[nodiscard]] MEMORY_CONSTRAINT_TYPE getMemoryConstraintType() const { return memoryConstraintType_; }
+
     void setMemoryConstraintType(const MEMORY_CONSTRAINT_TYPE memoryConstraintType) {
         memoryConstraintType_ = memoryConstraintType;
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index 72f16e1e..34c17b98 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -57,7 +57,7 @@ namespace osp {
  *
  * @tparam Graph_t The type of the computational DAG, which must satisfy the `is_computational_dag` concept.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class BspInstance {
     static_assert(is_computational_dag_v<Graph_t>, "BspInstance can only be used with computational DAGs.");
 
@@ -91,7 +91,8 @@ class BspInstance {
      * @brief The type of the vectex types in the computational DAG.
      * If the DAG does not support vertex types, this is `unsigned`.
      */
-    using vertex_type_t_or_default = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
+    using vertex_type_t_or_default
+        = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
     using processor_type_t = unsigned;
 
   public:
@@ -107,7 +108,8 @@ class BspInstance {
      * @param cdag The computational DAG for the instance.
      * @param architecture The BSP architecture for the instance.
      */
-    BspInstance(const Graph_t &cdag_, const BspArchitecture<Graph_t> &architecture_,
+    BspInstance(const Graph_t &cdag_,
+                const BspArchitecture<Graph_t> &architecture_,
                 std::vector<std::vector<bool>> nodeProcessorCompatibility_ = std::vector<std::vector<bool>>({{true}}))
         : cdag(cdag_), architecture(architecture_), nodeProcessorCompatibility(nodeProcessorCompatibility_) {}
 
@@ -118,15 +120,16 @@ class BspInstance {
      * @param cdag The computational DAG for the instance.
      * @param architecture The BSP architecture for the instance.
      */
-    BspInstance(Graph_t &&cdag_, BspArchitecture<Graph_t> &&architecture_,
+    BspInstance(Graph_t &&cdag_,
+                BspArchitecture<Graph_t> &&architecture_,
                 std::vector<std::vector<bool>> nodeProcessorCompatibility_ = std::vector<std::vector<bool>>({{true}}))
-        : cdag(std::move(cdag_)), architecture(std::move(architecture_)), nodeProcessorCompatibility(nodeProcessorCompatibility_) {
-    }
+        : cdag(std::move(cdag_)),
+          architecture(std::move(architecture_)),
+          nodeProcessorCompatibility(nodeProcessorCompatibility_) {}
 
-    template<typename Graph_t_other>
+    template <typename Graph_t_other>
     explicit BspInstance(const BspInstance<Graph_t_other> &other)
-        : architecture(other.getArchitecture()),
-          nodeProcessorCompatibility(other.getNodeProcessorCompatibilityMatrix()) {
+        : architecture(other.getArchitecture()), nodeProcessorCompatibility(other.getNodeProcessorCompatibilityMatrix()) {
         constructComputationalDag(other.getComputationalDag(), cdag);
     }
 
@@ -142,6 +145,7 @@ class BspInstance {
      * The move operator may be used to transfer ownership of the architecture.
      */
     [[nodiscard]] const BspArchitecture<Graph_t> &getArchitecture() const { return architecture; }
+
     [[nodiscard]] BspArchitecture<Graph_t> &getArchitecture() { return architecture; }
 
     /**
@@ -150,6 +154,7 @@ class BspInstance {
      * The move operator may be used to transfer ownership of the DAG.
      */
     [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; }
+
     [[nodiscard]] Graph_t &getComputationalDag() { return cdag; }
 
     /**
@@ -202,9 +207,7 @@ class BspInstance {
     /**
      * @brief Returns the flattened send costs vector.
      */
-    [[nodiscard]] const std::vector<v_commw_t<Graph_t>> &sendCostsVector() const {
-        return architecture.sendCostsVector();
-    }
+    [[nodiscard]] const std::vector<v_commw_t<Graph_t>> &sendCostsVector() const { return architecture.sendCostsVector(); }
 
     /**
      * @brief Returns the communication costs of the BSP architecture.
@@ -288,7 +291,9 @@ class BspInstance {
     /**
      * @brief Returns the node type - processor type compatibility matrix.
      */
-    [[nodiscard]] const std::vector<std::vector<bool>> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; }
+    [[nodiscard]] const std::vector<std::vector<bool>> &getProcessorCompatibilityMatrix() const {
+        return nodeProcessorCompatibility;
+    }
 
     /**
      * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`.
@@ -296,15 +301,17 @@ class BspInstance {
      */
     void setDiagonalCompatibilityMatrix(const vertex_type_t_or_default number_of_types) {
         nodeProcessorCompatibility.assign(number_of_types, std::vector<bool>(number_of_types, false));
-        for (vertex_type_t_or_default i = 0; i < number_of_types; ++i)
+        for (vertex_type_t_or_default i = 0; i < number_of_types; ++i) {
             nodeProcessorCompatibility[i][i] = true;
+        }
     }
 
     /**
      * @brief Sets the compatibility matrix to all ones. This implies that all node types are compatible with all processor types.
      */
     void setAllOnesCompatibilityMatrix() {
-        nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector<bool>(architecture.getNumberOfProcessorTypes(), true));
+        nodeProcessorCompatibility.assign(cdag.num_vertex_types(),
+                                          std::vector<bool>(architecture.getNumberOfProcessorTypes(), true));
     }
 
     /**
@@ -314,8 +321,8 @@ class BspInstance {
     [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const {
         std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
         for (unsigned proc = 0U; proc < architecture.numberOfProcessors(); proc++) {
-            max_memory_per_proc_type[architecture.processorType(proc)] =
-                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
+            max_memory_per_proc_type[architecture.processorType(proc)]
+                = std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
         }
 
         for (vertex_type_t_or_default vertType = 0U; vertType < cdag.num_vertex_types(); vertType++) {
@@ -325,13 +332,15 @@ class BspInstance {
             for (processor_type_t proc_type = 0U; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) {
                 if (isCompatibleType(vertType, proc_type)) {
                     fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]);
-                    if (fits)
+                    if (fits) {
                         break;
+                    }
                 }
             }
 
-            if (!fits)
+            if (!fits) {
                 return false;
+            }
         }
 
         return true;
@@ -346,13 +355,16 @@ class BspInstance {
         processor_type_t numberOfProcTypes = architecture.getNumberOfProcessorTypes();
         std::vector<std::vector<processor_type_t>> compatibleProcTypes(numberOfNodeTypes);
 
-        for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType)
-            for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType)
-                if (isCompatibleType(nodeType, processorType))
+        for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) {
+            for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType) {
+                if (isCompatibleType(nodeType, processorType)) {
                     compatibleProcTypes[nodeType].push_back(processorType);
+                }
+            }
+        }
 
         return compatibleProcTypes;
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp
index 9e5a5d52..23fe804e 100644
--- a/include/osp/bsp/model/BspSchedule.hpp
+++ b/include/osp/bsp/model/BspSchedule.hpp
@@ -45,18 +45,19 @@ namespace osp {
  * This class is templated on `Graph_t`, which must satisfy the `computational_dag_concept`.
  * Moreover, the work and communication weights of the nodes must be of the same type in order to properly compute the cost.
  *
- * It interacts closely with `BspInstance` to access problem-specific data and constraints. In fact, a `BspSchedule` object is tied to a `BspInstance` object.
+ * It interacts closely with `BspInstance` to access problem-specific data and constraints. In fact, a `BspSchedule` object is
+ * tied to a `BspInstance` object.
  *
  * @tparam Graph_t The type of the computational DAG, which must satisfy `is_computational_dag_v`.
  * @see BspInstance
  * @see IBspSchedule
  * @see IBspScheduleEval
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "BspSchedule requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "BspSchedule requires work and comm. weights to have the same type.");
 
   protected:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -77,7 +78,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param inst The BspInstance for the schedule.
      */
     explicit BspSchedule(const BspInstance<Graph_t> &inst)
-        : instance(&inst), number_of_supersteps(1),
+        : instance(&inst),
+          number_of_supersteps(1),
           node_to_processor_assignment(std::vector<unsigned>(inst.numberOfVertices(), 0)),
           node_to_superstep_assignment(std::vector<unsigned>(inst.numberOfVertices(), 0)) {}
 
@@ -89,9 +91,11 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param processor_assignment_ The processor assignment for the nodes.
      * @param superstep_assignment_ The superstep assignment for the nodes.
      */
-    BspSchedule(const BspInstance<Graph_t> &inst, const std::vector<unsigned> &processor_assignment_,
+    BspSchedule(const BspInstance<Graph_t> &inst,
+                const std::vector<unsigned> &processor_assignment_,
                 const std::vector<unsigned> &superstep_assignment_)
-        : instance(&inst), node_to_processor_assignment(processor_assignment_),
+        : instance(&inst),
+          node_to_processor_assignment(processor_assignment_),
           node_to_superstep_assignment(superstep_assignment_) {
         updateNumberOfSupersteps();
     }
@@ -102,7 +106,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param schedule The schedule to copy.
      */
     explicit BspSchedule(const IBspSchedule<Graph_t> &schedule)
-        : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()),
+        : instance(&schedule.getInstance()),
+          number_of_supersteps(schedule.numberOfSupersteps()),
           node_to_processor_assignment(schedule.getInstance().numberOfVertices()),
           node_to_superstep_assignment(schedule.getInstance().numberOfVertices()) {
         for (const auto &v : schedule.getInstance().getComputationalDag().vertices()) {
@@ -117,7 +122,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param schedule The schedule to copy.
      */
     BspSchedule(const BspSchedule<Graph_t> &schedule)
-        : instance(schedule.instance), number_of_supersteps(schedule.number_of_supersteps),
+        : instance(schedule.instance),
+          number_of_supersteps(schedule.number_of_supersteps),
           node_to_processor_assignment(schedule.node_to_processor_assignment),
           node_to_superstep_assignment(schedule.node_to_superstep_assignment) {}
 
@@ -143,7 +149,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param schedule The schedule to move.
      */
     BspSchedule(BspSchedule<Graph_t> &&schedule) noexcept
-        : instance(schedule.instance), number_of_supersteps(schedule.number_of_supersteps),
+        : instance(schedule.instance),
+          number_of_supersteps(schedule.number_of_supersteps),
           node_to_processor_assignment(std::move(schedule.node_to_processor_assignment)),
           node_to_superstep_assignment(std::move(schedule.node_to_superstep_assignment)) {}
 
@@ -170,9 +177,10 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param instance_ The BspInstance for the new schedule.
      * @param schedule The other schedule to copy from.
      */
-    template<typename Graph_t_other>
+    template <typename Graph_t_other>
     BspSchedule(const BspInstance<Graph_t> &instance_, const BspSchedule<Graph_t_other> &schedule)
-        : instance(&instance_), number_of_supersteps(schedule.numberOfSupersteps()),
+        : instance(&instance_),
+          number_of_supersteps(schedule.numberOfSupersteps()),
           node_to_processor_assignment(schedule.assignedProcessors()),
           node_to_superstep_assignment(schedule.assignedSupersteps()) {}
 
@@ -229,6 +237,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @return The superstep assignment for the schedule.
      */
     [[nodiscard]] const std::vector<unsigned> &assignedSupersteps() const { return node_to_superstep_assignment; }
+
     [[nodiscard]] std::vector<unsigned> &assignedSupersteps() { return node_to_superstep_assignment; }
 
     /**
@@ -237,12 +246,13 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @return The processor assignment for the schedule.
      */
     [[nodiscard]] const std::vector<unsigned> &assignedProcessors() const { return node_to_processor_assignment; }
+
     [[nodiscard]] std::vector<unsigned> &assignedProcessors() { return node_to_processor_assignment; }
 
     /**
      * @brief Returns the staleness of the schedule.
-     * The staleness determines the minimum number of supersteps that must elapse between the assignment of a node to a processor and the assignment of one of its neighbors to a different processor.
-     * The staleness for the BspSchedule is always 1.
+     * The staleness determines the minimum number of supersteps that must elapse between the assignment of a node to a processor
+     * and the assignment of one of its neighbors to a different processor. The staleness for the BspSchedule is always 1.
      *
      * @return The staleness of the schedule.
      */
@@ -304,8 +314,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
                 node_to_superstep_assignment[i] = vec[i];
             }
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning supersteps: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning supersteps: size does not match number of nodes.");
         }
     }
 
@@ -318,8 +327,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         if (vec.size() == static_cast<std::size_t>(instance->numberOfVertices())) {
             node_to_superstep_assignment = std::move(vec);
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning supersteps: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning supersteps: size does not match number of nodes.");
         }
 
         updateNumberOfSupersteps();
@@ -334,8 +342,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         if (vec.size() == static_cast<std::size_t>(instance->numberOfVertices())) {
             node_to_processor_assignment = vec;
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning processors: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes.");
         }
     }
 
@@ -348,8 +355,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         if (vec.size() == static_cast<std::size_t>(instance->numberOfVertices())) {
             node_to_processor_assignment = std::move(vec);
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning processors: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes.");
         }
     }
 
@@ -377,7 +383,9 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      *
      * @return True if the schedule is valid, false otherwise.
      */
-    [[nodiscard]] bool isValid() const { return satisfiesPrecedenceConstraints() && satisfiesMemoryConstraints() && satisfiesNodeTypeConstraints(); }
+    [[nodiscard]] bool isValid() const {
+        return satisfiesPrecedenceConstraints() && satisfiesMemoryConstraints() && satisfiesNodeTypeConstraints();
+    }
 
     /**
      * @brief Returns true if the schedule satisfies the precedence constraints of the computational DAG.
@@ -389,8 +397,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @return True if the schedule satisfies the precedence constraints of the computational DAG, false otherwise.
      */
     [[nodiscard]] bool satisfiesPrecedenceConstraints() const {
-        if (static_cast<vertex_idx_t<Graph_t>>(node_to_processor_assignment.size()) != instance->numberOfVertices() ||
-            static_cast<vertex_idx_t<Graph_t>>(node_to_superstep_assignment.size()) != instance->numberOfVertices()) {
+        if (static_cast<vertex_idx_t<Graph_t>>(node_to_processor_assignment.size()) != instance->numberOfVertices()
+            || static_cast<vertex_idx_t<Graph_t>>(node_to_superstep_assignment.size()) != instance->numberOfVertices()) {
             return false;
         }
 
@@ -403,7 +411,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
             }
 
             for (const auto &target : instance->getComputationalDag().children(v)) {
-                const unsigned different_processors = (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : getStaleness();
+                const unsigned different_processors
+                    = (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : getStaleness();
                 if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) {
                     return false;
                 }
@@ -442,32 +451,30 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @return True if memory constraints are satisfied, false otherwise.
      */
     [[nodiscard]] bool satisfiesMemoryConstraints() const {
-
         switch (instance->getArchitecture().getMemoryConstraintType()) {
+            case MEMORY_CONSTRAINT_TYPE::LOCAL:
+                return satisfiesLocalMemoryConstraints();
 
-        case MEMORY_CONSTRAINT_TYPE::LOCAL:
-            return satisfiesLocalMemoryConstraints();
-
-        case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT:
-            return satisfiesPersistentAndTransientMemoryConstraints();
+            case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT:
+                return satisfiesPersistentAndTransientMemoryConstraints();
 
-        case MEMORY_CONSTRAINT_TYPE::GLOBAL:
-            return satisfiesGlobalMemoryConstraints();
+            case MEMORY_CONSTRAINT_TYPE::GLOBAL:
+                return satisfiesGlobalMemoryConstraints();
 
-        case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT:
-            return satisfiesLocalInOutMemoryConstraints();
+            case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT:
+                return satisfiesLocalInOutMemoryConstraints();
 
-        case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES:
-            return satisfiesLocalIncEdgesMemoryConstraints();
+            case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES:
+                return satisfiesLocalIncEdgesMemoryConstraints();
 
-        case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES:
-            return satisfiesLocalSourcesIncEdgesMemoryConstraints();
+            case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES:
+                return satisfiesLocalSourcesIncEdgesMemoryConstraints();
 
-        case MEMORY_CONSTRAINT_TYPE::NONE:
-            return true;
+            case MEMORY_CONSTRAINT_TYPE::NONE:
+                return true;
 
-        default:
-            throw std::invalid_argument("Unknown memory constraint type.");
+            default:
+                throw std::invalid_argument("Unknown memory constraint type.");
         }
     }
 
@@ -496,7 +503,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param superstep The superstep index.
      * @return A vector of nodes assigned to the specified processor and superstep.
      */
-    [[nodiscard]] std::vector<vertex_idx_t<Graph_t>> getAssignedNodeVector(const unsigned processor, const unsigned superstep) const {
+    [[nodiscard]] std::vector<vertex_idx_t<Graph_t>> getAssignedNodeVector(const unsigned processor,
+                                                                           const unsigned superstep) const {
         std::vector<vertex_idx_t<Graph_t>> vec;
 
         for (const auto &node : instance->vertices()) {
@@ -513,9 +521,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      *
      * @param number_of_supersteps_ The number of supersteps.
      */
-    void setNumberOfSupersteps(const unsigned number_of_supersteps_) {
-        number_of_supersteps = number_of_supersteps_;
-    }
+    void setNumberOfSupersteps(const unsigned number_of_supersteps_) { number_of_supersteps = number_of_supersteps_; }
 
     /**
      * @brief Returns the number of nodes assigned to the specified processor.
@@ -573,8 +579,9 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         for (const auto &node : instance->vertices()) {
             for (const auto &child : instance->getComputationalDag().children(node)) {
                 if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) {
-                    for (unsigned offset = 1; offset <= getStaleness(); ++offset)
+                    for (unsigned offset = 1; offset <= getStaleness(); ++offset) {
                         comm_phase_empty[node_to_superstep_assignment[child] - offset] = false;
+                    }
                 }
             }
         }
@@ -583,8 +590,9 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         unsigned current_index = 0;
         for (unsigned step = 0; step < number_of_supersteps; ++step) {
             new_step_index[step] = current_index;
-            if (!comm_phase_empty[step])
+            if (!comm_phase_empty[step]) {
                 current_index++;
+            }
         }
         for (const auto &node : instance->vertices()) {
             node_to_superstep_assignment[node] = new_step_index[node_to_superstep_assignment[node]];
@@ -606,7 +614,6 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
 
         for (unsigned step = 0; step < number_of_supersteps; step++) {
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
                 v_memw_t<Graph_t> memory = 0;
                 for (const auto &node : set_schedule.step_processor_vertices[step][proc]) {
                     memory += instance->getComputationalDag().vertex_mem_weight(node);
@@ -636,11 +643,11 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         for (const auto &node : instance->vertices()) {
             const unsigned proc = node_to_processor_assignment[node];
             current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node);
-            current_proc_transient_memory[proc] = std::max(
-                current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(node));
+            current_proc_transient_memory[proc]
+                = std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(node));
 
-            if (current_proc_persistent_memory[proc] + current_proc_transient_memory[proc] >
-                instance->getArchitecture().memoryBound(proc)) {
+            if (current_proc_persistent_memory[proc] + current_proc_transient_memory[proc]
+                > instance->getArchitecture().memoryBound(proc)) {
                 return false;
             }
         }
@@ -676,13 +683,11 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
                 v_memw_t<Graph_t> memory = 0;
                 for (const auto &node : set_schedule.step_processor_vertices[step][proc]) {
-                    memory += instance->getComputationalDag().vertex_mem_weight(node) +
-                              instance->getComputationalDag().vertex_comm_weight(node);
+                    memory += instance->getComputationalDag().vertex_mem_weight(node)
+                              + instance->getComputationalDag().vertex_comm_weight(node);
 
                     for (const auto &parent : instance->getComputationalDag().parents(node)) {
-
-                        if (node_to_processor_assignment[parent] == proc &&
-                            node_to_superstep_assignment[parent] == step) {
+                        if (node_to_processor_assignment[parent] == proc && node_to_superstep_assignment[parent] == step) {
                             memory -= instance->getComputationalDag().vertex_comm_weight(parent);
                         }
                     }
@@ -760,4 +765,4 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/BspScheduleCS.hpp b/include/osp/bsp/model/BspScheduleCS.hpp
index 63d94798..ac906e39 100644
--- a/include/osp/bsp/model/BspScheduleCS.hpp
+++ b/include/osp/bsp/model/BspScheduleCS.hpp
@@ -48,9 +48,8 @@ namespace osp {
  *
  * @see BspInstance
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class BspScheduleCS : public BspSchedule<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "BspScheduleCS can only be used with computational DAGs.");
 
   public:
@@ -63,14 +62,15 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
     std::map<KeyTriple, unsigned> commSchedule;
 
   protected:
-    void compute_cs_communication_costs_helper(std::vector<std::vector<v_commw_t<Graph_t>>> &rec, std::vector<std::vector<v_commw_t<Graph_t>>> &send) const {
+    void compute_cs_communication_costs_helper(std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
+                                               std::vector<std::vector<v_commw_t<Graph_t>>> &send) const {
         for (auto const &[key, val] : commSchedule) {
-            send[std::get<1>(key)][val] +=
-                BspSchedule<Graph_t>::instance->sendCosts(std::get<1>(key), std::get<2>(key)) *
-                BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
-            rec[std::get<2>(key)][val] +=
-                BspSchedule<Graph_t>::instance->sendCosts(std::get<1>(key), std::get<2>(key)) *
-                BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
+            send[std::get<1>(key)][val]
+                += BspSchedule<Graph_t>::instance->sendCosts(std::get<1>(key), std::get<2>(key))
+                   * BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
+            rec[std::get<2>(key)][val]
+                += BspSchedule<Graph_t>::instance->sendCosts(std::get<1>(key), std::get<2>(key))
+                   * BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
         }
     }
 
@@ -92,7 +92,8 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
      * @param processor_assignment_ The processor assignment for the nodes.
      * @param superstep_assignment_ The superstep assignment for the nodes.
      */
-    BspScheduleCS(const BspInstance<Graph_t> &inst, const std::vector<unsigned> &processor_assignment_,
+    BspScheduleCS(const BspInstance<Graph_t> &inst,
+                  const std::vector<unsigned> &processor_assignment_,
                   const std::vector<unsigned> &superstep_assignment_)
         : BspSchedule<Graph_t>(inst, processor_assignment_, superstep_assignment_) {}
 
@@ -105,7 +106,8 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
      * @param superstep_assignment_ The superstep assignment for the nodes.
      * @param comm_ The communication schedule for the nodes.
      */
-    BspScheduleCS(const BspInstance<Graph_t> &inst, const std::vector<unsigned int> &processor_assignment_,
+    BspScheduleCS(const BspInstance<Graph_t> &inst,
+                  const std::vector<unsigned int> &processor_assignment_,
                   const std::vector<unsigned int> &superstep_assignment_,
                   const std::map<KeyTriple, unsigned int> &comm_)
         : BspSchedule<Graph_t>(inst, processor_assignment_, superstep_assignment_), commSchedule(comm_) {}
@@ -136,27 +138,28 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
     virtual ~BspScheduleCS() = default;
 
     inline const std::map<KeyTriple, unsigned> &getCommunicationSchedule() const { return commSchedule; }
+
     inline std::map<KeyTriple, unsigned> &getCommunicationSchedule() { return commSchedule; }
 
     inline bool hasValidCommSchedule() const { return checkCommScheduleValidity(commSchedule); }
 
     void addCommunicationScheduleEntry(KeyTriple key, unsigned step) {
+        if (step >= BspSchedule<Graph_t>::number_of_supersteps) {
+            throw std::invalid_argument("Invalid Argument while adding communication schedule entry: step out of range.");
+        }
 
-        if (step >= BspSchedule<Graph_t>::number_of_supersteps)
-            throw std::invalid_argument(
-                "Invalid Argument while adding communication schedule entry: step out of range.");
-
-        if (std::get<0>(key) >= BspSchedule<Graph_t>::instance->numberOfVertices())
-            throw std::invalid_argument(
-                "Invalid Argument while adding communication schedule entry: node out of range.");
+        if (std::get<0>(key) >= BspSchedule<Graph_t>::instance->numberOfVertices()) {
+            throw std::invalid_argument("Invalid Argument while adding communication schedule entry: node out of range.");
+        }
 
-        if (std::get<1>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors())
+        if (std::get<1>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors()) {
             throw std::invalid_argument(
                 "Invalid Argument while adding communication schedule entry: from processor out of range.");
+        }
 
-        if (std::get<2>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors())
-            throw std::invalid_argument(
-                "Invalid Argument while adding communication schedule entry: to processor out of range.");
+        if (std::get<2>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors()) {
+            throw std::invalid_argument("Invalid Argument while adding communication schedule entry: to processor out of range.");
+        }
 
         commSchedule[key] = step;
     }
@@ -179,7 +182,6 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
      * @param cs The communication schedule to set.
      */
     void setCommunicationSchedule(const std::map<KeyTriple, unsigned int> &cs) {
-
         if (checkCommScheduleValidity(cs)) {
             commSchedule = cs;
         } else {
@@ -188,37 +190,38 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
     }
 
     bool checkCommScheduleValidity(const std::map<KeyTriple, unsigned int> &cs) const {
-
-        std::vector<std::vector<unsigned>> first_at = std::vector<std::vector<unsigned>>(
-            BspSchedule<Graph_t>::instance->numberOfVertices(),
-            std::vector<unsigned>(BspSchedule<Graph_t>::instance->numberOfProcessors(),
-                                  BspSchedule<Graph_t>::number_of_supersteps));
+        std::vector<std::vector<unsigned>> first_at
+            = std::vector<std::vector<unsigned>>(BspSchedule<Graph_t>::instance->numberOfVertices(),
+                                                 std::vector<unsigned>(BspSchedule<Graph_t>::instance->numberOfProcessors(),
+                                                                       BspSchedule<Graph_t>::number_of_supersteps));
 
         for (const auto &node : BspSchedule<Graph_t>::instance->vertices()) {
-            first_at[node][BspSchedule<Graph_t>::node_to_processor_assignment[node]] =
-                BspSchedule<Graph_t>::node_to_superstep_assignment[node];
+            first_at[node][BspSchedule<Graph_t>::node_to_processor_assignment[node]]
+                = BspSchedule<Graph_t>::node_to_superstep_assignment[node];
         }
 
         for (auto const &[key, val] : cs) {
-
-            if (val >= BspSchedule<Graph_t>::number_of_supersteps)
+            if (val >= BspSchedule<Graph_t>::number_of_supersteps) {
                 return false;
+            }
 
-            if (std::get<0>(key) >= BspSchedule<Graph_t>::instance->numberOfVertices())
+            if (std::get<0>(key) >= BspSchedule<Graph_t>::instance->numberOfVertices()) {
                 return false;
+            }
 
-            if (std::get<1>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors())
+            if (std::get<1>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors()) {
                 return false;
+            }
 
-            if (std::get<2>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors())
+            if (std::get<2>(key) >= BspSchedule<Graph_t>::instance->numberOfProcessors()) {
                 return false;
+            }
 
-            first_at[std::get<0>(key)][std::get<2>(key)] =
-                std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + this->getStaleness());
+            first_at[std::get<0>(key)][std::get<2>(key)]
+                = std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + this->getStaleness());
         }
 
         for (auto const &[key, val] : cs) {
-
             if (val < first_at[std::get<0>(key)][std::get<1>(key)]) {
                 return false;
             }
@@ -226,11 +229,10 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
 
         for (const auto &v : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
             for (const auto &target : BspSchedule<Graph_t>::instance->getComputationalDag().children(v)) {
-
-                if (BspSchedule<Graph_t>::node_to_processor_assignment[v] !=
-                    BspSchedule<Graph_t>::node_to_processor_assignment[target]) {
-                    if (first_at[v][BspSchedule<Graph_t>::node_to_processor_assignment[target]] >
-                        BspSchedule<Graph_t>::node_to_superstep_assignment[target]) {
+                if (BspSchedule<Graph_t>::node_to_processor_assignment[v]
+                    != BspSchedule<Graph_t>::node_to_processor_assignment[target]) {
+                    if (first_at[v][BspSchedule<Graph_t>::node_to_processor_assignment[target]]
+                        > BspSchedule<Graph_t>::node_to_superstep_assignment[target]) {
                         return false;
                     }
                 }
@@ -241,9 +243,10 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
     }
 
     v_commw_t<Graph_t> compute_cs_communication_costs() const {
-
-        std::vector<std::vector<v_commw_t<Graph_t>>> rec(this->instance->numberOfProcessors(), std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
-        std::vector<std::vector<v_commw_t<Graph_t>>> send(this->instance->numberOfProcessors(), std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> rec(this->instance->numberOfProcessors(),
+                                                         std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> send(this->instance->numberOfProcessors(),
+                                                          std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
 
         compute_cs_communication_costs_helper(rec, send);
         const std::vector<v_commw_t<Graph_t>> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send);
@@ -266,8 +269,8 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
 
     void setAutoCommunicationSchedule() {
         std::map<KeyTriple, unsigned> best_comm_schedule;
-        v_workw_t<Graph_t> best_comm_cost =
-            std::numeric_limits<v_workw_t<Graph_t>>::max(); // computeCosts retunrs v_workw_t<Graph_t>
+        v_workw_t<Graph_t> best_comm_cost
+            = std::numeric_limits<v_workw_t<Graph_t>>::max();    // computeCosts returns v_workw_t<Graph_t>
 
         if (hasValidCommSchedule()) {
             v_workw_t<Graph_t> costs_com = BspSchedule<Graph_t>::computeCosts();
@@ -306,9 +309,10 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
 
     void setImprovedLazyCommunicationSchedule() {
         commSchedule.clear();
-        if (BspSchedule<Graph_t>::instance->getComputationalDag().num_vertices() <= 1 ||
-            BspSchedule<Graph_t>::number_of_supersteps <= 1)
+        if (BspSchedule<Graph_t>::instance->getComputationalDag().num_vertices() <= 1
+            || BspSchedule<Graph_t>::number_of_supersteps <= 1) {
             return;
+        }
 
         std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> step_proc_node_list(
             BspSchedule<Graph_t>::number_of_supersteps,
@@ -327,17 +331,18 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
 
         // The data structure stores for each processor a set of tuples representing required sends.
         // Each tuple is (communication_cost, source_node, destination_processor).
-        std::vector<std::set<std::tuple<v_commw_t<Graph_t>, vertex_idx_t<Graph_t>, unsigned>, std::greater<>>> require_sending(BspSchedule<Graph_t>::instance->numberOfProcessors());
+        std::vector<std::set<std::tuple<v_commw_t<Graph_t>, vertex_idx_t<Graph_t>, unsigned>, std::greater<>>> require_sending(
+            BspSchedule<Graph_t>::instance->numberOfProcessors());
 
         for (unsigned proc = 0; proc < BspSchedule<Graph_t>::instance->numberOfProcessors(); proc++) {
             for (const auto &node : step_proc_node_list[0][proc]) {
-
                 for (const auto &target : BspSchedule<Graph_t>::instance->getComputationalDag().children(node)) {
                     if (proc != BspSchedule<Graph_t>::assignedProcessor(target)) {
-                        require_sending[proc].insert(
-                            {BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(node) * BspSchedule<Graph_t>::instance->getArchitecture().sendCosts(proc, BspSchedule<Graph_t>::node_to_processor_assignment[target]),
-                             node,
-                             BspSchedule<Graph_t>::node_to_processor_assignment[target]});
+                        require_sending[proc].insert({BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(node)
+                                                          * BspSchedule<Graph_t>::instance->getArchitecture().sendCosts(
+                                                              proc, BspSchedule<Graph_t>::node_to_processor_assignment[target]),
+                                                      node,
+                                                      BspSchedule<Graph_t>::node_to_processor_assignment[target]});
                     }
                 }
             }
@@ -351,18 +356,16 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
             for (unsigned proc = 0; proc < BspSchedule<Graph_t>::instance->numberOfProcessors(); proc++) {
                 for (const auto &node : step_proc_node_list[step][proc]) {
                     for (const auto &source : BspSchedule<Graph_t>::instance->getComputationalDag().parents(node)) {
-
                         if (!node_to_proc_been_sent[source][proc]) {
                             assert(BspSchedule<Graph_t>::node_to_superstep_assignment[source] < step + 1 - this->getStaleness());
                             commSchedule.emplace(
-                                std::make_tuple(source, BspSchedule<Graph_t>::node_to_processor_assignment[source],
-                                                proc),
+                                std::make_tuple(source, BspSchedule<Graph_t>::node_to_processor_assignment[source], proc),
                                 step - this->getStaleness());
                             node_to_proc_been_sent[source][proc] = true;
-                            v_commw_t<Graph_t> comm_cost =
-                                BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(source) *
-                                BspSchedule<Graph_t>::instance->getArchitecture().sendCosts(
-                                    BspSchedule<Graph_t>::node_to_processor_assignment[source], proc);
+                            v_commw_t<Graph_t> comm_cost
+                                = BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(source)
+                                  * BspSchedule<Graph_t>::instance->getArchitecture().sendCosts(
+                                      BspSchedule<Graph_t>::node_to_processor_assignment[source], proc);
                             require_sending[BspSchedule<Graph_t>::node_to_processor_assignment[source]].erase(
                                 {comm_cost, source, proc});
                             send_cost[BspSchedule<Graph_t>::node_to_processor_assignment[source]] += comm_cost;
@@ -382,15 +385,14 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
             // extra sends
             // TODO: permute the order of processors
             for (size_t proc = 0; proc < BspSchedule<Graph_t>::instance->numberOfProcessors(); proc++) {
-                if (require_sending[proc].empty() ||
-                    std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] >
-                        max_comm_cost)
+                if (require_sending[proc].empty()
+                    || std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > max_comm_cost) {
                     continue;
+                }
                 auto iter = require_sending[proc].begin();
                 while (iter != require_sending[proc].end()) {
                     const auto &[comm_cost, node_to_send, dest_proc] = *iter;
-                    if (comm_cost + send_cost[proc] > max_comm_cost ||
-                        comm_cost + receive_cost[dest_proc] > max_comm_cost) {
+                    if (comm_cost + send_cost[proc] > max_comm_cost || comm_cost + receive_cost[dest_proc] > max_comm_cost) {
                         iter++;
                     } else {
                         commSchedule.emplace(std::make_tuple(node_to_send, proc, dest_proc), step - this->getStaleness());
@@ -398,10 +400,10 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
                         send_cost[proc] += comm_cost;
                         receive_cost[dest_proc] += comm_cost;
                         iter = require_sending[proc].erase(iter);
-                        if (require_sending[proc].empty() ||
-                            std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] >
-                                max_comm_cost)
-                            break; // Exit if no more sends can possibly fit.
+                        if (require_sending[proc].empty()
+                            || std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > max_comm_cost) {
+                            break;    // Exit if no more sends can possibly fit.
+                        }
                     }
                 }
             }
@@ -409,15 +411,16 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
             // updating require_sending
             for (unsigned proc = 0; proc < BspSchedule<Graph_t>::instance->numberOfProcessors(); proc++) {
                 for (const auto &node : step_proc_node_list[step][proc]) {
-
-                    for (const auto &target : BspSchedule<Graph_t>::instance->getComputationalDag().children(node))
+                    for (const auto &target : BspSchedule<Graph_t>::instance->getComputationalDag().children(node)) {
                         if (proc != BspSchedule<Graph_t>::assignedProcessor(target)) {
                             require_sending[proc].insert(
-                                {BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(node) *
-                                     BspSchedule<Graph_t>::instance->getArchitecture().sendCosts(
+                                {BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(node)
+                                     * BspSchedule<Graph_t>::instance->getArchitecture().sendCosts(
                                          proc, BspSchedule<Graph_t>::node_to_processor_assignment[target]),
-                                 node, BspSchedule<Graph_t>::node_to_processor_assignment[target]});
+                                 node,
+                                 BspSchedule<Graph_t>::node_to_processor_assignment[target]});
                         }
+                    }
                 }
             }
         }
@@ -428,53 +431,59 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
 
         for (const auto &source : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
             for (const auto &target : BspSchedule<Graph_t>::instance->getComputationalDag().children(source)) {
-
-                if (BspSchedule<Graph_t>::node_to_processor_assignment[source] !=
-                    BspSchedule<Graph_t>::node_to_processor_assignment[target]) {
-
-                    const auto tmp = std::make_tuple(source, BspSchedule<Graph_t>::node_to_processor_assignment[source],
+                if (BspSchedule<Graph_t>::node_to_processor_assignment[source]
+                    != BspSchedule<Graph_t>::node_to_processor_assignment[target]) {
+                    const auto tmp = std::make_tuple(source,
+                                                     BspSchedule<Graph_t>::node_to_processor_assignment[source],
                                                      BspSchedule<Graph_t>::node_to_processor_assignment[target]);
                     if (commSchedule.find(tmp) == commSchedule.end()) {
                         commSchedule[tmp] = BspSchedule<Graph_t>::node_to_superstep_assignment[target] - this->getStaleness();
 
                     } else {
-                        commSchedule[tmp] =
-                            std::min(BspSchedule<Graph_t>::node_to_superstep_assignment[target] - this->getStaleness(), commSchedule[tmp]);
+                        commSchedule[tmp] = std::min(
+                            BspSchedule<Graph_t>::node_to_superstep_assignment[target] - this->getStaleness(), commSchedule[tmp]);
                     }
                 }
             }
         }
     }
+
     void setEagerCommunicationSchedule() {
         commSchedule.clear();
 
         for (const auto &source : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
             for (const auto &target : BspSchedule<Graph_t>::instance->getComputationalDag().children(source)) {
-
-                if (BspSchedule<Graph_t>::node_to_processor_assignment[source] !=
-                    BspSchedule<Graph_t>::node_to_processor_assignment[target]) {
-
-                    commSchedule[std::make_tuple(source, BspSchedule<Graph_t>::node_to_processor_assignment[source],
-                                                 BspSchedule<Graph_t>::node_to_processor_assignment[target])] =
-                        BspSchedule<Graph_t>::node_to_superstep_assignment[source];
+                if (BspSchedule<Graph_t>::node_to_processor_assignment[source]
+                    != BspSchedule<Graph_t>::node_to_processor_assignment[target]) {
+                    commSchedule[std::make_tuple(source,
+                                                 BspSchedule<Graph_t>::node_to_processor_assignment[source],
+                                                 BspSchedule<Graph_t>::node_to_processor_assignment[target])]
+                        = BspSchedule<Graph_t>::node_to_superstep_assignment[source];
                 }
             }
         }
     }
 
     virtual void shrinkByMergingSupersteps() override {
-
         std::vector<unsigned> superstep_latest_dependency(this->number_of_supersteps, 0);
         std::vector<std::vector<unsigned>> first_at = getFirstPresence();
 
-        for (auto const &[key, val] : commSchedule)
-            if (this->assignedProcessor(std::get<0>(key)) != std::get<1>(key))
-                superstep_latest_dependency[val] = std::max(superstep_latest_dependency[val], first_at[std::get<0>(key)][std::get<1>(key)]);
+        for (auto const &[key, val] : commSchedule) {
+            if (this->assignedProcessor(std::get<0>(key)) != std::get<1>(key)) {
+                superstep_latest_dependency[val]
+                    = std::max(superstep_latest_dependency[val], first_at[std::get<0>(key)][std::get<1>(key)]);
+            }
+        }
 
-        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices())
-            for (const auto &child : BspSchedule<Graph_t>::instance->getComputationalDag().children(node))
-                if (this->assignedProcessor(node) != this->assignedProcessor(child))
-                    superstep_latest_dependency[this->assignedSuperstep(child)] = std::max(superstep_latest_dependency[this->assignedSuperstep(child)], first_at[node][this->assignedProcessor(child)]);
+        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
+            for (const auto &child : BspSchedule<Graph_t>::instance->getComputationalDag().children(node)) {
+                if (this->assignedProcessor(node) != this->assignedProcessor(child)) {
+                    superstep_latest_dependency[this->assignedSuperstep(child)]
+                        = std::max(superstep_latest_dependency[this->assignedSuperstep(child)],
+                                   first_at[node][this->assignedProcessor(child)]);
+                }
+            }
+        }
 
         std::vector<bool> merge_with_previous(this->number_of_supersteps, false);
         for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) {
@@ -491,89 +500,105 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
         std::vector<unsigned> new_step_index(this->number_of_supersteps);
         unsigned current_index = std::numeric_limits<unsigned>::max();
         for (unsigned step = 0; step < this->number_of_supersteps; ++step) {
-            if (!merge_with_previous[step])
+            if (!merge_with_previous[step]) {
                 current_index++;
+            }
 
             new_step_index[step] = current_index;
         }
-        for (const auto &node : this->instance->vertices())
+        for (const auto &node : this->instance->vertices()) {
             this->node_to_superstep_assignment[node] = new_step_index[this->node_to_superstep_assignment[node]];
-        for (auto &[key, val] : commSchedule)
+        }
+        for (auto &[key, val] : commSchedule) {
             val = new_step_index[val];
+        }
 
         this->setNumberOfSupersteps(current_index + 1);
     }
 
     // for each vertex v and processor p, find the first superstep where v is present on p by the end of the compute phase
     std::vector<std::vector<unsigned>> getFirstPresence() const {
+        std::vector<std::vector<unsigned>> first_at(
+            BspSchedule<Graph_t>::instance->numberOfVertices(),
+            std::vector<unsigned>(BspSchedule<Graph_t>::instance->numberOfProcessors(), std::numeric_limits<unsigned>::max()));
 
-        std::vector<std::vector<unsigned>> first_at(BspSchedule<Graph_t>::instance->numberOfVertices(),
-                                                    std::vector<unsigned>(BspSchedule<Graph_t>::instance->numberOfProcessors(), std::numeric_limits<unsigned>::max()));
-
-        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices())
+        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
             first_at[node][this->assignedProcessor(node)] = this->assignedSuperstep(node);
+        }
 
-        for (auto const &[key, val] : commSchedule)
-            first_at[std::get<0>(key)][std::get<2>(key)] =
-                std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + 1); // TODO: replace by staleness after merge
+        for (auto const &[key, val] : commSchedule) {
+            first_at[std::get<0>(key)][std::get<2>(key)]
+                = std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + 1);    // TODO: replace by staleness after merge
+        }
 
         return first_at;
     }
 
     // remove unneeded comm. schedule entries - these can happen in ILPs, partial ILPs, etc.
     void cleanCommSchedule() {
-
         // data that is already present before it arrives
-        std::vector<std::vector<std::multiset<unsigned>>> arrives_at(BspSchedule<Graph_t>::instance->numberOfVertices(),
-                                                                     std::vector<std::multiset<unsigned>>(BspSchedule<Graph_t>::instance->numberOfProcessors()));
-        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices())
+        std::vector<std::vector<std::multiset<unsigned>>> arrives_at(
+            BspSchedule<Graph_t>::instance->numberOfVertices(),
+            std::vector<std::multiset<unsigned>>(BspSchedule<Graph_t>::instance->numberOfProcessors()));
+        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
             arrives_at[node][this->assignedProcessor(node)].insert(this->assignedSuperstep(node));
+        }
 
-        for (auto const &[key, val] : commSchedule)
+        for (auto const &[key, val] : commSchedule) {
             arrives_at[std::get<0>(key)][std::get<2>(key)].insert(val);
+        }
 
         std::vector<KeyTriple> toErase;
         for (auto const &[key, val] : commSchedule) {
             auto itr = arrives_at[std::get<0>(key)][std::get<2>(key)].begin();
-            if (*itr < val)
+            if (*itr < val) {
                 toErase.push_back(key);
-            else if (*itr == val && ++itr != arrives_at[std::get<0>(key)][std::get<2>(key)].end() && *itr == val) {
+            } else if (*itr == val && ++itr != arrives_at[std::get<0>(key)][std::get<2>(key)].end() && *itr == val) {
                 toErase.push_back(key);
                 arrives_at[std::get<0>(key)][std::get<2>(key)].erase(itr);
             }
         }
 
-        for (const KeyTriple &key : toErase)
+        for (const KeyTriple &key : toErase) {
             commSchedule.erase(key);
+        }
 
         // data that is not used after being sent
-        std::vector<std::vector<std::multiset<unsigned>>> used_at(BspSchedule<Graph_t>::instance->numberOfVertices(),
-                                                                  std::vector<std::multiset<unsigned>>(BspSchedule<Graph_t>::instance->numberOfProcessors()));
-        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices())
-            for (const auto &child : BspSchedule<Graph_t>::instance->getComputationalDag().children(node))
+        std::vector<std::vector<std::multiset<unsigned>>> used_at(
+            BspSchedule<Graph_t>::instance->numberOfVertices(),
+            std::vector<std::multiset<unsigned>>(BspSchedule<Graph_t>::instance->numberOfProcessors()));
+        for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag().vertices()) {
+            for (const auto &child : BspSchedule<Graph_t>::instance->getComputationalDag().children(node)) {
                 used_at[node][this->assignedProcessor(child)].insert(this->assignedSuperstep(child));
+            }
+        }
 
-        for (auto const &[key, val] : commSchedule)
+        for (auto const &[key, val] : commSchedule) {
             used_at[std::get<0>(key)][std::get<1>(key)].insert(val);
+        }
 
         // (need to visit cs entries in reverse superstep order here)
         std::vector<std::vector<KeyTriple>> entries(this->number_of_supersteps);
-        for (auto const &[key, val] : commSchedule)
+        for (auto const &[key, val] : commSchedule) {
             entries[val].push_back(key);
+        }
 
         toErase.clear();
-        for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step)
-            for (const KeyTriple &key : entries[step])
-                if (used_at[std::get<0>(key)][std::get<2>(key)].empty() ||
-                    *used_at[std::get<0>(key)][std::get<2>(key)].rbegin() <= step) {
+        for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) {
+            for (const KeyTriple &key : entries[step]) {
+                if (used_at[std::get<0>(key)][std::get<2>(key)].empty()
+                    || *used_at[std::get<0>(key)][std::get<2>(key)].rbegin() <= step) {
                     toErase.push_back(key);
                     auto itr = used_at[std::get<0>(key)][std::get<1>(key)].find(step);
                     used_at[std::get<0>(key)][std::get<1>(key)].erase(itr);
                 }
+            }
+        }
 
-        for (const KeyTriple &key : toErase)
+        for (const KeyTriple &key : toErase) {
             commSchedule.erase(key);
+        }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/BspScheduleRecomp.hpp b/include/osp/bsp/model/BspScheduleRecomp.hpp
index 8e8a9cc2..7f3f233c 100644
--- a/include/osp/bsp/model/BspScheduleRecomp.hpp
+++ b/include/osp/bsp/model/BspScheduleRecomp.hpp
@@ -18,16 +18,14 @@ limitations under the License.
 
 #pragma once
 
-
 #include "IBspScheduleEval.hpp"
 #include "osp/bsp/model/BspScheduleCS.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class BspScheduleRecomp : public IBspScheduleEval<Graph_t> {
-
   public:
     using vertex_idx = vertex_idx_t<Graph_t>;
     using cost_type = v_workw_t<Graph_t>;
@@ -35,10 +33,10 @@ class BspScheduleRecomp : public IBspScheduleEval<Graph_t> {
     using KeyTriple = std::tuple<vertex_idx_t<Graph_t>, unsigned int, unsigned int>;
 
     static_assert(is_computational_dag_v<Graph_t>, "BspScheduleRecomp can only be used with computational DAGs.");
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t> >, "BspScheduleRecomp requires work and comm. weights to have the same type."); 
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "BspScheduleRecomp requires work and comm. weights to have the same type.");
 
   private:
-
     const BspInstance<Graph_t> *instance;
 
     unsigned int number_of_supersteps = 0;
@@ -48,13 +46,14 @@ class BspScheduleRecomp : public IBspScheduleEval<Graph_t> {
     std::map<KeyTriple, unsigned> commSchedule;
 
   public:
-
     BspScheduleRecomp() = default;
 
-    BspScheduleRecomp(const BspInstance<Graph_t> &inst) : instance(&inst)
-                            {node_to_processor_and_supertep_assignment.resize(inst.numberOfVertices());}
+    BspScheduleRecomp(const BspInstance<Graph_t> &inst) : instance(&inst) {
+        node_to_processor_and_supertep_assignment.resize(inst.numberOfVertices());
+    }
 
     BspScheduleRecomp(const BspScheduleCS<Graph_t> &schedule);
+
     BspScheduleRecomp(const BspSchedule<Graph_t> &schedule) : BspScheduleRecomp<Graph_t>(BspScheduleCS<Graph_t>(schedule)) {}
 
     virtual ~BspScheduleRecomp() = default;
@@ -67,17 +66,17 @@ class BspScheduleRecomp : public IBspScheduleEval<Graph_t> {
      * @return The number of supersteps in the schedule.
      */
     virtual unsigned numberOfSupersteps() const override { return number_of_supersteps; }
+
     void setNumberOfSupersteps(unsigned number_of_supersteps_) { number_of_supersteps = number_of_supersteps_; }
 
-    std::vector<std::pair<unsigned, unsigned>>& assignments(vertex_idx node) {
+    std::vector<std::pair<unsigned, unsigned>> &assignments(vertex_idx node) {
         return node_to_processor_and_supertep_assignment[node];
     }
 
-    const std::vector<std::pair<unsigned, unsigned>>& assignments(vertex_idx node) const {
+    const std::vector<std::pair<unsigned, unsigned>> &assignments(vertex_idx node) const {
         return node_to_processor_and_supertep_assignment[node];
     }
 
-
     /**
      * @brief Sets the communication schedule for the schedule.
      *
@@ -126,85 +125,87 @@ class BspScheduleRecomp : public IBspScheduleEval<Graph_t> {
 
     vertex_idx getTotalAssignments() const;
 
-    void mergeSupersteps();   
-
+    void mergeSupersteps();
 };
 
-template<typename Graph_t>
-BspScheduleRecomp<Graph_t>::BspScheduleRecomp(const BspScheduleCS<Graph_t> &schedule) : instance(&schedule.getInstance())
-{
+template <typename Graph_t>
+BspScheduleRecomp<Graph_t>::BspScheduleRecomp(const BspScheduleCS<Graph_t> &schedule) : instance(&schedule.getInstance()) {
     node_to_processor_and_supertep_assignment.clear();
     node_to_processor_and_supertep_assignment.resize(instance->numberOfVertices());
     number_of_supersteps = schedule.numberOfSupersteps();
 
-    for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-        node_to_processor_and_supertep_assignment[node].emplace_back(schedule.assignedProcessor(node), schedule.assignedSuperstep(node));
+    for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+        node_to_processor_and_supertep_assignment[node].emplace_back(schedule.assignedProcessor(node),
+                                                                     schedule.assignedSuperstep(node));
+    }
 
     commSchedule = schedule.getCommunicationSchedule();
 }
 
-template<typename Graph_t>
-void BspScheduleRecomp<Graph_t>::addCommunicationScheduleEntry(unsigned node, unsigned from_proc, unsigned to_proc,
-                                                      unsigned step) {
+template <typename Graph_t>
+void BspScheduleRecomp<Graph_t>::addCommunicationScheduleEntry(unsigned node, unsigned from_proc, unsigned to_proc, unsigned step) {
     addCommunicationScheduleEntry(std::make_tuple(node, from_proc, to_proc), step);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void BspScheduleRecomp<Graph_t>::addCommunicationScheduleEntry(KeyTriple key, unsigned step) {
-
-    if (step >= number_of_supersteps)
+    if (step >= number_of_supersteps) {
         throw std::invalid_argument("Invalid Argument while adding communication schedule entry: step out of range.");
+    }
 
-    if (std::get<0>(key) >= instance->numberOfVertices())
+    if (std::get<0>(key) >= instance->numberOfVertices()) {
         throw std::invalid_argument("Invalid Argument while adding communication schedule entry: node out of range.");
+    }
 
-    if (std::get<1>(key) >= instance->numberOfProcessors())
-        throw std::invalid_argument(
-            "Invalid Argument while adding communication schedule entry: from processor out of range.");
+    if (std::get<1>(key) >= instance->numberOfProcessors()) {
+        throw std::invalid_argument("Invalid Argument while adding communication schedule entry: from processor out of range.");
+    }
 
-    if (std::get<2>(key) >= instance->numberOfProcessors())
-        throw std::invalid_argument(
-            "Invalid Argument while adding communication schedule entry: to processor out of range.");
+    if (std::get<2>(key) >= instance->numberOfProcessors()) {
+        throw std::invalid_argument("Invalid Argument while adding communication schedule entry: to processor out of range.");
+    }
 
     commSchedule[key] = step;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool BspScheduleRecomp<Graph_t>::satisfiesConstraints() const {
-
     // find first availability
-    
-    std::vector<std::vector<unsigned> > node_first_available_on_proc(instance->numberOfVertices(),
-                                                                    std::vector<unsigned>(instance->numberOfProcessors(), std::numeric_limits<unsigned>::max()));
 
-    for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-        for(const std::pair<unsigned, unsigned>& compute_step : node_to_processor_and_supertep_assignment[node])
-            node_first_available_on_proc[node][compute_step.first] =
-                std::min(node_first_available_on_proc[node][compute_step.first], compute_step.second);
+    std::vector<std::vector<unsigned>> node_first_available_on_proc(
+        instance->numberOfVertices(), std::vector<unsigned>(instance->numberOfProcessors(), std::numeric_limits<unsigned>::max()));
 
-    for (auto const &[key, val] : commSchedule) {
+    for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+        for (const std::pair<unsigned, unsigned> &compute_step : node_to_processor_and_supertep_assignment[node]) {
+            node_first_available_on_proc[node][compute_step.first]
+                = std::min(node_first_available_on_proc[node][compute_step.first], compute_step.second);
+        }
+    }
 
-        const vertex_idx& node = std::get<0>(key);
-        const unsigned& to_proc = std::get<2>(key);
+    for (auto const &[key, val] : commSchedule) {
+        const vertex_idx &node = std::get<0>(key);
+        const unsigned &to_proc = std::get<2>(key);
 
         node_first_available_on_proc[node][to_proc] = std::min(node_first_available_on_proc[node][to_proc], val + 1);
     }
 
     // check validity
 
-    for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-        for(vertex_idx pred : instance->getComputationalDag().parents(node))
-            for(const std::pair<unsigned, unsigned>& compute_step : node_to_processor_and_supertep_assignment[node])
-                if(node_first_available_on_proc[pred][compute_step.first] > compute_step.second){
+    for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+        for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
+            for (const std::pair<unsigned, unsigned> &compute_step : node_to_processor_and_supertep_assignment[node]) {
+                if (node_first_available_on_proc[pred][compute_step.first] > compute_step.second) {
                     // std::cout << "Not a valid schedule: parent " << pred << " of node "<< node <<
                     //" not yet available on processor " << compute_step.first << " in superstep "<< compute_step.second <<"." << std::endl;
                     return false;
                 }
+            }
+        }
+    }
 
     for (auto const &[key, val] : commSchedule) {
-
-        const vertex_idx& node = std::get<0>(key);
-        const unsigned& from_proc = std::get<1>(key);
+        const vertex_idx &node = std::get<0>(key);
+        const unsigned &from_proc = std::get<1>(key);
 
         if (node_first_available_on_proc[node][from_proc] > val) {
             // std::cout << "Not a valid schedule: node " << node << " not yet available for sending from processor "
@@ -212,33 +213,29 @@ bool BspScheduleRecomp<Graph_t>::satisfiesConstraints() const {
             return false;
         }
     }
-    
+
     return true;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_workw_t<Graph_t> BspScheduleRecomp<Graph_t>::computeWorkCosts() const {
-
     assert(satisfiesConstraints());
 
     std::vector<std::vector<cost_type>> step_proc_work(number_of_supersteps,
-                                                      std::vector<cost_type>(instance->numberOfProcessors(), 0));
+                                                       std::vector<cost_type>(instance->numberOfProcessors(), 0));
 
     for (vertex_idx node = 0; node < instance->numberOfVertices(); node++) {
-
-        for (const std::pair<unsigned, unsigned>& processor_superstep : node_to_processor_and_supertep_assignment[node]) {
-            step_proc_work[processor_superstep.second][processor_superstep.first] +=
-                instance->getComputationalDag().vertex_work_weight(node);
+        for (const std::pair<unsigned, unsigned> &processor_superstep : node_to_processor_and_supertep_assignment[node]) {
+            step_proc_work[processor_superstep.second][processor_superstep.first]
+                += instance->getComputationalDag().vertex_work_weight(node);
         }
     }
 
     cost_type total_costs = 0;
     for (unsigned step = 0; step < number_of_supersteps; step++) {
-
         cost_type max_work = 0;
 
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
             if (max_work < step_proc_work[step][proc]) {
                 max_work = step_proc_work[step][proc];
             }
@@ -250,35 +247,31 @@ v_workw_t<Graph_t> BspScheduleRecomp<Graph_t>::computeWorkCosts() const {
     return total_costs;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_workw_t<Graph_t> BspScheduleRecomp<Graph_t>::computeCosts() const {
-    
     assert(satisfiesConstraints());
 
-    std::vector<std::vector<cost_type>> rec(number_of_supersteps,
-                                            std::vector<cost_type>(instance->numberOfProcessors(), 0));
-    std::vector<std::vector<cost_type>> send(number_of_supersteps,
-                                            std::vector<cost_type>(instance->numberOfProcessors(), 0));
+    std::vector<std::vector<cost_type>> rec(number_of_supersteps, std::vector<cost_type>(instance->numberOfProcessors(), 0));
+    std::vector<std::vector<cost_type>> send(number_of_supersteps, std::vector<cost_type>(instance->numberOfProcessors(), 0));
 
     for (auto const &[key, val] : commSchedule) {
-
-        send[val][std::get<1>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) *
-                                    instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
-        rec[val][std::get<2>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) *
-                                    instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
+        send[val][std::get<1>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key))
+                                       * instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
+        rec[val][std::get<2>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key))
+                                      * instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
     }
 
     cost_type total_costs = 0;
     for (unsigned step = 0; step < number_of_supersteps; step++) {
-
         cost_type max_comm = 0;
 
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-            if (max_comm < send[step][proc])
+            if (max_comm < send[step][proc]) {
                 max_comm = send[step][proc];
-            if (max_comm < rec[step][proc])
+            }
+            if (max_comm < rec[step][proc]) {
                 max_comm = rec[step][proc];
-
+            }
         }
 
         if (max_comm > 0) {
@@ -289,10 +282,9 @@ v_workw_t<Graph_t> BspScheduleRecomp<Graph_t>::computeCosts() const {
     total_costs += computeWorkCosts();
 
     return total_costs;
-        
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 vertex_idx_t<Graph_t> BspScheduleRecomp<Graph_t>::getTotalAssignments() const {
     vertex_idx total = 0;
     for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
@@ -301,28 +293,28 @@ vertex_idx_t<Graph_t> BspScheduleRecomp<Graph_t>::getTotalAssignments() const {
     return total;
 }
 
-template<typename Graph_t>
-void BspScheduleRecomp<Graph_t>::mergeSupersteps()
-{
+template <typename Graph_t>
+void BspScheduleRecomp<Graph_t>::mergeSupersteps() {
     std::vector<unsigned> new_step_idx(number_of_supersteps);
     std::vector<bool> comm_phase_empty(number_of_supersteps, true);
 
-    for (auto const &[key, val] : commSchedule)
+    for (auto const &[key, val] : commSchedule) {
         comm_phase_empty[val] = false;
+    }
 
     unsigned current_step_idx = 0;
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-    {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
         new_step_idx[step] = current_step_idx;
-        if(!comm_phase_empty[step] || step == number_of_supersteps - 1)
+        if (!comm_phase_empty[step] || step == number_of_supersteps - 1) {
             ++current_step_idx;
+        }
     }
-    for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-    {
+    for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
         std::vector<std::pair<unsigned, unsigned>> new_assignment;
-        for(const std::pair<unsigned, unsigned>& entry : node_to_processor_and_supertep_assignment[node])
+        for (const std::pair<unsigned, unsigned> &entry : node_to_processor_and_supertep_assignment[node]) {
             new_assignment.emplace_back(entry.first, new_step_idx[entry.second]);
-        node_to_processor_and_supertep_assignment[node] = new_assignment; 
+        }
+        node_to_processor_and_supertep_assignment[node] = new_assignment;
     }
     for (auto &key_step_pair : commSchedule) {
         auto &step = key_step_pair.second;
@@ -332,4 +324,4 @@ void BspScheduleRecomp<Graph_t>::mergeSupersteps()
     number_of_supersteps = current_step_idx;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/IBspSchedule.hpp b/include/osp/bsp/model/IBspSchedule.hpp
index 9840c56f..0a4a3d7e 100644
--- a/include/osp/bsp/model/IBspSchedule.hpp
+++ b/include/osp/bsp/model/IBspSchedule.hpp
@@ -24,9 +24,8 @@ namespace osp {
 
 /// @class IBspSchedule
 /// @brief Interface for a BSP (Bulk Synchronous Parallel) schedule.
-template<typename Graph_t>
+template <typename Graph_t>
 class IBspSchedule {
-
     using vertex_idx = vertex_idx_t<Graph_t>;
 
   public:
@@ -62,4 +61,4 @@ class IBspSchedule {
     virtual unsigned numberOfSupersteps() const = 0;
 };
 
-} // namespace  osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/IBspScheduleEval.hpp b/include/osp/bsp/model/IBspScheduleEval.hpp
index 55a4290b..6e0f7a51 100644
--- a/include/osp/bsp/model/IBspScheduleEval.hpp
+++ b/include/osp/bsp/model/IBspScheduleEval.hpp
@@ -24,9 +24,8 @@ namespace osp {
 
 /// @class IBspSchedule
 /// @brief Interface for a BSP (Bulk Synchronous Parallel) schedule.
-template<typename Graph_t>
+template <typename Graph_t>
 class IBspScheduleEval {
-
     using vertex_idx = vertex_idx_t<Graph_t>;
 
   public:
@@ -37,7 +36,6 @@ class IBspScheduleEval {
     virtual v_workw_t<Graph_t> computeWorkCosts() const = 0;
     virtual unsigned numberOfSupersteps() const = 0;
     virtual const BspInstance<Graph_t> &getInstance() const = 0;
-
 };
 
-} // namespace  osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/MaxBspSchedule.hpp b/include/osp/bsp/model/MaxBspSchedule.hpp
index e56c99d6..d35024d2 100644
--- a/include/osp/bsp/model/MaxBspSchedule.hpp
+++ b/include/osp/bsp/model/MaxBspSchedule.hpp
@@ -38,11 +38,11 @@ namespace osp {
  *
  * @see BspInstance
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class MaxBspSchedule : public BspSchedule<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "BspSchedule requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "BspSchedule requires work and comm. weights to have the same type.");
 
   protected:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -65,8 +65,10 @@ class MaxBspSchedule : public BspSchedule<Graph_t> {
      * @param processor_assignment_ The processor assignment for the nodes.
      * @param superstep_assignment_ The superstep assignment for the nodes.
      */
-    MaxBspSchedule(const BspInstance<Graph_t> &inst, const std::vector<unsigned> &processor_assignment_,
-                   const std::vector<unsigned> &superstep_assignment_) : BspSchedule<Graph_t>(inst, processor_assignment_, superstep_assignment_) {}
+    MaxBspSchedule(const BspInstance<Graph_t> &inst,
+                   const std::vector<unsigned> &processor_assignment_,
+                   const std::vector<unsigned> &superstep_assignment_)
+        : BspSchedule<Graph_t>(inst, processor_assignment_, superstep_assignment_) {}
 
     MaxBspSchedule(const IBspSchedule<Graph_t> &schedule) : BspSchedule<Graph_t>(schedule) {}
 
@@ -80,8 +82,9 @@ class MaxBspSchedule : public BspSchedule<Graph_t> {
 
     MaxBspSchedule<Graph_t> &operator=(MaxBspSchedule<Graph_t> &&schedule) noexcept = default;
 
-    template<typename Graph_t_other>
-    MaxBspSchedule(const BspInstance<Graph_t> &instance_, const MaxBspSchedule<Graph_t_other> &schedule) : BspSchedule<Graph_t>(instance_, schedule) {}
+    template <typename Graph_t_other>
+    MaxBspSchedule(const BspInstance<Graph_t> &instance_, const MaxBspSchedule<Graph_t_other> &schedule)
+        : BspSchedule<Graph_t>(instance_, schedule) {}
 
     /**
      * @brief Destructor for the BspSchedule class.
@@ -89,9 +92,10 @@ class MaxBspSchedule : public BspSchedule<Graph_t> {
     virtual ~MaxBspSchedule() = default;
 
     virtual v_workw_t<Graph_t> computeCosts() const override {
-
-        std::vector<std::vector<v_commw_t<Graph_t>>> rec(this->instance->numberOfProcessors(), std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
-        std::vector<std::vector<v_commw_t<Graph_t>>> send(this->instance->numberOfProcessors(), std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> rec(this->instance->numberOfProcessors(),
+                                                         std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> send(this->instance->numberOfProcessors(),
+                                                          std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
 
         compute_lazy_communication_costs(*this, rec, send);
         const std::vector<v_commw_t<Graph_t>> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send);
@@ -99,7 +103,8 @@ class MaxBspSchedule : public BspSchedule<Graph_t> {
 
         v_workw_t<Graph_t> costs = 0U;
         for (unsigned step = 0U; step < this->number_of_supersteps; step++) {
-            const v_commw_t<Graph_t> step_comm_cost = (step == 0U) ? static_cast<v_commw_t<Graph_t>>(0) : max_comm_per_step[step - 1U];
+            const v_commw_t<Graph_t> step_comm_cost = (step == 0U) ? static_cast<v_commw_t<Graph_t>>(0)
+                                                                   : max_comm_per_step[step - 1U];
             costs += std::max(step_comm_cost, max_work_per_step[step]);
 
             if (step_comm_cost > static_cast<v_commw_t<Graph_t>>(0)) {
@@ -112,4 +117,4 @@ class MaxBspSchedule : public BspSchedule<Graph_t> {
     unsigned virtual getStaleness() const override { return 2; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/MaxBspScheduleCS.hpp b/include/osp/bsp/model/MaxBspScheduleCS.hpp
index 79b49b33..7027ea53 100644
--- a/include/osp/bsp/model/MaxBspScheduleCS.hpp
+++ b/include/osp/bsp/model/MaxBspScheduleCS.hpp
@@ -31,11 +31,11 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class MaxBspScheduleCS : public BspScheduleCS<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "BspSchedule requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "BspSchedule requires work and comm. weights to have the same type.");
 
   protected:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -58,10 +58,13 @@ class MaxBspScheduleCS : public BspScheduleCS<Graph_t> {
      * @param processor_assignment_ The processor assignment for the nodes.
      * @param superstep_assignment_ The superstep assignment for the nodes.
      */
-    MaxBspScheduleCS(const BspInstance<Graph_t> &inst, const std::vector<unsigned> &processor_assignment_, const std::vector<unsigned> &superstep_assignment_)
+    MaxBspScheduleCS(const BspInstance<Graph_t> &inst,
+                     const std::vector<unsigned> &processor_assignment_,
+                     const std::vector<unsigned> &superstep_assignment_)
         : BspScheduleCS<Graph_t>(inst, processor_assignment_, superstep_assignment_) {}
 
     MaxBspScheduleCS(const BspScheduleCS<Graph_t> &schedule) : BspScheduleCS<Graph_t>(schedule) {}
+
     MaxBspScheduleCS(BspScheduleCS<Graph_t> &&schedule) : BspScheduleCS<Graph_t>(std::move(schedule)) {}
 
     MaxBspScheduleCS(const MaxBspSchedule<Graph_t> &schedule) : BspScheduleCS<Graph_t>(schedule) {
@@ -78,7 +81,7 @@ class MaxBspScheduleCS : public BspScheduleCS<Graph_t> {
     MaxBspScheduleCS<Graph_t> &operator=(const MaxBspScheduleCS<Graph_t> &schedule) = default;
     MaxBspScheduleCS<Graph_t> &operator=(MaxBspScheduleCS<Graph_t> &&schedule) = default;
 
-    template<typename Graph_t_other>
+    template <typename Graph_t_other>
     MaxBspScheduleCS(const BspInstance<Graph_t> &instance_, const MaxBspScheduleCS<Graph_t_other> &schedule)
         : BspScheduleCS<Graph_t>(instance_, schedule) {}
 
@@ -88,7 +91,6 @@ class MaxBspScheduleCS : public BspScheduleCS<Graph_t> {
     virtual ~MaxBspScheduleCS() = default;
 
     virtual v_workw_t<Graph_t> computeCosts() const override {
-
         std::vector<std::vector<v_commw_t<Graph_t>>> rec(this->getInstance().numberOfProcessors(),
                                                          std::vector<v_commw_t<Graph_t>>(this->number_of_supersteps, 0));
 
@@ -113,4 +115,5 @@ class MaxBspScheduleCS : public BspScheduleCS<Graph_t> {
 
     unsigned virtual getStaleness() const override { return 2; }
 };
-} // namespace osp
\ No newline at end of file
+
+}    // namespace osp
diff --git a/include/osp/bsp/model/cost/BufferedSendingCost.hpp b/include/osp/bsp/model/cost/BufferedSendingCost.hpp
index f8b61f91..747174d9 100644
--- a/include/osp/bsp/model/cost/BufferedSendingCost.hpp
+++ b/include/osp/bsp/model/cost/BufferedSendingCost.hpp
@@ -18,20 +18,20 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/model/cost/CostModelHelpers.hpp"
-#include "osp/concepts/computational_dag_concept.hpp"
 #include <algorithm>
 #include <vector>
 
+#include "osp/bsp/model/cost/CostModelHelpers.hpp"
+#include "osp/concepts/computational_dag_concept.hpp"
+
 namespace osp {
 
 /**
  * @struct BufferedSendingCost
  * @brief Implements the buffered sending cost model.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct BufferedSendingCost {
-
     using cost_type = v_commw_t<Graph_t>;
 
     cost_type operator()(const BspSchedule<Graph_t> &schedule) const {
@@ -41,26 +41,29 @@ struct BufferedSendingCost {
         const auto &node_to_superstep_assignment = schedule.assignedSupersteps();
         const auto staleness = schedule.getStaleness();
 
-        std::vector<std::vector<v_commw_t<Graph_t>>> rec(instance.numberOfProcessors(), std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
-        std::vector<std::vector<v_commw_t<Graph_t>>> send(instance.numberOfProcessors(), std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> rec(instance.numberOfProcessors(),
+                                                         std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> send(instance.numberOfProcessors(),
+                                                          std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
 
         for (vertex_idx_t<Graph_t> node = 0; node < instance.numberOfVertices(); node++) {
-
             std::vector<unsigned> step_needed(instance.numberOfProcessors(), number_of_supersteps);
             for (const auto &target : instance.getComputationalDag().children(node)) {
-
                 if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) {
-                    step_needed[node_to_processor_assignment[target]] = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
+                    step_needed[node_to_processor_assignment[target]]
+                        = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
                 }
             }
 
             for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
-
                 if (step_needed[proc] < number_of_supersteps) {
-                    send[node_to_processor_assignment[node]][node_to_superstep_assignment[node]] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node);
+                    send[node_to_processor_assignment[node]][node_to_superstep_assignment[node]]
+                        += instance.sendCosts(node_to_processor_assignment[node], proc)
+                           * instance.getComputationalDag().vertex_comm_weight(node);
 
                     if (step_needed[proc] >= staleness) {
-                        rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node);
+                        rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc)
+                                                                    * instance.getComputationalDag().vertex_comm_weight(node);
                     }
                 }
             }
@@ -81,4 +84,4 @@ struct BufferedSendingCost {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/model/cost/CostModelHelpers.hpp b/include/osp/bsp/model/cost/CostModelHelpers.hpp
index b1d449b4..fe9b269f 100644
--- a/include/osp/bsp/model/cost/CostModelHelpers.hpp
+++ b/include/osp/bsp/model/cost/CostModelHelpers.hpp
@@ -18,59 +18,58 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/model/BspInstance.hpp"
 #include <algorithm>
 #include <vector>
 
+#include "osp/bsp/model/BspInstance.hpp"
+
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class BspSchedule;
 
 namespace cost_helpers {
 
-template<typename Graph_t>
-std::vector<v_commw_t<Graph_t>> compute_max_comm_per_step(
-    const BspInstance<Graph_t> &instance,
-    unsigned number_of_supersteps,
-    const std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
-    const std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
-
+template <typename Graph_t>
+std::vector<v_commw_t<Graph_t>> compute_max_comm_per_step(const BspInstance<Graph_t> &instance,
+                                                          unsigned number_of_supersteps,
+                                                          const std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
+                                                          const std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
     std::vector<v_commw_t<Graph_t>> max_comm_per_step(number_of_supersteps, 0);
     for (unsigned step = 0; step < number_of_supersteps; step++) {
         v_commw_t<Graph_t> max_send = 0;
         v_commw_t<Graph_t> max_rec = 0;
 
         for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
-            if (max_send < send[proc][step])
+            if (max_send < send[proc][step]) {
                 max_send = send[proc][step];
-            if (max_rec < rec[proc][step])
+            }
+            if (max_rec < rec[proc][step]) {
                 max_rec = rec[proc][step];
+            }
         }
         max_comm_per_step[step] = std::max(max_send, max_rec) * instance.communicationCosts();
     }
     return max_comm_per_step;
 }
 
-template<typename Graph_t>
-std::vector<v_commw_t<Graph_t>> compute_max_comm_per_step(
-    const BspSchedule<Graph_t> &schedule,
-    const std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
-    const std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
+template <typename Graph_t>
+std::vector<v_commw_t<Graph_t>> compute_max_comm_per_step(const BspSchedule<Graph_t> &schedule,
+                                                          const std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
+                                                          const std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
     return compute_max_comm_per_step(schedule.getInstance(), schedule.numberOfSupersteps(), rec, send);
 }
 
-template<typename Graph_t>
-std::vector<v_workw_t<Graph_t>> compute_max_work_per_step(
-    const BspInstance<Graph_t> &instance,
-    unsigned number_of_supersteps,
-    const std::vector<unsigned> &node_to_processor_assignment,
-    const std::vector<unsigned> &node_to_superstep_assignment) {
+template <typename Graph_t>
+std::vector<v_workw_t<Graph_t>> compute_max_work_per_step(const BspInstance<Graph_t> &instance,
+                                                          unsigned number_of_supersteps,
+                                                          const std::vector<unsigned> &node_to_processor_assignment,
+                                                          const std::vector<unsigned> &node_to_superstep_assignment) {
     std::vector<std::vector<v_workw_t<Graph_t>>> work = std::vector<std::vector<v_workw_t<Graph_t>>>(
         number_of_supersteps, std::vector<v_workw_t<Graph_t>>(instance.numberOfProcessors(), 0));
     for (const auto &node : instance.vertices()) {
-        work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] +=
-            instance.getComputationalDag().vertex_work_weight(node);
+        work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]]
+            += instance.getComputationalDag().vertex_work_weight(node);
     }
 
     std::vector<v_workw_t<Graph_t>> max_work_per_step(number_of_supersteps, 0);
@@ -88,30 +87,28 @@ std::vector<v_workw_t<Graph_t>> compute_max_work_per_step(
     return max_work_per_step;
 }
 
-template<typename Graph_t>
-std::vector<v_workw_t<Graph_t>> compute_max_work_per_step(
-    const BspSchedule<Graph_t> &schedule) {
-    return compute_max_work_per_step(schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps());
+template <typename Graph_t>
+std::vector<v_workw_t<Graph_t>> compute_max_work_per_step(const BspSchedule<Graph_t> &schedule) {
+    return compute_max_work_per_step(
+        schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps());
 }
 
-template<typename Graph_t>
-v_workw_t<Graph_t> compute_work_costs(
-    const BspInstance<Graph_t> &instance,
-    unsigned number_of_supersteps,
-    const std::vector<unsigned> &node_to_processor_assignment,
-    const std::vector<unsigned> &node_to_superstep_assignment) {
-
-    std::vector<v_workw_t<Graph_t>> max_work_per_step = compute_max_work_per_step(instance, number_of_supersteps, node_to_processor_assignment, node_to_superstep_assignment);
+template <typename Graph_t>
+v_workw_t<Graph_t> compute_work_costs(const BspInstance<Graph_t> &instance,
+                                      unsigned number_of_supersteps,
+                                      const std::vector<unsigned> &node_to_processor_assignment,
+                                      const std::vector<unsigned> &node_to_superstep_assignment) {
+    std::vector<v_workw_t<Graph_t>> max_work_per_step
+        = compute_max_work_per_step(instance, number_of_supersteps, node_to_processor_assignment, node_to_superstep_assignment);
 
     return std::accumulate(max_work_per_step.begin(), max_work_per_step.end(), static_cast<v_workw_t<Graph_t>>(0));
 }
 
-template<typename Graph_t>
-v_workw_t<Graph_t> compute_work_costs(
-    const BspSchedule<Graph_t> &schedule) {
-
-    return compute_work_costs(schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps());
+template <typename Graph_t>
+v_workw_t<Graph_t> compute_work_costs(const BspSchedule<Graph_t> &schedule) {
+    return compute_work_costs(
+        schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps());
 }
 
-} // namespace cost_helpers
-} // namespace osp
+}    // namespace cost_helpers
+}    // namespace osp
diff --git a/include/osp/bsp/model/cost/LazyCommunicationCost.hpp b/include/osp/bsp/model/cost/LazyCommunicationCost.hpp
index 64338481..a0497174 100644
--- a/include/osp/bsp/model/cost/LazyCommunicationCost.hpp
+++ b/include/osp/bsp/model/cost/LazyCommunicationCost.hpp
@@ -18,65 +18,72 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/model/cost/CostModelHelpers.hpp"
-#include "osp/concepts/computational_dag_concept.hpp"
 #include <algorithm>
 #include <vector>
 
+#include "osp/bsp/model/cost/CostModelHelpers.hpp"
+#include "osp/concepts/computational_dag_concept.hpp"
+
 namespace osp {
 
-template<typename Graph_t>
-void compute_lazy_communication_costs(
-    const BspInstance<Graph_t> &instance,
-    unsigned number_of_supersteps,
-    const std::vector<unsigned> &node_to_processor_assignment,
-    const std::vector<unsigned> &node_to_superstep_assignment,
-    const unsigned staleness,
-    std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
-    std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
+template <typename Graph_t>
+void compute_lazy_communication_costs(const BspInstance<Graph_t> &instance,
+                                      unsigned number_of_supersteps,
+                                      const std::vector<unsigned> &node_to_processor_assignment,
+                                      const std::vector<unsigned> &node_to_superstep_assignment,
+                                      const unsigned staleness,
+                                      std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
+                                      std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
     for (const auto &node : instance.vertices()) {
-
         std::vector<unsigned> step_needed(instance.numberOfProcessors(), number_of_supersteps);
         for (const auto &target : instance.getComputationalDag().children(node)) {
-
             if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) {
-                step_needed[node_to_processor_assignment[target]] = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
+                step_needed[node_to_processor_assignment[target]]
+                    = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
             }
         }
 
         for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
-
             if (step_needed[proc] < number_of_supersteps) {
-                send[node_to_processor_assignment[node]][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node);
-                rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node);
+                send[node_to_processor_assignment[node]][step_needed[proc] - staleness]
+                    += instance.sendCosts(node_to_processor_assignment[node], proc)
+                       * instance.getComputationalDag().vertex_comm_weight(node);
+                rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc)
+                                                            * instance.getComputationalDag().vertex_comm_weight(node);
             }
         }
     }
 }
 
-template<typename Graph_t>
-void compute_lazy_communication_costs(
-    const BspSchedule<Graph_t> &schedule,
-    std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
-    std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
-    compute_lazy_communication_costs(schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps(), schedule.getStaleness(), rec, send);
+template <typename Graph_t>
+void compute_lazy_communication_costs(const BspSchedule<Graph_t> &schedule,
+                                      std::vector<std::vector<v_commw_t<Graph_t>>> &rec,
+                                      std::vector<std::vector<v_commw_t<Graph_t>>> &send) {
+    compute_lazy_communication_costs(schedule.getInstance(),
+                                     schedule.numberOfSupersteps(),
+                                     schedule.assignedProcessors(),
+                                     schedule.assignedSupersteps(),
+                                     schedule.getStaleness(),
+                                     rec,
+                                     send);
 }
 
 /**
  * @struct LazyCommunicationCost
  * @brief Implements the lazy communication cost model.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct LazyCommunicationCost {
-
     using cost_type = v_workw_t<Graph_t>;
 
     cost_type operator()(const BspSchedule<Graph_t> &schedule) const {
         const auto &number_of_processors = schedule.getInstance().numberOfProcessors();
         const auto &number_of_supersteps = schedule.numberOfSupersteps();
 
-        std::vector<std::vector<v_commw_t<Graph_t>>> rec(number_of_processors, std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
-        std::vector<std::vector<v_commw_t<Graph_t>>> send(number_of_processors, std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> rec(number_of_processors,
+                                                         std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> send(number_of_processors,
+                                                          std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
 
         compute_lazy_communication_costs(schedule, rec, send);
         const auto max_comm_per_step = cost_helpers::compute_max_comm_per_step(schedule, rec, send);
@@ -95,4 +102,4 @@ struct LazyCommunicationCost {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/model/cost/TotalCommunicationCost.hpp b/include/osp/bsp/model/cost/TotalCommunicationCost.hpp
index 3182f3c5..af97e5c8 100644
--- a/include/osp/bsp/model/cost/TotalCommunicationCost.hpp
+++ b/include/osp/bsp/model/cost/TotalCommunicationCost.hpp
@@ -27,13 +27,11 @@ namespace osp {
  * @struct TotalCommunicationCost
  * @brief Implements the total communication cost model.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct TotalCommunicationCost {
-
     using cost_type = double;
 
     cost_type operator()(const BspSchedule<Graph_t> &schedule) const {
-
         const auto &instance = schedule.getInstance();
         const auto &node_to_processor_assignment = schedule.assignedProcessors();
 
@@ -41,22 +39,24 @@ struct TotalCommunicationCost {
 
         for (const auto &v : instance.vertices()) {
             for (const auto &target : instance.getComputationalDag().children(v)) {
-
                 if (node_to_processor_assignment[v] != node_to_processor_assignment[target]) {
-                    total_communication += instance.sendCosts(node_to_processor_assignment[v], node_to_processor_assignment[target]) * instance.getComputationalDag().vertex_comm_weight(v);
+                    total_communication += instance.sendCosts(node_to_processor_assignment[v], node_to_processor_assignment[target])
+                                           * instance.getComputationalDag().vertex_comm_weight(v);
                 }
             }
         }
 
-        auto comm_cost = total_communication * static_cast<double>(instance.communicationCosts()) / static_cast<double>(instance.numberOfProcessors());
+        auto comm_cost = total_communication * static_cast<double>(instance.communicationCosts())
+                         / static_cast<double>(instance.numberOfProcessors());
 
         const unsigned number_of_supersteps = schedule.numberOfSupersteps();
 
         auto work_cost = cost_helpers::compute_work_costs(schedule);
-        auto sync_cost = static_cast<v_commw_t<Graph_t>>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) * instance.synchronisationCosts();
+        auto sync_cost = static_cast<v_commw_t<Graph_t>>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0)
+                         * instance.synchronisationCosts();
 
         return comm_cost + work_cost + sync_cost;
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp b/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp
index acab210f..27641937 100644
--- a/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp
+++ b/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp
@@ -18,9 +18,10 @@ limitations under the License.
 
 #pragma once
 
+#include <unordered_set>
+
 #include "osp/bsp/model/cost/CostModelHelpers.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
-#include <unordered_set>
 
 namespace osp {
 
@@ -28,9 +29,8 @@ namespace osp {
  * @struct TotalLambdaCommunicationCost
  * @brief Implements the total lambda communication cost model.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct TotalLambdaCommunicationCost {
-
     using cost_type = double;
 
     cost_type operator()(const BspSchedule<Graph_t> &schedule) const {
@@ -41,8 +41,9 @@ struct TotalLambdaCommunicationCost {
         const double comm_multiplier = 1.0 / instance.numberOfProcessors();
 
         for (const auto &v : instance.vertices()) {
-            if (instance.getComputationalDag().out_degree(v) == 0)
+            if (instance.getComputationalDag().out_degree(v) == 0) {
                 continue;
+            }
 
             std::unordered_set<unsigned> target_procs;
             for (const auto &target : instance.getComputationalDag().children(v)) {
@@ -61,10 +62,11 @@ struct TotalLambdaCommunicationCost {
 
         auto comm_cost = comm_costs * comm_multiplier * static_cast<double>(instance.communicationCosts());
         auto work_cost = cost_helpers::compute_work_costs(schedule);
-        auto sync_cost = static_cast<v_commw_t<Graph_t>>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) * instance.synchronisationCosts();
+        auto sync_cost = static_cast<v_commw_t<Graph_t>>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0)
+                         * instance.synchronisationCosts();
 
         return comm_cost + static_cast<double>(work_cost) + static_cast<double>(sync_cost);
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
index c4d8df30..1cc65621 100644
--- a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
+++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
@@ -18,9 +18,10 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/model/BspInstance.hpp"
 #include <vector>
 
+#include "osp/bsp/model/BspInstance.hpp"
+
 namespace osp {
 
 /**
@@ -31,9 +32,8 @@ namespace osp {
  *
  * @tparam Graph_t The type of the computational DAG.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class CompatibleProcessorRange {
-
     std::vector<std::vector<unsigned>> typeProcessorIdx;
     const BspInstance<Graph_t> *instance = nullptr;
 
@@ -48,9 +48,7 @@ class CompatibleProcessorRange {
      *
      * @param inst The BspInstance.
      */
-    CompatibleProcessorRange(const BspInstance<Graph_t> &inst) {
-        initialize(inst);
-    }
+    CompatibleProcessorRange(const BspInstance<Graph_t> &inst) { initialize(inst); }
 
     /**
      * @brief Initializes the CompatibleProcessorRange with a BspInstance.
@@ -64,9 +62,11 @@ class CompatibleProcessorRange {
             typeProcessorIdx.resize(inst.getComputationalDag().num_vertex_types());
 
             for (v_type_t<Graph_t> v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) {
-                for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++)
-                    if (inst.isCompatibleType(v_type, inst.processorType(proc)))
+                for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) {
+                    if (inst.isCompatibleType(v_type, inst.processorType(proc))) {
                         typeProcessorIdx[v_type].push_back(proc);
+                    }
+                }
             }
         }
     }
@@ -98,4 +98,4 @@ class CompatibleProcessorRange {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/util/SetSchedule.hpp b/include/osp/bsp/model/util/SetSchedule.hpp
index 61946fae..2fce50d8 100644
--- a/include/osp/bsp/model/util/SetSchedule.hpp
+++ b/include/osp/bsp/model/util/SetSchedule.hpp
@@ -37,9 +37,8 @@ namespace osp {
  *
  * @note This class assumes that the `BspInstance` and `ICommunicationScheduler` classes are defined and accessible.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class SetSchedule : public IBspSchedule<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
 
   private:
@@ -56,14 +55,12 @@ class SetSchedule : public IBspSchedule<Graph_t> {
 
     SetSchedule(const BspInstance<Graph_t> &inst, unsigned num_supersteps)
         : instance(&inst), number_of_supersteps(num_supersteps) {
-
         step_processor_vertices = std::vector<std::vector<std::unordered_set<vertex_idx>>>(
             num_supersteps, std::vector<std::unordered_set<vertex_idx>>(inst.numberOfProcessors()));
     }
 
     SetSchedule(const IBspSchedule<Graph_t> &schedule)
         : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()) {
-
         step_processor_vertices = std::vector<std::vector<std::unordered_set<vertex_idx>>>(
             schedule.numberOfSupersteps(),
             std::vector<std::unordered_set<vertex_idx>>(schedule.getInstance().numberOfProcessors()));
@@ -85,12 +82,9 @@ class SetSchedule : public IBspSchedule<Graph_t> {
     unsigned numberOfSupersteps() const override { return number_of_supersteps; }
 
     void setAssignedSuperstep(vertex_idx node, unsigned superstep) override {
-
         unsigned assigned_processor = 0;
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
             for (unsigned step = 0; step < number_of_supersteps; step++) {
-
                 if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) {
                     assigned_processor = proc;
                     step_processor_vertices[step][proc].erase(node);
@@ -102,12 +96,9 @@ class SetSchedule : public IBspSchedule<Graph_t> {
     }
 
     void setAssignedProcessor(vertex_idx node, unsigned processor) override {
-
         unsigned assigned_step = 0;
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
             for (unsigned step = 0; step < number_of_supersteps; step++) {
-
                 if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) {
                     assigned_step = step;
                     step_processor_vertices[step][proc].erase(node);
@@ -122,13 +113,11 @@ class SetSchedule : public IBspSchedule<Graph_t> {
     /// @param node
     /// @return the assigned superstep
     unsigned assignedSuperstep(vertex_idx node) const override {
-
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
             for (unsigned step = 0; step < number_of_supersteps; step++) {
-
-                if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end())
+                if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) {
                     return step;
+                }
             }
         }
 
@@ -139,13 +128,11 @@ class SetSchedule : public IBspSchedule<Graph_t> {
     /// @param node
     /// @return the assigned processor
     unsigned assignedProcessor(vertex_idx node) const override {
-
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
             for (unsigned step = 0; step < number_of_supersteps; step++) {
-
-                if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end())
+                if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) {
                     return proc;
+                }
             }
         }
 
@@ -153,39 +140,34 @@ class SetSchedule : public IBspSchedule<Graph_t> {
     }
 
     void mergeSupersteps(unsigned start_step, unsigned end_step) {
-
         unsigned step = start_step + 1;
         for (; step <= end_step; step++) {
-
             for (unsigned proc = 0; proc < getInstance().numberOfProcessors(); proc++) {
-
                 step_processor_vertices[start_step][proc].merge(step_processor_vertices[step][proc]);
             }
         }
 
         for (; step < number_of_supersteps; step++) {
-
             for (unsigned proc = 0; proc < getInstance().numberOfProcessors(); proc++) {
-
-                step_processor_vertices[step - (end_step - start_step)][proc] =
-                    std::move(step_processor_vertices[step][proc]);
+                step_processor_vertices[step - (end_step - start_step)][proc] = std::move(step_processor_vertices[step][proc]);
             }
         }
     }
 };
 
-
-template<typename Graph_t>
-static void printSetScheduleWorkMemNodesGrid(std::ostream &os, const SetSchedule<Graph_t> &set_schedule, bool print_detailed_node_assignment = false) {
+template <typename Graph_t>
+static void printSetScheduleWorkMemNodesGrid(std::ostream &os,
+                                             const SetSchedule<Graph_t> &set_schedule,
+                                             bool print_detailed_node_assignment = false) {
     const auto &instance = set_schedule.getInstance();
     const unsigned num_processors = instance.numberOfProcessors();
     const unsigned num_supersteps = set_schedule.numberOfSupersteps();
 
     // Data structures to store aggregated work, memory, and nodes
-    std::vector<std::vector<v_workw_t<Graph_t>>> total_work_per_cell(
-        num_processors, std::vector<v_workw_t<Graph_t>>(num_supersteps, 0.0));
-    std::vector<std::vector<v_memw_t<Graph_t>>> total_memory_per_cell(
-        num_processors, std::vector<v_memw_t<Graph_t>>(num_supersteps, 0.0));
+    std::vector<std::vector<v_workw_t<Graph_t>>> total_work_per_cell(num_processors,
+                                                                     std::vector<v_workw_t<Graph_t>>(num_supersteps, 0.0));
+    std::vector<std::vector<v_memw_t<Graph_t>>> total_memory_per_cell(num_processors,
+                                                                      std::vector<v_memw_t<Graph_t>>(num_supersteps, 0.0));
     std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> nodes_per_cell(
         num_processors, std::vector<std::vector<vertex_idx_t<Graph_t>>>(num_supersteps));
 
@@ -225,21 +207,20 @@ static void printSetScheduleWorkMemNodesGrid(std::ostream &os, const SetSchedule
         os << std::left << std::setw(cell_width) << ("P " + std::to_string(p));
         for (unsigned s = 0; s < num_supersteps; ++s) {
             std::stringstream cell_content;
-            cell_content << "W:" << std::fixed << std::setprecision(0) << total_work_per_cell[p][s]
-                         << " M:" << std::fixed << std::setprecision(0) << total_memory_per_cell[p][s]
-                         << " N:" << nodes_per_cell[p][s].size(); // Add node count
+            cell_content << "W:" << std::fixed << std::setprecision(0) << total_work_per_cell[p][s] << " M:" << std::fixed
+                         << std::setprecision(0) << total_memory_per_cell[p][s]
+                         << " N:" << nodes_per_cell[p][s].size();    // Add node count
             os << std::left << std::setw(cell_width) << cell_content.str();
         }
         os << "\n";
     }
-    
-    if (print_detailed_node_assignment) {
-        os << "\n"; // Add a newline for separation between grid and detailed list
 
+    if (print_detailed_node_assignment) {
+        os << "\n";    // Add a newline for separation between grid and detailed list
 
         // Print detailed node lists below the grid
         os << "Detailed Node Assignments:\n";
-        os << std::string(30, '=') << "\n"; // Separator
+        os << std::string(30, '=') << "\n";    // Separator
         for (unsigned p = 0; p < num_processors; ++p) {
             for (unsigned s = 0; s < num_supersteps; ++s) {
                 if (!nodes_per_cell[p][s].empty()) {
@@ -254,8 +235,8 @@ static void printSetScheduleWorkMemNodesGrid(std::ostream &os, const SetSchedule
                 }
             }
         }
-        os << std::string(30, '=') << "\n"; // Separator
+        os << std::string(30, '=') << "\n";    // Separator
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/model/util/VectorSchedule.hpp b/include/osp/bsp/model/util/VectorSchedule.hpp
index ea856c1b..3c0ae212 100644
--- a/include/osp/bsp/model/util/VectorSchedule.hpp
+++ b/include/osp/bsp/model/util/VectorSchedule.hpp
@@ -18,17 +18,16 @@ limitations under the License.
 
 #pragma once
 
+#include <vector>
+
 #include "osp/bsp/model/IBspSchedule.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
-#include <vector>
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class VectorSchedule : public IBspSchedule<Graph_t> {
-
-    static_assert(is_computational_dag_v<Graph_t>,
-        "BspSchedule can only be used with computational DAGs.");
+    static_assert(is_computational_dag_v<Graph_t>, "VectorSchedule can only be used with computational DAGs.");
 
   private:
     const BspInstance<Graph_t> *instance;
@@ -39,7 +38,6 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
     std::vector<unsigned> node_to_processor_assignment;
     std::vector<unsigned> node_to_superstep_assignment;
 
-
     /**
      * @brief Default constructor for VectorSchedule.
      */
@@ -52,21 +50,20 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
 
     VectorSchedule(const IBspSchedule<Graph_t> &schedule)
         : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()) {
-
-        node_to_processor_assignment =
-            std::vector<unsigned>(schedule.getInstance().numberOfVertices(), instance->numberOfProcessors());
-        node_to_superstep_assignment =
-            std::vector<unsigned>(schedule.getInstance().numberOfVertices(), schedule.numberOfSupersteps());
+        node_to_processor_assignment
+            = std::vector<unsigned>(schedule.getInstance().numberOfVertices(), instance->numberOfProcessors());
+        node_to_superstep_assignment
+            = std::vector<unsigned>(schedule.getInstance().numberOfVertices(), schedule.numberOfSupersteps());
 
         for (vertex_idx_t<Graph_t> i = 0; i < schedule.getInstance().numberOfVertices(); i++) {
-
             node_to_processor_assignment[i] = schedule.assignedProcessor(i);
             node_to_superstep_assignment[i] = schedule.assignedSuperstep(i);
         }
     }
 
     VectorSchedule(const VectorSchedule &other)
-        : instance(other.instance), number_of_supersteps(other.number_of_supersteps),
+        : instance(other.instance),
+          number_of_supersteps(other.number_of_supersteps),
           node_to_processor_assignment(other.node_to_processor_assignment),
           node_to_superstep_assignment(other.node_to_superstep_assignment) {}
 
@@ -74,8 +71,7 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
         if (this != &other) {
             instance = &other.getInstance();
             number_of_supersteps = other.numberOfSupersteps();
-            node_to_processor_assignment =
-                std::vector<unsigned>(instance->numberOfVertices(), instance->numberOfProcessors());
+            node_to_processor_assignment = std::vector<unsigned>(instance->numberOfVertices(), instance->numberOfProcessors());
             node_to_superstep_assignment = std::vector<unsigned>(instance->numberOfVertices(), number_of_supersteps);
 
             for (vertex_idx_t<Graph_t> i = 0; i < instance->numberOfVertices(); i++) {
@@ -97,7 +93,8 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
     }
 
     VectorSchedule(VectorSchedule &&other) noexcept
-        : instance(other.instance), number_of_supersteps(other.number_of_supersteps),
+        : instance(other.instance),
+          number_of_supersteps(other.number_of_supersteps),
           node_to_processor_assignment(std::move(other.node_to_processor_assignment)),
           node_to_superstep_assignment(std::move(other.node_to_superstep_assignment)) {}
 
@@ -114,27 +111,22 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
     void setAssignedSuperstep(vertex_idx_t<Graph_t> vertex, unsigned superstep) override {
         node_to_superstep_assignment[vertex] = superstep;
     };
+
     void setAssignedProcessor(vertex_idx_t<Graph_t> vertex, unsigned processor) override {
         node_to_processor_assignment[vertex] = processor;
     };
 
     unsigned numberOfSupersteps() const override { return number_of_supersteps; }
 
-    unsigned assignedSuperstep(vertex_idx_t<Graph_t> vertex) const override {
-        return node_to_superstep_assignment[vertex];
-    }
-    unsigned assignedProcessor(vertex_idx_t<Graph_t> vertex) const override {
-        return node_to_processor_assignment[vertex];
-    }
+    unsigned assignedSuperstep(vertex_idx_t<Graph_t> vertex) const override { return node_to_superstep_assignment[vertex]; }
 
-    void mergeSupersteps(unsigned start_step, unsigned end_step) {
+    unsigned assignedProcessor(vertex_idx_t<Graph_t> vertex) const override { return node_to_processor_assignment[vertex]; }
 
+    void mergeSupersteps(unsigned start_step, unsigned end_step) {
         number_of_supersteps = 0;
 
         for (const auto &vertex : getInstance().vertices()) {
-
             if (node_to_superstep_assignment[vertex] > start_step && node_to_superstep_assignment[vertex] <= end_step) {
-
                 node_to_superstep_assignment[vertex] = start_step;
             } else if (node_to_superstep_assignment[vertex] > end_step) {
                 node_to_superstep_assignment[vertex] -= end_step - start_step;
@@ -147,11 +139,9 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
     }
 
     void insertSupersteps(const unsigned step_before, const unsigned num_new_steps) {
-
         number_of_supersteps += num_new_steps;
 
         for (const auto &vertex : getInstance().vertices()) {
-
             if (node_to_superstep_assignment[vertex] > step_before) {
                 node_to_superstep_assignment[vertex] += num_new_steps;
             }
@@ -159,4 +149,4 @@ class VectorSchedule : public IBspSchedule<Graph_t> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp
index 2e23c22e..ec98b977 100644
--- a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp
+++ b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp
@@ -24,9 +24,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 class CoarseAndSchedule : public Scheduler<Graph_t> {
-
   private:
     Coarser<Graph_t, Graph_t_coarse> &coarser;
     Scheduler<Graph_t_coarse> &scheduler;
@@ -35,18 +34,18 @@ class CoarseAndSchedule : public Scheduler<Graph_t> {
     CoarseAndSchedule(Coarser<Graph_t, Graph_t_coarse> &coarser_, Scheduler<Graph_t_coarse> &scheduler_)
         : coarser(coarser_), scheduler(scheduler_) {}
 
-    std::string getScheduleName() const override { return "Coarse(" + coarser.getCoarserName() + ")AndSchedule(" + scheduler.getScheduleName() + ")"; }
+    std::string getScheduleName() const override {
+        return "Coarse(" + coarser.getCoarserName() + ")AndSchedule(" + scheduler.getScheduleName() + ")";
+    }
 
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         const auto &instance = schedule.getInstance();
 
         BspInstance<Graph_t_coarse> instance_coarse;
 
         std::vector<vertex_idx_t<Graph_t_coarse>> reverse_vertex_map;
 
-        bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(),
-                                         reverse_vertex_map);
+        bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), reverse_vertex_map);
 
         if (!status) {
             return RETURN_STATUS::ERROR;
@@ -69,4 +68,4 @@ class CoarseAndSchedule : public Scheduler<Graph_t> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp b/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp
index 949c933b..e722989b 100644
--- a/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp
+++ b/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp
@@ -13,27 +13,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 #pragma once
 
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-#include "osp/coarser/StepByStep/StepByStepCoarser.hpp"
 #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp"
+#include "osp/coarser/StepByStep/StepByStepCoarser.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class MultiLevelHillClimbingScheduler : public Scheduler<Graph_t> {
-
     using vertex_idx = vertex_idx_t<Graph_t>;
 
-    using vertex_type_t_or_default = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
+    using vertex_type_t_or_default
+        = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
     using edge_commw_t_or_default = std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
 
-    private:
-
-    typename StepByStepCoarser<Graph_t>::COARSENING_STRATEGY coarsening_strategy = StepByStepCoarser<Graph_t>::COARSENING_STRATEGY::EDGE_BY_EDGE;
+  private:
+    typename StepByStepCoarser<Graph_t>::COARSENING_STRATEGY coarsening_strategy
+        = StepByStepCoarser<Graph_t>::COARSENING_STRATEGY::EDGE_BY_EDGE;
     unsigned number_hc_steps;
     unsigned target_nr_of_nodes = 0;
     unsigned min_target_nr_of_nodes_ = 1U;
@@ -47,57 +47,62 @@ class MultiLevelHillClimbingScheduler : public Scheduler<Graph_t> {
 
     std::deque<vertex_idx> refinement_points;
 
-    BspSchedule<Graph_t> Refine(const BspInstance<Graph_t>& instance, const StepByStepCoarser<Graph_t>& coarser,
-        const BspSchedule<Graph_t> &coarse_schedule) const;
+    BspSchedule<Graph_t> Refine(const BspInstance<Graph_t> &instance,
+                                const StepByStepCoarser<Graph_t> &coarser,
+                                const BspSchedule<Graph_t> &coarse_schedule) const;
 
-    BspSchedule<Graph_t> ComputeUncontractedSchedule(const StepByStepCoarser<Graph_t>& coarser,
-                                                const BspInstance<Graph_t>& full_instance,
-                                                const BspSchedule<Graph_t> &coarse_schedule, vertex_idx index_until) const;
+    BspSchedule<Graph_t> ComputeUncontractedSchedule(const StepByStepCoarser<Graph_t> &coarser,
+                                                     const BspInstance<Graph_t> &full_instance,
+                                                     const BspSchedule<Graph_t> &coarse_schedule,
+                                                     vertex_idx index_until) const;
 
     void setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize);
     void setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio);
 
     void set_parameter(const size_t num_vertices) {
-        target_nr_of_nodes = std::max(min_target_nr_of_nodes_, static_cast<unsigned>(static_cast<float>(num_vertices) * contraction_rate_));
+        target_nr_of_nodes
+            = std::max(min_target_nr_of_nodes_, static_cast<unsigned>(static_cast<float>(num_vertices) * contraction_rate_));
         target_nr_of_nodes = std::min(target_nr_of_nodes, static_cast<unsigned>(num_vertices));
 
-        if(use_linear_refinement_) {
+        if (use_linear_refinement_) {
             setLinearRefinementPoints(num_vertices, linear_refinement_step_size_);
-        } else if (use_exponential_refinement_)  {
+        } else if (use_exponential_refinement_) {
             setExponentialRefinementPoints(num_vertices, exponential_refinement_step_ratio_);
         }
     }
 
   public:
-
     virtual ~MultiLevelHillClimbingScheduler() = default;
 
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override;
 
     virtual std::string getScheduleName() const override { return "MultiLevelHillClimbing"; }
 
-    void setCoarseningStrategy(typename StepByStepCoarser<Graph_t>::COARSENING_STRATEGY strategy_){ coarsening_strategy = strategy_;}
-    void setContractionRate(double rate_){ contraction_rate_ = rate_;}
+    void setCoarseningStrategy(typename StepByStepCoarser<Graph_t>::COARSENING_STRATEGY strategy_) {
+        coarsening_strategy = strategy_;
+    }
+
+    void setContractionRate(double rate_) { contraction_rate_ = rate_; }
+
     void setNumberOfHcSteps(unsigned steps_) { number_hc_steps = steps_; }
+
     void setMinTargetNrOfNodes(unsigned min_target_nr_of_nodes) { min_target_nr_of_nodes_ = min_target_nr_of_nodes; }
 
-    void useLinearRefinementSteps(unsigned steps) { 
+    void useLinearRefinementSteps(unsigned steps) {
         use_linear_refinement_ = true;
         use_exponential_refinement_ = false;
         linear_refinement_step_size_ = steps;
     }
 
-    void useExponentialRefinementPoints(double ratio) { 
+    void useExponentialRefinementPoints(double ratio) {
         use_exponential_refinement_ = true;
         use_linear_refinement_ = false;
         exponential_refinement_step_ratio_ = ratio;
     }
-
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS MultiLevelHillClimbingScheduler<Graph_t>::computeSchedule(BspSchedule<Graph_t> &schedule) {
-
     StepByStepCoarser<Graph_t> coarser;
     Graph_t coarseDAG;
     std::vector<vertex_idx> new_vertex_id;
@@ -116,10 +121,12 @@ RETURN_STATUS MultiLevelHillClimbingScheduler<Graph_t>::computeSchedule(BspSched
     HillClimbingScheduler<Graph_t> coarse_hc;
     coarse_hc.improveSchedule(coarse_schedule);
 
-    if(refinement_points.empty())
+    if (refinement_points.empty()) {
         setExponentialRefinementPoints(num_verices, 1.1);
-    while(!refinement_points.empty() && refinement_points.front() <= coarseDAG.num_vertices())
+    }
+    while (!refinement_points.empty() && refinement_points.front() <= coarseDAG.num_vertices()) {
         refinement_points.pop_front();
+    }
 
     schedule = Refine(schedule.getInstance(), coarser, coarse_schedule);
 
@@ -127,13 +134,14 @@ RETURN_STATUS MultiLevelHillClimbingScheduler<Graph_t>::computeSchedule(BspSched
 }
 
 // run refinement: uncoarsify the DAG in small batches, and apply some steps of hill climbing after each iteration
-template<typename Graph_t>
-BspSchedule<Graph_t> MultiLevelHillClimbingScheduler<Graph_t>::Refine(const BspInstance<Graph_t>& full_instance, const StepByStepCoarser<Graph_t>& coarser, const BspSchedule<Graph_t> &coarse_schedule) const {
-
-    BspSchedule<Graph_t> schedule_on_full_graph = ComputeUncontractedSchedule(coarser, full_instance, coarse_schedule, coarser.getContractionHistory().size());
-
-    for (vertex_idx next_size : refinement_points)
-    {
+template <typename Graph_t>
+BspSchedule<Graph_t> MultiLevelHillClimbingScheduler<Graph_t>::Refine(const BspInstance<Graph_t> &full_instance,
+                                                                      const StepByStepCoarser<Graph_t> &coarser,
+                                                                      const BspSchedule<Graph_t> &coarse_schedule) const {
+    BspSchedule<Graph_t> schedule_on_full_graph
+        = ComputeUncontractedSchedule(coarser, full_instance, coarse_schedule, coarser.getContractionHistory().size());
+
+    for (vertex_idx next_size : refinement_points) {
         const vertex_idx contract_steps = coarser.getOriginalDag().num_vertices() - next_size;
         std::vector<vertex_idx> new_ids = coarser.GetIntermediateIDs(contract_steps);
         Graph_t dag = coarser.Contract(new_ids);
@@ -158,52 +166,55 @@ BspSchedule<Graph_t> MultiLevelHillClimbingScheduler<Graph_t>::Refine(const BspI
 }
 
 // given an original DAG G, a schedule on the coarsified G and the contraction steps, project the coarse schedule to the entire G
-template<typename Graph_t>
-BspSchedule<Graph_t> MultiLevelHillClimbingScheduler<Graph_t>::ComputeUncontractedSchedule(const StepByStepCoarser<Graph_t>& coarser,
-                                                const BspInstance<Graph_t>& full_instance,
-                                                const BspSchedule<Graph_t> &coarse_schedule, vertex_idx index_until) const {
-                                                    
+template <typename Graph_t>
+BspSchedule<Graph_t> MultiLevelHillClimbingScheduler<Graph_t>::ComputeUncontractedSchedule(
+    const StepByStepCoarser<Graph_t> &coarser,
+    const BspInstance<Graph_t> &full_instance,
+    const BspSchedule<Graph_t> &coarse_schedule,
+    vertex_idx index_until) const {
     std::vector<vertex_idx> new_ids = coarser.GetIntermediateIDs(index_until);
 
     BspSchedule<Graph_t> schedule(full_instance);
 
-    for (vertex_idx node = 0; node < full_instance.numberOfVertices(); ++node)
-    {
+    for (vertex_idx node = 0; node < full_instance.numberOfVertices(); ++node) {
         schedule.setAssignedProcessor(node, coarse_schedule.assignedProcessor(new_ids[node]));
         schedule.setAssignedSuperstep(node, coarse_schedule.assignedSuperstep(new_ids[node]));
     }
     return schedule;
 }
 
-template<typename Graph_t>
-void MultiLevelHillClimbingScheduler<Graph_t>::setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize)
-{
+template <typename Graph_t>
+void MultiLevelHillClimbingScheduler<Graph_t>::setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize) {
     refinement_points.clear();
-    if(stepSize<5)
+    if (stepSize < 5) {
         stepSize = 5;
+    }
 
-    for (vertex_idx nextN = target_nr_of_nodes + stepSize; nextN < OriginalNrOfNodes; nextN += stepSize)
+    for (vertex_idx nextN = target_nr_of_nodes + stepSize; nextN < OriginalNrOfNodes; nextN += stepSize) {
         refinement_points.push_back(nextN);
+    }
 
-    if (!refinement_points.empty())
+    if (!refinement_points.empty()) {
         refinement_points.pop_back();
+    }
     refinement_points.push_back(OriginalNrOfNodes);
 }
 
-template<typename Graph_t>
-void MultiLevelHillClimbingScheduler<Graph_t>::setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio)
-{
+template <typename Graph_t>
+void MultiLevelHillClimbingScheduler<Graph_t>::setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio) {
     refinement_points.clear();
-    if(stepRatio<1.01)
+    if (stepRatio < 1.01) {
         stepRatio = 1.01;
+    }
 
-    for (vertex_idx nextN = std::max(static_cast<unsigned>(std::round(target_nr_of_nodes * stepRatio)), target_nr_of_nodes+5);
-                        nextN < OriginalNrOfNodes;
-                        nextN = std::max(static_cast<vertex_idx>(std::round(static_cast<double>(nextN) * stepRatio)), refinement_points.back()+5))
+    for (vertex_idx nextN = std::max(static_cast<unsigned>(std::round(target_nr_of_nodes * stepRatio)), target_nr_of_nodes + 5);
+         nextN < OriginalNrOfNodes;
+         nextN
+         = std::max(static_cast<vertex_idx>(std::round(static_cast<double>(nextN) * stepRatio)), refinement_points.back() + 5)) {
         refinement_points.push_back(nextN);
+    }
 
     refinement_points.push_back(OriginalNrOfNodes);
 }
 
-
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp b/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp
index 6446800e..08a52aa0 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp
@@ -1,635 +1,632 @@
-/*
-Copyright 2024 Huawei Technologies Co., Ltd.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
-*/
-
-#pragma once
-
-#include <chrono>
-#include <climits>
-#include <cmath>
-#include <list>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "MemoryConstraintModules.hpp"
-#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp"
-#include "osp/auxiliary/misc.hpp"
-#include "osp/bsp/scheduler/Scheduler.hpp"
-#include "osp/graph_algorithms/directed_graph_top_sort.hpp"
-
-namespace osp {
-
-/**
- * @brief The GreedyBspLocking class represents a scheduler that uses a greedy algorithm to compute schedules for
- * BspInstance.
- *
- * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods.
- * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm.
- * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case.
- */
-
-template<typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
-class BspLocking : public Scheduler<Graph_t> {
-
-    static_assert(is_computational_dag_v<Graph_t>, "BspLocking can only be used with computational DAGs.");
-
-  private:
-    using VertexType = vertex_idx_t<Graph_t>;
-
-    constexpr static bool use_memory_constraint =
-        is_memory_constraint_v<MemoryConstraint_t> or is_memory_constraint_schedule_v<MemoryConstraint_t>;
-
-    static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
-                  "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
-
-    MemoryConstraint_t memory_constraint;
-
-    using Priority = std::tuple<int, unsigned, VertexType>;
-
-    struct PriorityCompare {
-        bool operator()(const Priority &a, const Priority &b) const {
-            if (std::get<0>(a) != std::get<0>(b)) {
-                return std::get<0>(a) > std::get<0>(b); // Higher score is better
-            }
-            if (std::get<1>(a) != std::get<1>(b)) {
-                return std::get<1>(a) > std::get<1>(b); // Higher secondary_score is better
-            }
-            return std::get<2>(a) < std::get<2>(b); // Smaller node index is better for tie-breaking
-        }
-    };
-
-    using MaxHeap = PairingHeap<VertexType, Priority, PriorityCompare>;
-
-    std::vector<MaxHeap> max_proc_score_heap;
-    std::vector<MaxHeap> max_all_proc_score_heap;
-
-    static std::vector<v_workw_t<Graph_t>> get_longest_path(const Graph_t &graph) {
-
-        std::vector<v_workw_t<Graph_t>> longest_path(graph.num_vertices(), 0);
-
-        const std::vector<VertexType> top_order = GetTopOrder(graph);
-
-        for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) {
-            longest_path[*r_iter] = graph.vertex_work_weight(*r_iter);
-            if (graph.out_degree(*r_iter) > 0) {
-                v_workw_t<Graph_t> max = 0;
-                for (const auto &child : graph.children(*r_iter)) {
-                    if (max <= longest_path[child])
-                        max = longest_path[child];
-                }
-                longest_path[*r_iter] += max;
-            }
-        }
-
-        return longest_path;
-    }
-
-    std::deque<VertexType> locked_set;
-    std::vector<unsigned> locked;
-    int lock_penalty = 1;
-    std::vector<unsigned> ready_phase;
-
-    std::vector<int> default_value;
-
-    double max_percent_idle_processors;
-    bool increase_parallelism_in_new_superstep;
-
-    int computeScore(VertexType node, unsigned proc, const BspInstance<Graph_t> &instance) {
-
-        int score = 0;
-        for (const auto &succ : instance.getComputationalDag().children(node)) {
-            if (locked[succ] < instance.numberOfProcessors() && locked[succ] != proc)
-                score -= lock_penalty;
-        }
-
-        return score + default_value[node];
-    };
-
-    bool check_mem_feasibility(const BspInstance<Graph_t> &instance, const std::set<VertexType> &allReady,
-                               const std::vector<std::set<VertexType>> &procReady) const {
-
-        if constexpr (use_memory_constraint) {
-
-            if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) {
-
-                for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-                    if (!procReady[i].empty()) {
-
-                        VertexType top_node = max_proc_score_heap[i].top();
-
-                        if (memory_constraint.can_add(top_node, i)) {
-                            return true;
-                        }
-                    }
-                }
-
-                if (!allReady.empty())
-                    for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-
-                        VertexType top_node = max_all_proc_score_heap[i].top();
-
-                        if (memory_constraint.can_add(top_node, i)) {
-                            return true;
-                        }
-                    }
-
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    bool Choose(const BspInstance<Graph_t> &instance, std::set<VertexType> &allReady,
-                std::vector<std::set<VertexType>> &procReady, const std::vector<bool> &procFree, VertexType &node,
-                unsigned &p, const bool endSupStep, const v_workw_t<Graph_t> remaining_time) {
-
-        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
-
-            if (procFree[proc] && !procReady[proc].empty()) {
-
-                // select node
-                VertexType top_node = max_proc_score_heap[proc].top();
-
-                // filling up
-                bool procready_empty = false;
-                while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) {
-                    procReady[proc].erase(top_node);
-                    ready_phase[top_node] = std::numeric_limits<unsigned>::max();
-                    max_proc_score_heap[proc].pop();
-                    if (!procReady[proc].empty()) {
-                        top_node = max_proc_score_heap[proc].top();
-                    } else {
-                        procready_empty = true;
-                        break;
-                    }
-                }
-                if (procready_empty) {
-                    continue;
-                }
-
-                node = top_node;
-                p = proc;
-            }
-        }
-
-        if (p < instance.numberOfProcessors())
-            return true;
-
-        Priority best_priority = {std::numeric_limits<int>::min(), 0, 0};
-        bool found_node = false;
-
-        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
-            if (!procFree[proc] or max_all_proc_score_heap[proc].is_empty())
-                continue;
-
-            VertexType top_node = max_all_proc_score_heap[proc].top();
-
-            // filling up
-            bool all_procready_empty = false;
-            while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) {
-                allReady.erase(top_node);
-                for (unsigned proc_del = 0; proc_del < instance.numberOfProcessors(); proc_del++) {
-                    if (proc_del == proc || !instance.isCompatible(top_node, proc_del))
-                        continue;
-                    max_all_proc_score_heap[proc_del].erase(top_node);
-                }
-                max_all_proc_score_heap[proc].pop();
-                ready_phase[top_node] = std::numeric_limits<unsigned>::max();
-                if (!max_all_proc_score_heap[proc].is_empty()) {
-                    top_node = max_all_proc_score_heap[proc].top();
-                } else {
-                    all_procready_empty = true;
-                    break;
-                }
-            }
-            if (all_procready_empty)
-                continue;
-
-            Priority top_priority = max_all_proc_score_heap[proc].get_value(top_node);
-            if (!found_node || PriorityCompare{}(top_priority, best_priority)) {
-
-                if constexpr (use_memory_constraint) {
-                    if (memory_constraint.can_add(top_node, proc)) {
-                        best_priority = top_priority;
-                        node = top_node;
-                        p = proc;
-                        found_node = true;
-                    }
-
-                } else {
-
-                    best_priority = top_priority;
-                    node = top_node;
-                    p = proc;
-                    found_node = true;
-                }
-            }
-        }
-        return (found_node && std::get<0>(best_priority) > -3);
-    }
-    
-    bool CanChooseNode(const BspInstance<Graph_t> &instance, const std::vector<std::set<VertexType>> &procReady,
-                       const std::vector<bool> &procFree) const {
-
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !procReady[i].empty())
-                return true;
-
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !max_all_proc_score_heap[i].is_empty())
-                return true;
-
-        return false;
-    }
-
-    unsigned get_nr_parallelizable_nodes(const BspInstance<Graph_t> &instance,
-                                         const std::vector<unsigned> &nr_ready_nodes_per_type,
-                                         const std::vector<unsigned> &nr_procs_per_type) const {
-        unsigned nr_nodes = 0;
-
-        std::vector<unsigned> ready_nodes_per_type = nr_ready_nodes_per_type;
-        std::vector<unsigned> procs_per_type = nr_procs_per_type;
-        for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type)
-            for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type)
-                if (instance.isCompatibleType(node_type, proc_type)) {
-                    unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]);
-                    nr_nodes += matched;
-                    ready_nodes_per_type[node_type] -= matched;
-                    procs_per_type[proc_type] -= matched;
-                }
-
-        return nr_nodes;
-    }
-
-  public:
-    /**
-     * @brief Default constructor for GreedyBspLocking.
-     */
-    BspLocking(float max_percent_idle_processors_ = 0.4f, bool increase_parallelism_in_new_superstep_ = true)
-        : max_percent_idle_processors(max_percent_idle_processors_),
-          increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {}
-
-    /**
-     * @brief Default destructor for GreedyBspLocking.
-     */
-    virtual ~BspLocking() = default;
-
-    /**
-     * @brief Compute a schedule for the given BspInstance.
-     *
-     * This method computes a schedule for the given BspInstance using a greedy algorithm.
-     *
-     * @param instance The BspInstance object representing the instance to compute the schedule for.
-     * @return A pair containing the return status and the computed BspSchedule.
-     */
-    virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
-        const auto &instance = schedule.getInstance();
-
-        for (const auto &v : instance.getComputationalDag().vertices()) {
-            schedule.setAssignedProcessor(v, std::numeric_limits<unsigned>::max());
-        }
-
-        unsigned supstepIdx = 0;
-
-        if constexpr (is_memory_constraint_v<MemoryConstraint_t>) {
-            memory_constraint.initialize(instance);
-        } else if constexpr (is_memory_constraint_schedule_v<MemoryConstraint_t>) {
-            memory_constraint.initialize(schedule, supstepIdx);
-        }
-
-        const auto &N = instance.numberOfVertices();
-        const unsigned &params_p = instance.numberOfProcessors();
-        const auto &G = instance.getComputationalDag();
-
-        const std::vector<v_workw_t<Graph_t>> path_length = get_longest_path(G);
-        v_workw_t<Graph_t> max_path = 1;
-        for (const auto &i : instance.vertices())
-            if (path_length[i] > max_path)
-                max_path = path_length[i];
-
-        default_value.clear();
-        default_value.resize(N, 0);
-        for (const auto &i : instance.vertices()) {
-            //assert(path_length[i] * 20 / max_path <= std::numeric_limits<int>::max());
-            default_value[i] = static_cast<int>(path_length[i] * static_cast<v_workw_t<Graph_t>>(20) / max_path);
-        }
-
-        max_proc_score_heap = std::vector<MaxHeap>(params_p);
-        max_all_proc_score_heap = std::vector<MaxHeap>(params_p);
-
-        locked_set.clear();
-        locked.clear();
-        locked.resize(N, std::numeric_limits<unsigned>::max());
-
-        std::set<VertexType> ready;
-        ready_phase.clear();
-        ready_phase.resize(N, std::numeric_limits<unsigned>::max());
-
-        std::vector<std::set<VertexType>> procReady(params_p);
-        std::set<VertexType> allReady;
-
-        std::vector<VertexType> nrPredecDone(N, 0);
-        std::vector<bool> procFree(params_p, true);
-        unsigned free = params_p;
-
-        std::vector<unsigned> nr_ready_nodes_per_type(G.num_vertex_types(), 0);
-        std::vector<unsigned> nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < params_p; ++proc)
-            ++nr_procs_per_type[instance.getArchitecture().processorType(proc)];
-
-        std::set<std::pair<v_workw_t<Graph_t>, VertexType>> finishTimes;
-        finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
-
-        for (const auto &v : source_vertices_view(G)) {
-            ready.insert(v);
-            allReady.insert(v);
-            ++nr_ready_nodes_per_type[G.vertex_type(v)];
-            ready_phase[v] = params_p;
-
-            for (unsigned proc = 0; proc < params_p; ++proc) {
-                if (instance.isCompatible(v, proc)) {
-                    Priority priority = {default_value[v], static_cast<unsigned>(G.out_degree(v)), v};
-                    max_all_proc_score_heap[proc].push(v, priority);
-                }
-            }
-        }
-
-        bool endSupStep = false;
-
-        while (!ready.empty() || !finishTimes.empty()) {
-
-            if (finishTimes.empty() && endSupStep) {
-                for (unsigned proc = 0; proc < params_p; ++proc) {
-                    procReady[proc].clear();
-                    max_proc_score_heap[proc].clear();
-
-                    if constexpr (use_memory_constraint) {
-                        memory_constraint.reset(proc);
-                    }
-                }
-
-                allReady = ready;
-
-                for (const auto &node : locked_set)
-                    locked[node] = std::numeric_limits<unsigned>::max();
-                locked_set.clear();
-
-                for (unsigned proc = 0; proc < params_p; ++proc) {
-                    max_all_proc_score_heap[proc].clear();
-                }
-
-                for (const auto &v : ready) {
-                    ready_phase[v] = params_p;
-                    for (unsigned proc = 0; proc < params_p; ++proc) {
-
-                        if (!instance.isCompatible(v, proc))
-                            continue;
-
-                        int score = computeScore(v, proc, instance);
-                        Priority priority = {score, static_cast<unsigned>(G.out_degree(v)), v};
-                        max_all_proc_score_heap[proc].push(v, priority);
-                    }
-                }
-
-                ++supstepIdx;
-
-                endSupStep = false;
-
-                finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
-            }
-
-            const v_workw_t<Graph_t> time = finishTimes.begin()->first;
-            const v_workw_t<Graph_t> max_finish_time = finishTimes.rbegin()->first;
-
-            // Find new ready jobs
-            while (!finishTimes.empty() && finishTimes.begin()->first == time) {
-
-                const VertexType node = finishTimes.begin()->second;
-                finishTimes.erase(finishTimes.begin());
-
-                if (node != std::numeric_limits<VertexType>::max()) {
-                    for (const auto &succ : G.children(node)) {
-
-                        ++nrPredecDone[succ];
-                        if (nrPredecDone[succ] == G.in_degree(succ)) {
-                            ready.insert(succ);
-                            ++nr_ready_nodes_per_type[G.vertex_type(succ)];
-
-                            bool canAdd = true;
-                            for (const auto &pred : G.parents(succ)) {
-
-                                if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) &&
-                                    schedule.assignedSuperstep(pred) == supstepIdx) {
-                                    canAdd = false;
-                                    break;
-                                }
-                            }
-
-                            if constexpr (use_memory_constraint) {
-
-                                if (canAdd) {
-                                    if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node)))
-                                        canAdd = false;
-                                }
-                            }
-
-                            if (!instance.isCompatible(succ, schedule.assignedProcessor(node)))
-                                canAdd = false;
-
-                            if (canAdd) {
-                                procReady[schedule.assignedProcessor(node)].insert(succ);
-                                ready_phase[succ] = schedule.assignedProcessor(node);
-
-                                int score = computeScore(succ, schedule.assignedProcessor(node), instance);
-                                Priority priority = {score, static_cast<unsigned>(G.out_degree(succ)), succ};
-
-                                max_proc_score_heap[schedule.assignedProcessor(node)].push(succ, priority);
-                            }
-                        }
-                    }
-                    procFree[schedule.assignedProcessor(node)] = true;
-                    ++free;
-                }
-            }
-
-            // Assign new jobs to processors
-            if (!CanChooseNode(instance, procReady, procFree)) {
-                endSupStep = true;
-            }
-
-            while (CanChooseNode(instance, procReady, procFree)) {
-
-                VertexType nextNode = std::numeric_limits<VertexType>::max();
-                unsigned nextProc = instance.numberOfProcessors();
-                Choose(instance, allReady, procReady, procFree, nextNode, nextProc, endSupStep, max_finish_time - time);
-
-                if (nextNode == std::numeric_limits<VertexType>::max() || nextProc == instance.numberOfProcessors()) {
-                    endSupStep = true;
-                    break;
-                }
-
-                if (ready_phase[nextNode] < params_p) {
-
-                    procReady[nextProc].erase(nextNode);
-
-                    max_proc_score_heap[nextProc].erase(nextNode);
-
-                } else {
-
-                    allReady.erase(nextNode);
-
-                    for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
-                        if (instance.isCompatible(nextNode, proc) && max_all_proc_score_heap[proc].contains(nextNode)) {
-                            max_all_proc_score_heap[proc].erase(nextNode);
-                        }
-                    }
-                }
-
-                ready.erase(nextNode);
-                --nr_ready_nodes_per_type[G.vertex_type(nextNode)];
-                schedule.setAssignedProcessor(nextNode, nextProc);
-                schedule.setAssignedSuperstep(nextNode, supstepIdx);
-
-                ready_phase[nextNode] = std::numeric_limits<unsigned>::max();
-
-                if constexpr (use_memory_constraint) {
-                    memory_constraint.add(nextNode, nextProc);
-
-                    std::vector<VertexType> toErase;
-                    for (const auto &node : procReady[nextProc]) {
-                        if (not memory_constraint.can_add(node, nextProc)) {
-                            toErase.push_back(node);
-                        }
-                    }
-
-                    for (const auto &node : toErase) {
-                        procReady[nextProc].erase(node);
-                        max_proc_score_heap[nextProc].erase(node);
-                        ready_phase[node] = std::numeric_limits<unsigned>::max();
-                    }
-                }
-
-                finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode);
-                procFree[nextProc] = false;
-                --free;
-
-                // update auxiliary structures
-
-                for (const auto &succ : G.children(nextNode)) {
-
-                    if (locked[succ] < params_p && locked[succ] != nextProc) {
-                        for (const auto &parent : G.parents(succ)) {
-                            if (ready_phase[parent] < std::numeric_limits<unsigned>::max() &&
-                                ready_phase[parent] < params_p && ready_phase[parent] != locked[succ]) {
-                                Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent);
-                                std::get<0>(p) += lock_penalty;
-                                max_proc_score_heap[ready_phase[parent]].update(parent, p);
-                            }
-                            if (ready_phase[parent] == params_p) {
-                                for (unsigned proc = 0; proc < params_p; ++proc) {
-                                    if (proc == locked[succ] || !instance.isCompatible(parent, proc))
-                                        continue;
-                                    
-                                    if (max_all_proc_score_heap[proc].contains(parent))
-                                    {
-                                        Priority p = max_all_proc_score_heap[proc].get_value(parent);
-                                        std::get<0>(p) += lock_penalty;
-                                        max_all_proc_score_heap[proc].update(parent, p);
-                                    }
-                                }
-                            }
-                        }
-                        locked[succ] = params_p;
-                    } else if (locked[succ] == std::numeric_limits<unsigned>::max()) {
-                        locked_set.push_back(succ);
-                        locked[succ] = nextProc;
-
-                        for (const auto &parent : G.parents(succ)) {
-                            if (ready_phase[parent] < std::numeric_limits<unsigned>::max() &&
-                                ready_phase[parent] < params_p && ready_phase[parent] != nextProc) {
-                                Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent);
-                                std::get<0>(p) -= lock_penalty;
-                                max_proc_score_heap[ready_phase[parent]].update(parent, p);
-                            }
-                            if (ready_phase[parent] == params_p) {
-                                for (unsigned proc = 0; proc < params_p; ++proc) {
-                                    if (proc == nextProc || !instance.isCompatible(parent, proc))
-                                        continue;
-
-                                    if (max_all_proc_score_heap[proc].contains(parent))
-                                    {
-                                        Priority p = max_all_proc_score_heap[proc].get_value(parent);
-                                        std::get<0>(p) -= lock_penalty;
-                                        max_all_proc_score_heap[proc].update(parent, p);
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            if constexpr (use_memory_constraint) {
-
-                if (not check_mem_feasibility(instance, allReady, procReady)) {
-
-                    return RETURN_STATUS::ERROR;
-                }
-            }
-
-            if (free > params_p * max_percent_idle_processors &&
-                ((!increase_parallelism_in_new_superstep) ||
-                 get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) >=
-                     std::min(std::min(params_p, static_cast<unsigned>(1.2 * (params_p - free))),
-                              params_p - free + (static_cast<unsigned>(0.5 * free))))) {
-                endSupStep = true;
-            }
-        }
-
-
-
-        assert(schedule.satisfiesPrecedenceConstraints());
-
-        return RETURN_STATUS::OSP_SUCCESS;
-    }
-
-    /**
-     * @brief Get the name of the schedule.
-     *
-     * This method returns the name of the schedule, which is "BspGreedy" in this case.
-     *
-     * @return The name of the schedule.
-     */
-    virtual std::string getScheduleName() const override {
-
-        if (use_memory_constraint) {
-            return "BspGreedyLockingMemory";
-        } else {
-            return "BspGreedyLocking";
-        }
-    }
-
-    void set_max_percent_idle_processors(float max_percent_idle_processors_) {
-        max_percent_idle_processors = max_percent_idle_processors_;
-    }
-};
-
-} // namespace osp
\ No newline at end of file
+/*
+Copyright 2024 Huawei Technologies Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <chrono>
+#include <climits>
+#include <cmath>
+#include <deque>
+#include <limits>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#include "MemoryConstraintModules.hpp"
+#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp"
+#include "osp/auxiliary/misc.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/graph_algorithms/directed_graph_top_sort.hpp"
+
+namespace osp {
+
+/**
+ * @brief The BspLocking class represents a scheduler that uses a greedy algorithm to compute schedules for
+ * BspInstance.
+ *
+ * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods.
+ * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm.
+ * The getScheduleName() method returns the name of the schedule, which is "BspGreedyLocking", or
+ * "BspGreedyLockingMemory" when a memory constraint is used.
+ */
+
+template <typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
+class BspLocking : public Scheduler<Graph_t> {
+    static_assert(is_computational_dag_v<Graph_t>, "BspLocking can only be used with computational DAGs.");
+
+  private:
+    using VertexType = vertex_idx_t<Graph_t>;
+
+    constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>
+                                                  or is_memory_constraint_schedule_v<MemoryConstraint_t>;
+
+    static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
+                  "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
+
+    MemoryConstraint_t memory_constraint;
+
+    using Priority = std::tuple<int, unsigned, VertexType>;
+
+    /**
+     * @brief Ordering on Priority tuples, used as the comparator of the MaxHeap type.
+     *
+     * Returns true when @p a ranks ahead of @p b: the larger first component (score) wins, then the larger
+     * second component, and finally the smaller third component (vertex index) breaks any remaining tie.
+     */
+    struct PriorityCompare {
+        bool operator()(const Priority &a, const Priority &b) const {
+            const auto &[score_a, secondary_a, vertex_a] = a;
+            const auto &[score_b, secondary_b, vertex_b] = b;
+
+            // Primary key: higher score is better.
+            if (score_a != score_b) {
+                return score_a > score_b;
+            }
+            // Secondary key: higher secondary score is better.
+            if (secondary_a != secondary_b) {
+                return secondary_a > secondary_b;
+            }
+            // Last resort: the smaller vertex index wins.
+            return vertex_a < vertex_b;
+        }
+    };
+
+    using MaxHeap = PairingHeap<VertexType, Priority, PriorityCompare>;
+
+    std::vector<MaxHeap> max_proc_score_heap;
+    std::vector<MaxHeap> max_all_proc_score_heap;
+
+    /**
+     * @brief Computes, for every vertex, its own work weight plus the largest such value among its children.
+     *
+     * The order returned by GetTopOrder() is traversed back to front; assuming it is a topological order,
+     * every child is finalized before its parents, so the result is the heaviest path starting at each vertex.
+     *
+     * @param graph The computational DAG.
+     * @return A vector indexed by vertex holding the computed path weights.
+     */
+    static std::vector<v_workw_t<Graph_t>> get_longest_path(const Graph_t &graph) {
+        std::vector<v_workw_t<Graph_t>> path_weight(graph.num_vertices(), 0);
+
+        const std::vector<VertexType> order = GetTopOrder(graph);
+
+        for (auto it = order.crbegin(); it != order.crend(); ++it) {
+            const VertexType vertex = *it;
+
+            // Heaviest continuation through any child; stays 0 for a vertex without children.
+            v_workw_t<Graph_t> heaviest_child = 0;
+            for (const auto &child : graph.children(vertex)) {
+                if (path_weight[child] > heaviest_child) {
+                    heaviest_child = path_weight[child];
+                }
+            }
+
+            path_weight[vertex] = graph.vertex_work_weight(vertex);
+            path_weight[vertex] += heaviest_child;
+        }
+
+        return path_weight;
+    }
+
+    std::deque<VertexType> locked_set;
+    std::vector<unsigned> locked;
+    int lock_penalty = 1;
+    std::vector<unsigned> ready_phase;
+
+    std::vector<int> default_value;
+
+    double max_percent_idle_processors;
+    bool increase_parallelism_in_new_superstep;
+
+    /**
+     * @brief Computes the heap score of vertex @p node for processor @p proc.
+     *
+     * The score is default_value[node] minus lock_penalty for every child of @p node whose locked[] entry
+     * is a valid processor index different from @p proc.
+     *
+     * @param node The vertex to score.
+     * @param proc The candidate processor.
+     * @param instance The instance providing the DAG and the number of processors.
+     * @return The score of @p node on @p proc.
+     */
+    int computeScore(VertexType node, unsigned proc, const BspInstance<Graph_t> &instance) const {
+        // Loop-invariant: entries of locked[] at or above this value do not name a processor.
+        const unsigned num_procs = instance.numberOfProcessors();
+
+        int score = default_value[node];
+        for (const auto &succ : instance.getComputationalDag().children(node)) {
+            if (locked[succ] < num_procs && locked[succ] != proc) {
+                score -= lock_penalty;
+            }
+        }
+
+        return score;
+    }
+
+    /**
+     * @brief Checks whether at least one top-ranked ready vertex still fits under the memory constraint.
+     *
+     * Only active when a memory constraint is in use and the architecture's memory constraint type is
+     * PERSISTENT_AND_TRANSIENT; in every other case the function returns true.
+     *
+     * @param instance The instance being scheduled.
+     * @param allReady The set of vertices that are ready on any processor; only its emptiness is inspected.
+     * @param procReady Per-processor ready sets; only their emptiness is inspected.
+     * @return true if the top of some per-processor heap or some all-processor heap passes
+     *         memory_constraint.can_add(), false if none does.
+     */
+    bool check_mem_feasibility(const BspInstance<Graph_t> &instance,
+                               const std::set<VertexType> &allReady,
+                               const std::vector<std::set<VertexType>> &procReady) const {
+        if constexpr (use_memory_constraint) {
+            if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) {
+                const unsigned num_procs = instance.numberOfProcessors();
+
+                for (unsigned i = 0; i < num_procs; ++i) {
+                    // Never query top() on an empty heap.
+                    if (procReady[i].empty() || max_proc_score_heap[i].is_empty()) {
+                        continue;
+                    }
+
+                    const VertexType top_node = max_proc_score_heap[i].top();
+                    if (memory_constraint.can_add(top_node, i)) {
+                        return true;
+                    }
+                }
+
+                if (!allReady.empty()) {
+                    for (unsigned i = 0; i < num_procs; ++i) {
+                        // Vertices are only pushed onto the heaps of compatible processors, so a
+                        // processor's heap can be empty although allReady is not.
+                        if (max_all_proc_score_heap[i].is_empty()) {
+                            continue;
+                        }
+
+                        const VertexType top_node = max_all_proc_score_heap[i].top();
+                        if (memory_constraint.can_add(top_node, i)) {
+                            return true;
+                        }
+                    }
+                }
+
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * @brief Selects the next (vertex, processor) pair to schedule.
+     *
+     * Two passes are made over the processors:
+     *  1. Every free processor with a non-empty procReady set offers the top of its max_proc_score_heap.
+     *     The loop does not stop at the first hit, so the highest-indexed such processor determines the
+     *     result. If @p p holds a valid processor index afterwards, the function returns true.
+     *  2. Otherwise the tops of max_all_proc_score_heap of all free processors are compared with
+     *     PriorityCompare and the best one is selected; with a memory constraint in use a candidate is only
+     *     accepted if memory_constraint.can_add() allows it.
+     *
+     * In both passes, while @p endSupStep is set, a top vertex whose work weight exceeds @p remaining_time is
+     * removed from the ready structures and its ready_phase entry is reset before the next top is examined.
+     *
+     * @param instance The instance being scheduled.
+     * @param allReady Vertices tracked in the all-processor heaps; vertices discarded in pass 2 are erased.
+     * @param procReady Per-processor ready sets; vertices discarded in pass 1 are erased.
+     * @param procFree procFree[i] must be true for processor i to be considered.
+     * @param node Output: the selected vertex; left untouched if no candidate is found.
+     * @param p In/out: the selected processor. Pass 1 is judged by `p < numberOfProcessors()`, so the
+     *          caller is expected to pass an out-of-range value on entry.
+     * @param endSupStep Enables the discarding of vertices that do not fit into @p remaining_time.
+     * @param remaining_time Upper bound on the work weight of an acceptable vertex while @p endSupStep is set.
+     * @return true if pass 1 selected a pair, or if pass 2 selected one whose score is greater than -3.
+     *         Note that pass 2 writes @p node and @p p even when the score test makes it return false.
+     */
+    bool Choose(const BspInstance<Graph_t> &instance,
+                std::set<VertexType> &allReady,
+                std::vector<std::set<VertexType>> &procReady,
+                const std::vector<bool> &procFree,
+                VertexType &node,
+                unsigned &p,
+                const bool endSupStep,
+                const v_workw_t<Graph_t> remaining_time) {
+        // Pass 1: candidates that are ready on one specific processor.
+        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+            if (procFree[proc] && !procReady[proc].empty()) {
+                // select node
+                VertexType top_node = max_proc_score_heap[proc].top();
+
+                // filling up
+                // Drop tops that are too heavy for the remaining time until one fits or none is left.
+                bool procready_empty = false;
+                while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) {
+                    procReady[proc].erase(top_node);
+                    ready_phase[top_node] = std::numeric_limits<unsigned>::max();
+                    max_proc_score_heap[proc].pop();
+                    if (!procReady[proc].empty()) {
+                        top_node = max_proc_score_heap[proc].top();
+                    } else {
+                        procready_empty = true;
+                        break;
+                    }
+                }
+                if (procready_empty) {
+                    continue;
+                }
+
+                // No break: a later processor with a candidate overwrites this choice.
+                node = top_node;
+                p = proc;
+            }
+        }
+
+        if (p < instance.numberOfProcessors()) {
+            return true;
+        }
+
+        // Pass 2: candidates from the all-processor heaps; keep the best priority seen so far.
+        Priority best_priority = {std::numeric_limits<int>::min(), 0, 0};
+        bool found_node = false;
+
+        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+            if (!procFree[proc] or max_all_proc_score_heap[proc].is_empty()) {
+                continue;
+            }
+
+            VertexType top_node = max_all_proc_score_heap[proc].top();
+
+            // filling up
+            // A discarded vertex is removed from allReady and from the heap of every compatible processor.
+            bool all_procready_empty = false;
+            while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) {
+                allReady.erase(top_node);
+                for (unsigned proc_del = 0; proc_del < instance.numberOfProcessors(); proc_del++) {
+                    if (proc_del == proc || !instance.isCompatible(top_node, proc_del)) {
+                        continue;
+                    }
+                    max_all_proc_score_heap[proc_del].erase(top_node);
+                }
+                max_all_proc_score_heap[proc].pop();
+                ready_phase[top_node] = std::numeric_limits<unsigned>::max();
+                if (!max_all_proc_score_heap[proc].is_empty()) {
+                    top_node = max_all_proc_score_heap[proc].top();
+                } else {
+                    all_procready_empty = true;
+                    break;
+                }
+            }
+            if (all_procready_empty) {
+                continue;
+            }
+
+            Priority top_priority = max_all_proc_score_heap[proc].get_value(top_node);
+            if (!found_node || PriorityCompare{}(top_priority, best_priority)) {
+                if constexpr (use_memory_constraint) {
+                    // Under a memory constraint the candidate must also fit on this processor.
+                    if (memory_constraint.can_add(top_node, proc)) {
+                        best_priority = top_priority;
+                        node = top_node;
+                        p = proc;
+                        found_node = true;
+                    }
+
+                } else {
+                    best_priority = top_priority;
+                    node = top_node;
+                    p = proc;
+                    found_node = true;
+                }
+            }
+        }
+        // A selection with a score of -3 or lower is reported as a failure.
+        return (found_node && std::get<0>(best_priority) > -3);
+    }
+
+    /**
+     * @brief Tells whether any free processor still has a candidate vertex.
+     *
+     * A processor qualifies when its procFree flag is set and either its procReady set or its
+     * max_all_proc_score_heap is non-empty.
+     *
+     * @param instance The instance being scheduled (supplies the number of processors).
+     * @param procReady Per-processor ready sets.
+     * @param procFree procFree[i] is true when processor i is free.
+     * @return true if at least one processor qualifies, false otherwise.
+     */
+    bool CanChooseNode(const BspInstance<Graph_t> &instance,
+                       const std::vector<std::set<VertexType>> &procReady,
+                       const std::vector<bool> &procFree) const {
+        const unsigned num_procs = instance.numberOfProcessors();
+
+        for (unsigned proc = 0; proc < num_procs; ++proc) {
+            if (!procFree[proc]) {
+                continue;
+            }
+            if (!procReady[proc].empty() || !max_all_proc_score_heap[proc].is_empty()) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * @brief Greedily matches ready vertices to processors by type and counts the matches.
+     *
+     * Processor types are visited in increasing order and, for each, vertex types in increasing order. Every
+     * compatible (vertex type, processor type) pair is matched as far as the remaining counts allow.
+     *
+     * @param instance The instance providing the type counts and the compatibility relation.
+     * @param nr_ready_nodes_per_type Number of ready vertices per vertex type.
+     * @param nr_procs_per_type Number of processors per processor type.
+     * @return The total number of matched vertices.
+     */
+    unsigned get_nr_parallelizable_nodes(const BspInstance<Graph_t> &instance,
+                                         const std::vector<unsigned> &nr_ready_nodes_per_type,
+                                         const std::vector<unsigned> &nr_procs_per_type) const {
+        // Working copies: the matching below consumes the counts.
+        std::vector<unsigned> remaining_nodes(nr_ready_nodes_per_type);
+        std::vector<unsigned> remaining_procs(nr_procs_per_type);
+
+        const auto num_proc_types = instance.getArchitecture().getNumberOfProcessorTypes();
+        const auto num_node_types = instance.getComputationalDag().num_vertex_types();
+
+        unsigned matched_total = 0;
+        for (unsigned proc_type = 0; proc_type < num_proc_types; ++proc_type) {
+            for (unsigned node_type = 0; node_type < num_node_types; ++node_type) {
+                if (!instance.isCompatibleType(node_type, proc_type)) {
+                    continue;
+                }
+
+                const unsigned matched = std::min(remaining_nodes[node_type], remaining_procs[proc_type]);
+                matched_total += matched;
+                remaining_nodes[node_type] -= matched;
+                remaining_procs[proc_type] -= matched;
+            }
+        }
+
+        return matched_total;
+    }
+
+  public:
+    /**
+     * @brief Default constructor for GreedyBspLocking.
+     */
+    BspLocking(float max_percent_idle_processors_ = 0.4f, bool increase_parallelism_in_new_superstep_ = true)
+        : max_percent_idle_processors(max_percent_idle_processors_),
+          increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {}
+
+    /**
+     * @brief Virtual destructor of BspLocking with defaulted (compiler-generated) behavior.
+     */
+    virtual ~BspLocking() = default;
+
+    /**
+     * @brief Compute a schedule for the given BspInstance.
+     *
+     * This method computes a schedule for the given BspInstance using a greedy algorithm.
+     *
+     * @param instance The BspInstance object representing the instance to compute the schedule for.
+     * @return A pair containing the return status and the computed BspSchedule.
+     */
+    virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
+        const auto &instance = schedule.getInstance();
+
+        for (const auto &v : instance.getComputationalDag().vertices()) {
+            schedule.setAssignedProcessor(v, std::numeric_limits<unsigned>::max());
+        }
+
+        unsigned supstepIdx = 0;
+
+        if constexpr (is_memory_constraint_v<MemoryConstraint_t>) {
+            memory_constraint.initialize(instance);
+        } else if constexpr (is_memory_constraint_schedule_v<MemoryConstraint_t>) {
+            memory_constraint.initialize(schedule, supstepIdx);
+        }
+
+        const auto &N = instance.numberOfVertices();
+        const unsigned &params_p = instance.numberOfProcessors();
+        const auto &G = instance.getComputationalDag();
+
+        const std::vector<v_workw_t<Graph_t>> path_length = get_longest_path(G);
+        v_workw_t<Graph_t> max_path = 1;
+        for (const auto &i : instance.vertices()) {
+            if (path_length[i] > max_path) {
+                max_path = path_length[i];
+            }
+        }
+
+        default_value.clear();
+        default_value.resize(N, 0);
+        for (const auto &i : instance.vertices()) {
+            // assert(path_length[i] * 20 / max_path <= std::numeric_limits<int>::max());
+            default_value[i] = static_cast<int>(path_length[i] * static_cast<v_workw_t<Graph_t>>(20) / max_path);
+        }
+
+        max_proc_score_heap = std::vector<MaxHeap>(params_p);
+        max_all_proc_score_heap = std::vector<MaxHeap>(params_p);
+
+        locked_set.clear();
+        locked.clear();
+        locked.resize(N, std::numeric_limits<unsigned>::max());
+
+        std::set<VertexType> ready;
+        ready_phase.clear();
+        ready_phase.resize(N, std::numeric_limits<unsigned>::max());
+
+        std::vector<std::set<VertexType>> procReady(params_p);
+        std::set<VertexType> allReady;
+
+        std::vector<VertexType> nrPredecDone(N, 0);
+        std::vector<bool> procFree(params_p, true);
+        unsigned free = params_p;
+
+        std::vector<unsigned> nr_ready_nodes_per_type(G.num_vertex_types(), 0);
+        std::vector<unsigned> nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
+        for (unsigned proc = 0; proc < params_p; ++proc) {
+            ++nr_procs_per_type[instance.getArchitecture().processorType(proc)];
+        }
+
+        std::set<std::pair<v_workw_t<Graph_t>, VertexType>> finishTimes;
+        finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
+
+        for (const auto &v : source_vertices_view(G)) {
+            ready.insert(v);
+            allReady.insert(v);
+            ++nr_ready_nodes_per_type[G.vertex_type(v)];
+            ready_phase[v] = params_p;
+
+            for (unsigned proc = 0; proc < params_p; ++proc) {
+                if (instance.isCompatible(v, proc)) {
+                    Priority priority = {default_value[v], static_cast<unsigned>(G.out_degree(v)), v};
+                    max_all_proc_score_heap[proc].push(v, priority);
+                }
+            }
+        }
+
+        bool endSupStep = false;
+
+        while (!ready.empty() || !finishTimes.empty()) {
+            if (finishTimes.empty() && endSupStep) {
+                for (unsigned proc = 0; proc < params_p; ++proc) {
+                    procReady[proc].clear();
+                    max_proc_score_heap[proc].clear();
+
+                    if constexpr (use_memory_constraint) {
+                        memory_constraint.reset(proc);
+                    }
+                }
+
+                allReady = ready;
+
+                for (const auto &node : locked_set) {
+                    locked[node] = std::numeric_limits<unsigned>::max();
+                }
+                locked_set.clear();
+
+                for (unsigned proc = 0; proc < params_p; ++proc) {
+                    max_all_proc_score_heap[proc].clear();
+                }
+
+                for (const auto &v : ready) {
+                    ready_phase[v] = params_p;
+                    for (unsigned proc = 0; proc < params_p; ++proc) {
+                        if (!instance.isCompatible(v, proc)) {
+                            continue;
+                        }
+
+                        int score = computeScore(v, proc, instance);
+                        Priority priority = {score, static_cast<unsigned>(G.out_degree(v)), v};
+                        max_all_proc_score_heap[proc].push(v, priority);
+                    }
+                }
+
+                ++supstepIdx;
+
+                endSupStep = false;
+
+                finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
+            }
+
+            const v_workw_t<Graph_t> time = finishTimes.begin()->first;
+            const v_workw_t<Graph_t> max_finish_time = finishTimes.rbegin()->first;
+
+            // Find new ready jobs
+            while (!finishTimes.empty() && finishTimes.begin()->first == time) {
+                const VertexType node = finishTimes.begin()->second;
+                finishTimes.erase(finishTimes.begin());
+
+                if (node != std::numeric_limits<VertexType>::max()) {
+                    for (const auto &succ : G.children(node)) {
+                        ++nrPredecDone[succ];
+                        if (nrPredecDone[succ] == G.in_degree(succ)) {
+                            ready.insert(succ);
+                            ++nr_ready_nodes_per_type[G.vertex_type(succ)];
+
+                            bool canAdd = true;
+                            for (const auto &pred : G.parents(succ)) {
+                                if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node)
+                                    && schedule.assignedSuperstep(pred) == supstepIdx) {
+                                    canAdd = false;
+                                    break;
+                                }
+                            }
+
+                            if constexpr (use_memory_constraint) {
+                                if (canAdd) {
+                                    if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) {
+                                        canAdd = false;
+                                    }
+                                }
+                            }
+
+                            if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) {
+                                canAdd = false;
+                            }
+
+                            if (canAdd) {
+                                procReady[schedule.assignedProcessor(node)].insert(succ);
+                                ready_phase[succ] = schedule.assignedProcessor(node);
+
+                                int score = computeScore(succ, schedule.assignedProcessor(node), instance);
+                                Priority priority = {score, static_cast<unsigned>(G.out_degree(succ)), succ};
+
+                                max_proc_score_heap[schedule.assignedProcessor(node)].push(succ, priority);
+                            }
+                        }
+                    }
+                    procFree[schedule.assignedProcessor(node)] = true;
+                    ++free;
+                }
+            }
+
+            // Assign new jobs to processors
+            if (!CanChooseNode(instance, procReady, procFree)) {
+                endSupStep = true;
+            }
+
+            while (CanChooseNode(instance, procReady, procFree)) {
+                VertexType nextNode = std::numeric_limits<VertexType>::max();
+                unsigned nextProc = instance.numberOfProcessors();
+                Choose(instance, allReady, procReady, procFree, nextNode, nextProc, endSupStep, max_finish_time - time);
+
+                if (nextNode == std::numeric_limits<VertexType>::max() || nextProc == instance.numberOfProcessors()) {
+                    endSupStep = true;
+                    break;
+                }
+
+                if (ready_phase[nextNode] < params_p) {
+                    procReady[nextProc].erase(nextNode);
+
+                    max_proc_score_heap[nextProc].erase(nextNode);
+
+                } else {
+                    allReady.erase(nextNode);
+
+                    for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+                        if (instance.isCompatible(nextNode, proc) && max_all_proc_score_heap[proc].contains(nextNode)) {
+                            max_all_proc_score_heap[proc].erase(nextNode);
+                        }
+                    }
+                }
+
+                ready.erase(nextNode);
+                --nr_ready_nodes_per_type[G.vertex_type(nextNode)];
+                schedule.setAssignedProcessor(nextNode, nextProc);
+                schedule.setAssignedSuperstep(nextNode, supstepIdx);
+
+                ready_phase[nextNode] = std::numeric_limits<unsigned>::max();
+
+                if constexpr (use_memory_constraint) {
+                    memory_constraint.add(nextNode, nextProc);
+
+                    std::vector<VertexType> toErase;
+                    for (const auto &node : procReady[nextProc]) {
+                        if (not memory_constraint.can_add(node, nextProc)) {
+                            toErase.push_back(node);
+                        }
+                    }
+
+                    for (const auto &node : toErase) {
+                        procReady[nextProc].erase(node);
+                        max_proc_score_heap[nextProc].erase(node);
+                        ready_phase[node] = std::numeric_limits<unsigned>::max();
+                    }
+                }
+
+                finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode);
+                procFree[nextProc] = false;
+                --free;
+
+                // update auxiliary structures
+
+                for (const auto &succ : G.children(nextNode)) {
+                    if (locked[succ] < params_p && locked[succ] != nextProc) {
+                        for (const auto &parent : G.parents(succ)) {
+                            if (ready_phase[parent] < std::numeric_limits<unsigned>::max() && ready_phase[parent] < params_p
+                                && ready_phase[parent] != locked[succ]) {
+                                Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent);
+                                std::get<0>(p) += lock_penalty;
+                                max_proc_score_heap[ready_phase[parent]].update(parent, p);
+                            }
+                            if (ready_phase[parent] == params_p) {
+                                for (unsigned proc = 0; proc < params_p; ++proc) {
+                                    if (proc == locked[succ] || !instance.isCompatible(parent, proc)) {
+                                        continue;
+                                    }
+
+                                    if (max_all_proc_score_heap[proc].contains(parent)) {
+                                        Priority p = max_all_proc_score_heap[proc].get_value(parent);
+                                        std::get<0>(p) += lock_penalty;
+                                        max_all_proc_score_heap[proc].update(parent, p);
+                                    }
+                                }
+                            }
+                        }
+                        locked[succ] = params_p;
+                    } else if (locked[succ] == std::numeric_limits<unsigned>::max()) {
+                        locked_set.push_back(succ);
+                        locked[succ] = nextProc;
+
+                        for (const auto &parent : G.parents(succ)) {
+                            if (ready_phase[parent] < std::numeric_limits<unsigned>::max() && ready_phase[parent] < params_p
+                                && ready_phase[parent] != nextProc) {
+                                Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent);
+                                std::get<0>(p) -= lock_penalty;
+                                max_proc_score_heap[ready_phase[parent]].update(parent, p);
+                            }
+                            if (ready_phase[parent] == params_p) {
+                                for (unsigned proc = 0; proc < params_p; ++proc) {
+                                    if (proc == nextProc || !instance.isCompatible(parent, proc)) {
+                                        continue;
+                                    }
+
+                                    if (max_all_proc_score_heap[proc].contains(parent)) {
+                                        Priority p = max_all_proc_score_heap[proc].get_value(parent);
+                                        std::get<0>(p) -= lock_penalty;
+                                        max_all_proc_score_heap[proc].update(parent, p);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if constexpr (use_memory_constraint) {
+                if (not check_mem_feasibility(instance, allReady, procReady)) {
+                    return RETURN_STATUS::ERROR;
+                }
+            }
+
+            if (free > params_p * max_percent_idle_processors
+                && ((!increase_parallelism_in_new_superstep)
+                    || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type)
+                           >= std::min(std::min(params_p, static_cast<unsigned>(1.2 * (params_p - free))),
+                                       params_p - free + (static_cast<unsigned>(0.5 * free))))) {
+                endSupStep = true;
+            }
+        }
+
+        assert(schedule.satisfiesPrecedenceConstraints());
+
+        return RETURN_STATUS::OSP_SUCCESS;
+    }
+
+    /**
+     * @brief Get the name of the schedule.
+     *
+     * Returns "BspGreedyLockingMemory" when use_memory_constraint is true, and "BspGreedyLocking" otherwise.
+     *
+     * @return The name of the schedule.
+     */
+    virtual std::string getScheduleName() const override {
+        if constexpr (use_memory_constraint) {
+            return "BspGreedyLockingMemory";
+        } else {
+            return "BspGreedyLocking";
+        }
+    }
+
+    void set_max_percent_idle_processors(float max_percent_idle_processors_) {
+        this->max_percent_idle_processors = max_percent_idle_processors_;    // threshold on idle share for ending a superstep
+    }
+};
+
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp b/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp
index 363f5c85..7294e974 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp
@@ -25,11 +25,11 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class GreedyBspToMaxBspConverter {
-
     static_assert(is_computational_dag_v<Graph_t>, "GreedyBspToMaxBspConverter can only be used with computational DAGs.");
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t> >, "GreedyBspToMaxBspConverter requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "GreedyBspToMaxBspConverter requires work and comm. weights to have the same type.");
 
   protected:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -39,73 +39,68 @@ class GreedyBspToMaxBspConverter {
     double latency_coefficient = 1.25;
     double decay_factor = 0.5;
 
-    std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> createSuperstepLists(const BspScheduleCS<Graph_t>& schedule, std::vector<double>& priorities) const;
+    std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> createSuperstepLists(const BspScheduleCS<Graph_t> &schedule,
+                                                                                     std::vector<double> &priorities) const;
 
   public:
-
-    MaxBspSchedule<Graph_t> Convert(const BspSchedule<Graph_t>& schedule) const;
-    MaxBspScheduleCS<Graph_t> Convert(const BspScheduleCS<Graph_t>& schedule) const;
-
+    MaxBspSchedule<Graph_t> Convert(const BspSchedule<Graph_t> &schedule) const;
+    MaxBspScheduleCS<Graph_t> Convert(const BspScheduleCS<Graph_t> &schedule) const;
 };
 
-template<typename Graph_t>
-MaxBspSchedule<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const BspSchedule<Graph_t>& schedule) const
-{
+template <typename Graph_t>    // Builds a BspScheduleCS from the given schedule and delegates to the BspScheduleCS overload.
+MaxBspSchedule<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const BspSchedule<Graph_t> &schedule) const {
     BspScheduleCS<Graph_t> schedule_cs(schedule);
     return Convert(schedule_cs);
 }
 
-template<typename Graph_t>
-MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const BspScheduleCS<Graph_t>& schedule) const
-{
-    const Graph_t& dag = schedule.getInstance().getComputationalDag();
+template <typename Graph_t>
+MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const BspScheduleCS<Graph_t> &schedule) const {
+    const Graph_t &dag = schedule.getInstance().getComputationalDag();
 
     // Initialize data structures
     std::vector<double> priorities;
     std::vector<std::vector<std::deque<vertex_idx>>> proc_list = createSuperstepLists(schedule, priorities);
     std::vector<std::vector<cost_type>> work_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(),
-        std::vector<cost_type>(schedule.numberOfSupersteps(), 0));
+                                                                      std::vector<cost_type>(schedule.numberOfSupersteps(), 0));
     std::vector<vertex_idx> nodes_remaining_superstep(schedule.numberOfSupersteps(), 0);
 
     MaxBspScheduleCS<Graph_t> schedule_max(schedule.getInstance());
-    for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++)
-    {
-        work_remaining_proc_superstep[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)] += dag.vertex_work_weight(node);
+    for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
+        work_remaining_proc_superstep[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)]
+            += dag.vertex_work_weight(node);
         ++nodes_remaining_superstep[schedule.assignedSuperstep(node)];
         schedule_max.setAssignedProcessor(node, schedule.assignedProcessor(node));
     }
 
-    std::vector<std::vector<cost_type>> send_comm_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(),
-        std::vector<cost_type>(schedule.numberOfSupersteps(), 0));
-    std::vector<std::vector<cost_type>> rec_comm_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(),
-        std::vector<cost_type>(schedule.numberOfSupersteps(), 0));
+    std::vector<std::vector<cost_type>> send_comm_remaining_proc_superstep(
+        schedule.getInstance().numberOfProcessors(), std::vector<cost_type>(schedule.numberOfSupersteps(), 0));
+    std::vector<std::vector<cost_type>> rec_comm_remaining_proc_superstep(
+        schedule.getInstance().numberOfProcessors(), std::vector<cost_type>(schedule.numberOfSupersteps(), 0));
 
     std::vector<std::set<std::pair<KeyTriple, unsigned>>> free_comm_steps_for_superstep(schedule.numberOfSupersteps());
-    std::vector<std::vector<std::pair<KeyTriple, unsigned>>> dependent_comm_steps_for_node(schedule.getInstance().numberOfVertices());
-    for (auto const &[key, val] : schedule.getCommunicationSchedule())
-    {
-        if(schedule.assignedSuperstep(std::get<0>(key)) == val)
-        {
+    std::vector<std::vector<std::pair<KeyTriple, unsigned>>> dependent_comm_steps_for_node(
+        schedule.getInstance().numberOfVertices());
+    for (auto const &[key, val] : schedule.getCommunicationSchedule()) {
+        if (schedule.assignedSuperstep(std::get<0>(key)) == val) {
             dependent_comm_steps_for_node[std::get<0>(key)].emplace_back(key, val);
 
-            cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(key)) *
-                schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key));
+            cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(key))
+                                  * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key));
             send_comm_remaining_proc_superstep[std::get<1>(key)][val] += comm_cost;
             rec_comm_remaining_proc_superstep[std::get<2>(key)][val] += comm_cost;
+        } else {
+            free_comm_steps_for_superstep[val].emplace(key, val);
         }
-        else
-            free_comm_steps_for_superstep[val].emplace(key,val);
     }
 
     // Iterate through supersteps
     unsigned current_step = 0;
-    for(unsigned step = 0; step < schedule.numberOfSupersteps(); ++step)
-    {
+    for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) {
         std::vector<cost_type> work_done_on_proc(schedule.getInstance().numberOfProcessors(), 0);
         cost_type max_work_done = 0;
         std::vector<std::pair<KeyTriple, unsigned>> newly_freed_comm_steps;
         std::vector<cost_type> send_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0),
-                                rec_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0);
+            rec_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0);
 
         std::vector<std::pair<KeyTriple, unsigned>> comm_in_current_step;
 
@@ -113,23 +108,22 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
             rec_on_proc(schedule.getInstance().numberOfProcessors(), 0);
         bool empty_superstep = (nodes_remaining_superstep[step] == 0);
 
-        while(nodes_remaining_superstep[step] > 0)
-        {
+        while (nodes_remaining_superstep[step] > 0) {
             // I. Select the next node (from any proc) with highest priority
             unsigned chosen_proc = schedule.getInstance().numberOfProcessors();
             double best_prio = std::numeric_limits<double>::max();
 
-            for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-            {
-                if(!proc_list[proc][step].empty() && (chosen_proc == schedule.getInstance().numberOfProcessors() ||
-                    priorities[proc_list[proc][step].front()] < best_prio))
-                {
+            for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+                if (!proc_list[proc][step].empty()
+                    && (chosen_proc == schedule.getInstance().numberOfProcessors()
+                        || priorities[proc_list[proc][step].front()] < best_prio)) {
                     chosen_proc = proc;
                     best_prio = priorities[proc_list[proc][step].front()];
                 }
             }
-            if(chosen_proc == schedule.getInstance().numberOfProcessors())
+            if (chosen_proc == schedule.getInstance().numberOfProcessors()) {
                 break;
+            }
 
             vertex_idx chosen_node = proc_list[chosen_proc][step].front();
             proc_list[chosen_proc][step].pop_front();
@@ -138,34 +132,33 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
             max_work_done = std::max(max_work_done, work_done_on_proc[chosen_proc]);
             schedule_max.setAssignedSuperstep(chosen_node, current_step);
             --nodes_remaining_superstep[step];
-            for(const std::pair<KeyTriple, unsigned>& entry : dependent_comm_steps_for_node[chosen_node])
-            {
+            for (const std::pair<KeyTriple, unsigned> &entry : dependent_comm_steps_for_node[chosen_node]) {
                 newly_freed_comm_steps.push_back(entry);
-                cost_type comm_cost = dag.vertex_comm_weight(chosen_node) *
-                    schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first));
+                cost_type comm_cost
+                    = dag.vertex_comm_weight(chosen_node)
+                      * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first));
                 send_sum_of_newly_free_on_proc[std::get<1>(entry.first)] += comm_cost;
                 rec_sum_of_newly_free_on_proc[std::get<2>(entry.first)] += comm_cost;
             }
 
             // II. Add nodes on all other processors if this doesn't increase work cost
-            for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-            {
-                if(proc == chosen_proc)
+            for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+                if (proc == chosen_proc) {
                     continue;
-                while(!proc_list[proc][step].empty() && work_done_on_proc[proc] +
-                        dag.vertex_work_weight(proc_list[proc][step].front()) <= max_work_done)
-                {
+                }
+                while (!proc_list[proc][step].empty()
+                       && work_done_on_proc[proc] + dag.vertex_work_weight(proc_list[proc][step].front()) <= max_work_done) {
                     vertex_idx node = proc_list[proc][step].front();
                     proc_list[proc][step].pop_front();
                     work_done_on_proc[proc] += dag.vertex_work_weight(node);
                     work_remaining_proc_superstep[proc][step] -= dag.vertex_work_weight(node);
                     schedule_max.setAssignedSuperstep(node, current_step);
                     --nodes_remaining_superstep[step];
-                    for(const std::pair<KeyTriple, unsigned>& entry : dependent_comm_steps_for_node[node])
-                    {
+                    for (const std::pair<KeyTriple, unsigned> &entry : dependent_comm_steps_for_node[node]) {
                         newly_freed_comm_steps.push_back(entry);
-                        cost_type comm_cost = dag.vertex_comm_weight(node) *
-                            schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first));
+                        cost_type comm_cost = dag.vertex_comm_weight(node)
+                                              * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first),
+                                                                                                   std::get<2>(entry.first));
                         send_sum_of_newly_free_on_proc[std::get<1>(entry.first)] += comm_cost;
                         rec_sum_of_newly_free_on_proc[std::get<2>(entry.first)] += comm_cost;
                     }
@@ -173,47 +166,49 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
             }
 
             // III. Add communication steps that are already available
-            for(auto itr = free_comm_steps_for_superstep[step].begin(); itr != free_comm_steps_for_superstep[step].end();)
-            {
-                if(send_on_proc[std::get<1>(itr->first)] < max_work_done &&
-                    rec_on_proc[std::get<2>(itr->first)] < max_work_done)
-                {
-                    cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(itr->first)) *
-                        schedule.getInstance().getArchitecture().sendCosts(std::get<1>(itr->first), std::get<2>(itr->first)) *
-                        schedule.getInstance().getArchitecture().communicationCosts();
+            for (auto itr = free_comm_steps_for_superstep[step].begin(); itr != free_comm_steps_for_superstep[step].end();) {
+                if (send_on_proc[std::get<1>(itr->first)] < max_work_done && rec_on_proc[std::get<2>(itr->first)] < max_work_done) {
+                    cost_type comm_cost
+                        = dag.vertex_comm_weight(std::get<0>(itr->first))
+                          * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(itr->first), std::get<2>(itr->first))
+                          * schedule.getInstance().getArchitecture().communicationCosts();
                     send_on_proc[std::get<1>(itr->first)] += comm_cost;
                     rec_on_proc[std::get<2>(itr->first)] += comm_cost;
-                    if(current_step - 1 >= schedule_max.numberOfSupersteps())
+                    if (current_step - 1 >= schedule_max.numberOfSupersteps()) {
                         schedule_max.setNumberOfSupersteps(current_step);
+                    }
                     schedule_max.addCommunicationScheduleEntry(itr->first, current_step - 1);
                     comm_in_current_step.emplace_back(*itr);
                     free_comm_steps_for_superstep[step].erase(itr++);
-                }
-                else
+                } else {
                     ++itr;
+                }
             }
 
             // IV. Decide whether to split superstep here
-            if(!free_comm_steps_for_superstep[step].empty() || nodes_remaining_superstep[step] == 0)
+            if (!free_comm_steps_for_superstep[step].empty() || nodes_remaining_superstep[step] == 0) {
                 continue;
+            }
 
             cost_type max_work_remaining = 0, max_comm_remaining = 0, comm_after_reduction = 0;
-            for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-            {
+            for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
                 max_work_remaining = std::max(max_work_remaining, work_remaining_proc_superstep[proc][step]);
                 max_comm_remaining = std::max(max_comm_remaining, send_comm_remaining_proc_superstep[proc][step]);
                 max_comm_remaining = std::max(max_comm_remaining, rec_comm_remaining_proc_superstep[proc][step]);
-                comm_after_reduction = std::max(comm_after_reduction, send_comm_remaining_proc_superstep[proc][step] - send_sum_of_newly_free_on_proc[proc]);
-                comm_after_reduction = std::max(comm_after_reduction, rec_comm_remaining_proc_superstep[proc][step] - rec_sum_of_newly_free_on_proc[proc]);
+                comm_after_reduction = std::max(
+                    comm_after_reduction, send_comm_remaining_proc_superstep[proc][step] - send_sum_of_newly_free_on_proc[proc]);
+                comm_after_reduction = std::max(
+                    comm_after_reduction, rec_comm_remaining_proc_superstep[proc][step] - rec_sum_of_newly_free_on_proc[proc]);
             }
-            cost_type comm_reduction = (max_comm_remaining - comm_after_reduction) * schedule.getInstance().getArchitecture().communicationCosts();
+            cost_type comm_reduction
+                = (max_comm_remaining - comm_after_reduction) * schedule.getInstance().getArchitecture().communicationCosts();
 
             cost_type gain = std::min(comm_reduction, max_work_remaining);
-            if(gain > 0 && static_cast<double>(gain) >= static_cast<double>(schedule.getInstance().getArchitecture().synchronisationCosts()) * latency_coefficient)
-            {
+            if (gain > 0
+                && static_cast<double>(gain) >= static_cast<double>(schedule.getInstance().getArchitecture().synchronisationCosts())
+                                                    * latency_coefficient) {
                 // Split superstep
-                for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-                {
+                for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
                     work_done_on_proc[proc] = 0;
                     send_on_proc[proc] = 0;
                     rec_on_proc[proc] = 0;
@@ -221,12 +216,12 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
                     rec_sum_of_newly_free_on_proc[proc] = 0;
                 }
                 max_work_done = 0;
-                for(const std::pair<KeyTriple, unsigned>& entry : newly_freed_comm_steps)
-                {
+                for (const std::pair<KeyTriple, unsigned> &entry : newly_freed_comm_steps) {
                     free_comm_steps_for_superstep[step].insert(entry);
 
-                    cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) *
-                        schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first));
+                    cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first))
+                                          * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first),
+                                                                                               std::get<2>(entry.first));
                     send_comm_remaining_proc_superstep[std::get<1>(entry.first)][step] -= comm_cost;
                     rec_comm_remaining_proc_superstep[std::get<2>(entry.first)][step] -= comm_cost;
                 }
@@ -236,19 +231,21 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
             }
         }
 
-        if(!empty_superstep)
+        if (!empty_superstep) {
             ++current_step;
+        }
 
-        for(const std::pair<KeyTriple, unsigned>& entry : newly_freed_comm_steps)
+        for (const std::pair<KeyTriple, unsigned> &entry : newly_freed_comm_steps) {
             free_comm_steps_for_superstep[step].insert(entry);
+        }
 
-        if(free_comm_steps_for_superstep[step].empty())
+        if (free_comm_steps_for_superstep[step].empty()) {
             continue;
+        }
 
         // Handle the remaining communication steps: creating a new superstep afterwards with no work
         cost_type max_comm_current = 0;
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
             max_comm_current = std::max(max_comm_current, send_on_proc[proc]);
             max_comm_current = std::max(max_comm_current, rec_on_proc[proc]);
         }
@@ -258,114 +255,118 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
         rec_on_proc.resize(schedule.getInstance().numberOfProcessors(), 0);
 
         std::set<std::pair<vertex_idx, unsigned>> late_arriving_nodes;
-        for(const std::pair<KeyTriple, unsigned>& entry : free_comm_steps_for_superstep[step])
-        {
+        for (const std::pair<KeyTriple, unsigned> &entry : free_comm_steps_for_superstep[step]) {
             schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1);
-            cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) *
-                schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) *
-                schedule.getInstance().getArchitecture().communicationCosts();
+            cost_type comm_cost
+                = dag.vertex_comm_weight(std::get<0>(entry.first))
+                  * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first))
+                  * schedule.getInstance().getArchitecture().communicationCosts();
             send_on_proc[std::get<1>(entry.first)] += comm_cost;
             rec_on_proc[std::get<2>(entry.first)] += comm_cost;
             late_arriving_nodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first));
         }
 
-        // Edge case - check if it is worth moving all communications from the current superstep to the next one instead (thus saving a sync cost)
-        // (for this we need to compute the h-relation-max in the current superstep, the next superstep, and also their union)
+        // Edge case - check if it is worth moving all communications from the current superstep to the next one instead (thus
+        // saving a sync cost). For this we need to compute the h-relation-max in the current superstep, the next superstep, and
+        // also their union.
         cost_type max_comm_after = 0;
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
             max_comm_after = std::max(max_comm_after, send_on_proc[proc]);
             max_comm_after = std::max(max_comm_after, rec_on_proc[proc]);
         }
 
-        for(const std::pair<KeyTriple, unsigned>& entry : comm_in_current_step)
-        {
-            cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) *
-                schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) *
-                schedule.getInstance().getArchitecture().communicationCosts();
+        for (const std::pair<KeyTriple, unsigned> &entry : comm_in_current_step) {
+            cost_type comm_cost
+                = dag.vertex_comm_weight(std::get<0>(entry.first))
+                  * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first))
+                  * schedule.getInstance().getArchitecture().communicationCosts();
             send_on_proc[std::get<1>(entry.first)] += comm_cost;
             rec_on_proc[std::get<2>(entry.first)] += comm_cost;
         }
         cost_type max_comm_together = 0;
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
             max_comm_together = std::max(max_comm_together, send_on_proc[proc]);
             max_comm_together = std::max(max_comm_together, rec_on_proc[proc]);
         }
 
         cost_type work_limit = max_comm_after;
-        if(max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current) + schedule.getInstance().getArchitecture().synchronisationCosts())
-        {
+        if (max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current)
+                                                     + schedule.getInstance().getArchitecture().synchronisationCosts()) {
             work_limit = max_comm_together;
-            for(const std::pair<KeyTriple, unsigned>& entry : comm_in_current_step)
-            {
-                if(current_step - 1 >= schedule_max.numberOfSupersteps())
+            for (const std::pair<KeyTriple, unsigned> &entry : comm_in_current_step) {
+                if (current_step - 1 >= schedule_max.numberOfSupersteps()) {
                     schedule_max.setNumberOfSupersteps(current_step);
+                }
                 schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1);
                 late_arriving_nodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first));
             }
         }
 
         // Bring computation steps into the extra superstep from the next superstep, if possible,a s long as it does not increase cost
-        if(step == schedule.numberOfSupersteps() - 1)
+        if (step == schedule.numberOfSupersteps() - 1) {
             continue;
+        }
 
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
             cost_type work_so_far = 0;
             std::set<vertex_idx> brought_forward;
-            for(vertex_idx node : proc_list[proc][step+1])
-            {
-                if(work_so_far + dag.vertex_work_weight(node) > work_limit)
+            for (vertex_idx node : proc_list[proc][step + 1]) {
+                if (work_so_far + dag.vertex_work_weight(node) > work_limit) {
                     continue;
+                }
 
                 bool has_dependency = false;
 
-                for (const vertex_idx &parent : dag.parents(node))
-                {
-                    if(schedule.assignedProcessor(node) != schedule.assignedProcessor(parent) &&
-                        late_arriving_nodes.find(std::make_pair(parent, proc)) != late_arriving_nodes.end())
-                            has_dependency = true;
+                for (const vertex_idx &parent : dag.parents(node)) {
+                    if (schedule.assignedProcessor(node) != schedule.assignedProcessor(parent)
+                        && late_arriving_nodes.find(std::make_pair(parent, proc)) != late_arriving_nodes.end()) {
+                        has_dependency = true;
+                    }
 
-                    if(schedule.assignedProcessor(node) == schedule.assignedProcessor(parent) &&
-                        schedule.assignedSuperstep(parent) == step + 1 &&
-                        brought_forward.find(parent) == brought_forward.end())
-                            has_dependency = true;
+                    if (schedule.assignedProcessor(node) == schedule.assignedProcessor(parent)
+                        && schedule.assignedSuperstep(parent) == step + 1
+                        && brought_forward.find(parent) == brought_forward.end()) {
+                        has_dependency = true;
+                    }
                 }
 
-                if(has_dependency)
+                if (has_dependency) {
                     continue;
+                }
 
                 brought_forward.insert(node);
                 work_so_far += dag.vertex_work_weight(node);
                 schedule_max.setAssignedSuperstep(node, current_step);
-                work_remaining_proc_superstep[proc][step+1] -= dag.vertex_work_weight(node);
-                --nodes_remaining_superstep[step+1];
+                work_remaining_proc_superstep[proc][step + 1] -= dag.vertex_work_weight(node);
+                --nodes_remaining_superstep[step + 1];
 
-                for(const std::pair<KeyTriple, unsigned>& entry : dependent_comm_steps_for_node[node])
-                    free_comm_steps_for_superstep[step+1].insert(entry);
+                for (const std::pair<KeyTriple, unsigned> &entry : dependent_comm_steps_for_node[node]) {
+                    free_comm_steps_for_superstep[step + 1].insert(entry);
+                }
             }
 
             std::deque<vertex_idx> remaining;
-            for(vertex_idx node : proc_list[proc][step+1])
-                if(brought_forward.find(node) == brought_forward.end())
+            for (vertex_idx node : proc_list[proc][step + 1]) {
+                if (brought_forward.find(node) == brought_forward.end()) {
                     remaining.push_back(node);
+                }
+            }
 
-            proc_list[proc][step+1] = remaining;
+            proc_list[proc][step + 1] = remaining;
         }
 
         ++current_step;
+    }
 
-     }
-
-     return schedule_max;
+    return schedule_max;
 }
 
-// Auxiliary function: creates a separate vectors for each proc-supstep combination, collecting the nodes in a priority-based topological order
-template<typename Graph_t>
-std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> GreedyBspToMaxBspConverter<Graph_t>::createSuperstepLists(const BspScheduleCS<Graph_t>& schedule, std::vector<double>& priorities) const
-{
-    const Graph_t& dag = schedule.getInstance().getComputationalDag();
+// Auxiliary function: creates separate vectors for each proc-supstep combination, collecting the nodes in a priority-based
+// topological order
+template <typename Graph_t>
+std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> GreedyBspToMaxBspConverter<Graph_t>::createSuperstepLists(
+    const BspScheduleCS<Graph_t> &schedule, std::vector<double> &priorities) const {
+    const Graph_t &dag = schedule.getInstance().getComputationalDag();
     std::vector<vertex_idx> top_order = GetTopOrder(dag);
     priorities.clear();
     priorities.resize(dag.num_vertices());
@@ -373,54 +374,58 @@ std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> GreedyBspToMaxBspCon
 
     // compute for each node the amount of dependent send cost in the same superstep
     std::vector<cost_type> comm_dependency(dag.num_vertices(), 0);
-    for (auto const &[key, val] : schedule.getCommunicationSchedule())
-        if(schedule.assignedSuperstep(std::get<0>(key)) == val)
-            comm_dependency[std::get<0>(key)] += dag.vertex_comm_weight(std::get<0>(key)) *
-                schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key));
+    for (auto const &[key, val] : schedule.getCommunicationSchedule()) {
+        if (schedule.assignedSuperstep(std::get<0>(key)) == val) {
+            comm_dependency[std::get<0>(key)]
+                += dag.vertex_comm_weight(std::get<0>(key))
+                   * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key));
+        }
+    }
 
     // assign priority to nodes - based on their own work/comm ratio, and that of its successors in the same proc/supstep
-    for (auto itr = top_order.rbegin(); itr != top_order.rend(); ++itr)
-    {
+    for (auto itr = top_order.rbegin(); itr != top_order.rend(); ++itr) {
         vertex_idx node = *itr;
         double base = static_cast<double>(dag.vertex_work_weight(node));
-        if(comm_dependency[node] > 0)
+        if (comm_dependency[node] > 0) {
             base /= static_cast<double>(2 * comm_dependency[node]);
+        }
 
         double successors = 0;
         unsigned num_children = 0;
-        for (const vertex_idx &child : dag.children(node))
-            if(schedule.assignedProcessor(node) == schedule.assignedProcessor(child) &&
-                schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child))
-            {
+        for (const vertex_idx &child : dag.children(node)) {
+            if (schedule.assignedProcessor(node) == schedule.assignedProcessor(child)
+                && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) {
                 ++num_children;
                 successors += priorities[child];
                 ++local_in_degree[child];
             }
-        if(num_children > 0)
+        }
+        if (num_children > 0) {
             successors = successors * decay_factor / static_cast<double>(num_children);
+        }
         priorities[node] = base + successors;
     }
 
     // create lists for each processor-superstep pair, in a topological order, sorted by priority
-    std::vector<std::vector<std::deque<vertex_idx>>> superstep_lists(schedule.getInstance().numberOfProcessors(),
-        std::vector<std::deque<vertex_idx>>(schedule.numberOfSupersteps()));
+    std::vector<std::vector<std::deque<vertex_idx>>> superstep_lists(
+        schedule.getInstance().numberOfProcessors(), std::vector<std::deque<vertex_idx>>(schedule.numberOfSupersteps()));
 
-    std::set<std::pair<double, vertex_idx> > free;
-    for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++)
-        if(local_in_degree[node]==0)
+    std::set<std::pair<double, vertex_idx>> free;
+    for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
+        if (local_in_degree[node] == 0) {
             free.emplace(priorities[node], node);
-    while(!free.empty())
-    {
+        }
+    }
+    while (!free.empty()) {
         vertex_idx node = free.begin()->second;
         free.erase(free.begin());
         superstep_lists[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)].push_back(node);
-        for (const vertex_idx &child : dag.children(node))
-        {
-            if(schedule.assignedProcessor(node) == schedule.assignedProcessor(child) &&
-                schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child))
-            {
-                if(--local_in_degree[child] == 0)
+        for (const vertex_idx &child : dag.children(node)) {
+            if (schedule.assignedProcessor(node) == schedule.assignedProcessor(child)
+                && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) {
+                if (--local_in_degree[child] == 0) {
                     free.emplace(priorities[child], child);
+                }
             }
         }
     }
@@ -428,4 +433,4 @@ std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> GreedyBspToMaxBspCon
     return superstep_lists;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp
index d55d9c82..9aeab9cc 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp
@@ -41,9 +41,8 @@ enum CilkMode { CILK, SJF };
  * a greedy scheduling algorithm for Cilk-based BSP (Bulk Synchronous Parallel) systems. The scheduler
  * selects the next node and processor to execute a task based on a greedy strategy.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class CilkScheduler : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "CilkScheduler can only be used with computational DAGs.");
 
   private:
@@ -61,42 +60,51 @@ class CilkScheduler : public Scheduler<Graph_t> {
 
     std::mt19937 gen;
 
-    void Choose(const BspInstance<Graph_t> &instance, std::vector<std::deque<vertex_idx_t<Graph_t>>> &procQueue,
-                const std::set<vertex_idx_t<Graph_t>> &readyNodes, const std::vector<bool> &procFree,
-                vertex_idx_t<Graph_t> &node, unsigned &p) {
+    void Choose(const BspInstance<Graph_t> &instance,
+                std::vector<std::deque<vertex_idx_t<Graph_t>>> &procQueue,
+                const std::set<vertex_idx_t<Graph_t>> &readyNodes,
+                const std::vector<bool> &procFree,
+                vertex_idx_t<Graph_t> &node,
+                unsigned &p) {
         if (mode == SJF) {
-
             node = *readyNodes.begin();
-            for (auto &r : readyNodes)
-                if (instance.getComputationalDag().vertex_work_weight(r) <
-                    instance.getComputationalDag().vertex_work_weight(node))
+            for (auto &r : readyNodes) {
+                if (instance.getComputationalDag().vertex_work_weight(r) < instance.getComputationalDag().vertex_work_weight(node)) {
                     node = r;
+                }
+            }
 
             p = 0;
-            for (; p < instance.numberOfProcessors(); ++p)
-                if (procFree[p])
+            for (; p < instance.numberOfProcessors(); ++p) {
+                if (procFree[p]) {
                     break;
+                }
+            }
 
         } else if (mode == CILK) {
-            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
+            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
                 if (procFree[i] && !procQueue[i].empty()) {
                     p = i;
                     node = procQueue[i].back();
                     procQueue[i].pop_back();
                     return;
                 }
+            }
 
             // Time to steal
-            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
+            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
                 if (procFree[i]) {
                     p = i;
                     break;
                 }
+            }
 
             std::vector<unsigned> canStealFrom;
-            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-                if (!procQueue[i].empty())
+            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+                if (!procQueue[i].empty()) {
                     canStealFrom.push_back(i);
+                }
+            }
 
             if (canStealFrom.empty()) {
                 node = std::numeric_limits<vertex_idx_t<Graph_t>>::max();
@@ -139,7 +147,6 @@ class CilkScheduler : public Scheduler<Graph_t> {
      * @return A pair containing the return status and the computed BSP schedule.
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &bsp_schedule) override {
-
         // if constexpr (use_memory_constraint) {
         //     memory_constraint.initialize(instance);
         // }
@@ -166,8 +173,9 @@ class CilkScheduler : public Scheduler<Graph_t> {
 
         for (const auto &v : source_vertices_view(instance.getComputationalDag())) {
             ready.insert(v);
-            if (mode == CILK)
+            if (mode == CILK) {
                 procQueue[0].push_front(v);
+            }
         }
 
         while (!finishTimes.empty()) {
@@ -179,15 +187,13 @@ class CilkScheduler : public Scheduler<Graph_t> {
                 finishTimes.erase(finishTimes.begin());
                 const vertex_idx_t<Graph_t> &node = currentPair.second;
                 if (node != std::numeric_limits<vertex_idx_t<Graph_t>>::max()) {
-
                     for (const auto &succ : instance.getComputationalDag().children(node)) {
-
                         ++nrPredecDone[succ];
                         if (nrPredecDone[succ] == instance.getComputationalDag().in_degree(succ)) {
-
                             ready.insert(succ);
-                            if (mode == CILK)
+                            if (mode == CILK) {
                                 procQueue[schedule.proc[node]].push_back(succ);
+                            }
                         }
                     }
                     procFree[schedule.proc[node]] = true;
@@ -197,7 +203,6 @@ class CilkScheduler : public Scheduler<Graph_t> {
 
             // Assign new jobs to processors
             while (nrProcFree > 0 && !ready.empty()) {
-
                 unsigned nextProc = instance.numberOfProcessors();
                 vertex_idx_t<Graph_t> nextNode = std::numeric_limits<vertex_idx_t<Graph_t>>::max();
 
@@ -214,8 +219,9 @@ class CilkScheduler : public Scheduler<Graph_t> {
                 finishTimes.insert({time + instance.getComputationalDag().vertex_work_weight(nextNode), nextNode});
                 procFree[nextProc] = false;
 
-                if (nrProcFree > 0)
+                if (nrProcFree > 0) {
                     --nrProcFree;
+                }
 
                 greedyProcLists[nextProc].push_back(nextNode);
             }
@@ -252,19 +258,18 @@ class CilkScheduler : public Scheduler<Graph_t> {
      * @return The name of the schedule.
      */
     virtual std::string getScheduleName() const override {
-
         switch (mode) {
-        case CILK:
-            return "CilkGreedy";
-            break;
+            case CILK:
+                return "CilkGreedy";
+                break;
 
-        case SJF:
-            return "SJFGreedy";
+            case SJF:
+                return "SJFGreedy";
 
-        default:
-            return "UnknownModeGreedy";
+            default:
+                return "UnknownModeGreedy";
         }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp b/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp
index 6f9d1e94..eceac14e 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp
@@ -32,7 +32,7 @@ namespace osp {
  *
  * This class stores the processor and time information for a schedule.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class CSchedule {
   private:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -47,8 +47,7 @@ class CSchedule {
      * @param size The size of the schedule.
      */
     CSchedule(std::size_t size)
-        : proc(std::vector<unsigned>(size, std::numeric_limits<unsigned>::max())), time(std::vector<workw_t>(size, 0)) {
-    }
+        : proc(std::vector<unsigned>(size, std::numeric_limits<unsigned>::max())), time(std::vector<workw_t>(size, 0)) {}
 
     /**
      * @brief Converts the CSchedule object to a BspSchedule object.
@@ -60,9 +59,9 @@ class CSchedule {
     void convertToBspSchedule(const BspInstance<Graph_t> &instance,
                               const std::vector<std::deque<vertex_idx>> &procAssignmentLists,
                               BspSchedule<Graph_t> &bsp_schedule) {
-
-        for (const auto &v : instance.vertices())
+        for (const auto &v : instance.vertices()) {
             bsp_schedule.setAssignedProcessor(v, proc[v]);
+        }
 
         const vertex_idx N = instance.numberOfVertices();
         const unsigned P = instance.numberOfProcessors();
@@ -72,8 +71,9 @@ class CSchedule {
 
         std::vector<decltype(procAssignmentLists[0].cbegin())> done(P), limit(P);
 
-        for (unsigned j = 0; j < P; ++j)
+        for (unsigned j = 0; j < P; ++j) {
             done[j] = procAssignmentLists[j].cbegin();
+        }
 
         while (totalNodesDone < N) {
             // create next superstep
@@ -84,31 +84,34 @@ class CSchedule {
                     bool cut = false;
 
                     for (const auto &source : instance.getComputationalDag().parents(node)) {
-                        if (!processed[source] && proc[source] != proc[node])
+                        if (!processed[source] && proc[source] != proc[node]) {
                             cut = true;
+                        }
                     }
 
-                    if (cut)
+                    if (cut) {
                         break;
+                    }
                 }
-                if (limit[j] != procAssignmentLists[j].end() && time[*limit[j]] < timeLimit)
+                if (limit[j] != procAssignmentLists[j].end() && time[*limit[j]] < timeLimit) {
                     timeLimit = time[*limit[j]];
+                }
             }
 
-            for (unsigned j = 0; j < P; ++j)
-                for (; done[j] != limit[j] && (time[*done[j]] < timeLimit ||
-                                               (time[*done[j]] == timeLimit &&
-                                                instance.getComputationalDag().vertex_work_weight(*done[j]) == 0));
+            for (unsigned j = 0; j < P; ++j) {
+                for (; done[j] != limit[j]
+                       && (time[*done[j]] < timeLimit
+                           || (time[*done[j]] == timeLimit && instance.getComputationalDag().vertex_work_weight(*done[j]) == 0));
                      ++done[j]) {
                     processed[*done[j]] = true;
                     bsp_schedule.setAssignedSuperstep(*done[j], superStepIdx);
                     ++totalNodesDone;
                 }
+            }
 
             ++superStepIdx;
         }
-       
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp
index fa05ebfd..78a573c5 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp
@@ -47,28 +47,25 @@ enum EtfMode { ETF, BL_EST };
  * each processor. The algorithm selects the task with the earliest EST and assigns it to the processor with the
  * earliest available start time. The process is repeated until all tasks are scheduled.
  */
-template<typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
 class EtfScheduler : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "EtfScheduler can only be used with computational DAGs.");
 
     static_assert(std::is_convertible_v<v_commw_t<Graph_t>, v_workw_t<Graph_t>>,
                   "EtfScheduler requires that work and communication weights are convertible.");
 
-    static_assert(not has_edge_weights_v<Graph_t> ||
-                      std::is_convertible_v<e_commw_t<Graph_t>, v_workw_t<Graph_t>>,
+    static_assert(not has_edge_weights_v<Graph_t> || std::is_convertible_v<e_commw_t<Graph_t>, v_workw_t<Graph_t>>,
                   "EtfScheduler requires that work and communication weights are convertible.");
 
   private:
     using tv_pair = std::pair<v_workw_t<Graph_t>, vertex_idx_t<Graph_t>>;
 
-    EtfMode mode;  // The mode of the scheduler (ETF or BL_EST)
-    bool use_numa; // Flag indicating whether to use NUMA-aware scheduling
+    EtfMode mode;     // The mode of the scheduler (ETF or BL_EST)
+    bool use_numa;    // Flag indicating whether to use NUMA-aware scheduling
 
     constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>;
 
-    static_assert(not use_memory_constraint ||
-                      std::is_same_v<MemoryConstraint_t, persistent_transient_memory_constraint<Graph_t>>,
+    static_assert(not use_memory_constraint || std::is_same_v<MemoryConstraint_t, persistent_transient_memory_constraint<Graph_t>>,
                   "EtfScheduler implements only persistent_transient_memory_constraint.");
 
     MemoryConstraint_t memory_constraint;
@@ -81,7 +78,6 @@ class EtfScheduler : public Scheduler<Graph_t> {
      * @return A vector containing the bottom level of each task.
      */
     std::vector<v_workw_t<Graph_t>> ComputeBottomLevel(const BspInstance<Graph_t> &instance) const {
-
         std::vector<v_workw_t<Graph_t>> BL(instance.numberOfVertices(), 0);
 
         const std::vector<vertex_idx_t<Graph_t>> topOrder = GetTopOrder(instance.getComputationalDag());
@@ -93,11 +89,9 @@ class EtfScheduler : public Scheduler<Graph_t> {
             v_workw_t<Graph_t> maxval = 0;
 
             if constexpr (has_edge_weights_v<Graph_t>) {
-
                 for (const auto &out_edge : out_edges(node, instance.getComputationalDag())) {
-
-                    const v_workw_t<Graph_t> tmp_val = BL[target(out_edge, instance.getComputationalDag())] +
-                                                       instance.getComputationalDag().edge_comm_weight(out_edge);
+                    const v_workw_t<Graph_t> tmp_val = BL[target(out_edge, instance.getComputationalDag())]
+                                                       + instance.getComputationalDag().edge_comm_weight(out_edge);
 
                     if (tmp_val > maxval) {
                         maxval = tmp_val;
@@ -105,11 +99,8 @@ class EtfScheduler : public Scheduler<Graph_t> {
                 }
 
             } else {
-
                 for (const auto &child : instance.getComputationalDag().children(node)) {
-
-                    const v_workw_t<Graph_t> tmp_val =
-                        BL[child] + instance.getComputationalDag().vertex_comm_weight(child);
+                    const v_workw_t<Graph_t> tmp_val = BL[child] + instance.getComputationalDag().vertex_comm_weight(child);
 
                     if (tmp_val > maxval) {
                         maxval = tmp_val;
@@ -123,20 +114,16 @@ class EtfScheduler : public Scheduler<Graph_t> {
     }
 
     bool check_mem_feasibility(const BspInstance<Graph_t> &instance, const std::set<tv_pair> &ready) const {
-
         if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) {
-
             if (ready.empty()) {
                 return true;
             }
 
             for (const auto &node_pair : ready) {
                 for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-
                     const auto node = node_pair.second;
 
                     if constexpr (use_memory_constraint) {
-
                         if (memory_constraint.can_add(node, i)) {
                             return true;
                         }
@@ -163,11 +150,13 @@ class EtfScheduler : public Scheduler<Graph_t> {
      * @param avg_ The average execution time of the tasks.
      * @return The earliest start time (EST) for the task on the processor.
      */
-    v_workw_t<Graph_t> GetESTforProc(const BspInstance<Graph_t> &instance, CSchedule<Graph_t> &schedule,
-                                     vertex_idx_t<Graph_t> node, unsigned proc,
-                                     const v_workw_t<Graph_t> procAvailableFrom, std::vector<v_workw_t<Graph_t>> &send,
+    v_workw_t<Graph_t> GetESTforProc(const BspInstance<Graph_t> &instance,
+                                     CSchedule<Graph_t> &schedule,
+                                     vertex_idx_t<Graph_t> node,
+                                     unsigned proc,
+                                     const v_workw_t<Graph_t> procAvailableFrom,
+                                     std::vector<v_workw_t<Graph_t>> &send,
                                      std::vector<v_workw_t<Graph_t>> &rec) const {
-
         std::vector<tv_pair> predec;
         for (const auto &pred : instance.getComputationalDag().parents(node)) {
             predec.emplace_back(schedule.time[pred] + instance.getComputationalDag().vertex_work_weight(pred), pred);
@@ -177,22 +166,19 @@ class EtfScheduler : public Scheduler<Graph_t> {
 
         v_workw_t<Graph_t> EST = procAvailableFrom;
         for (const auto &next : predec) {
-            v_workw_t<Graph_t> t =
-                schedule.time[next.second] + instance.getComputationalDag().vertex_work_weight(next.second);
+            v_workw_t<Graph_t> t = schedule.time[next.second] + instance.getComputationalDag().vertex_work_weight(next.second);
             if (schedule.proc[next.second] != proc) {
                 t = std::max(t, send[schedule.proc[next.second]]);
                 t = std::max(t, rec[proc]);
 
                 if constexpr (has_edge_weights_v<Graph_t>) {
-
                     t += instance.getComputationalDag().edge_comm_weight(
-                             edge_desc(next.second, node, instance.getComputationalDag()).first) *
-                         instance.sendCosts(schedule.proc[next.second], proc);
+                             edge_desc(next.second, node, instance.getComputationalDag()).first)
+                         * instance.sendCosts(schedule.proc[next.second], proc);
 
                 } else {
-
-                    t += instance.getComputationalDag().vertex_comm_weight(next.second) *
-                         instance.sendCosts(schedule.proc[next.second], proc);
+                    t += instance.getComputationalDag().vertex_comm_weight(next.second)
+                         * instance.sendCosts(schedule.proc[next.second], proc);
                 }
 
                 send[schedule.proc[next.second]] = t;
@@ -215,20 +201,19 @@ class EtfScheduler : public Scheduler<Graph_t> {
      * @param avg_ The average execution time of the tasks.
      * @return A triple containing the best EST, the node index, and the processor index.
      */
-    tv_pair GetBestESTforNodes(const BspInstance<Graph_t> &instance, CSchedule<Graph_t> &schedule,
+    tv_pair GetBestESTforNodes(const BspInstance<Graph_t> &instance,
+                               CSchedule<Graph_t> &schedule,
                                const std::vector<vertex_idx_t<Graph_t>> &nodeList,
                                const std::vector<v_workw_t<Graph_t>> &procAvailableFrom,
-                               std::vector<v_workw_t<Graph_t>> &send, std::vector<v_workw_t<Graph_t>> &rec,
+                               std::vector<v_workw_t<Graph_t>> &send,
+                               std::vector<v_workw_t<Graph_t>> &rec,
                                unsigned &bestProc) const {
-
         v_workw_t<Graph_t> bestEST = std::numeric_limits<v_workw_t<Graph_t>>::max();
         vertex_idx_t<Graph_t> bestNode = 0;
         std::vector<v_workw_t<Graph_t>> bestSend, bestRec;
-        for (const auto &node : nodeList)
+        for (const auto &node : nodeList) {
             for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) {
-
                 if constexpr (use_memory_constraint) {
-
                     if (not memory_constraint.can_add(node, j)) {
                         continue;
                     }
@@ -236,8 +221,7 @@ class EtfScheduler : public Scheduler<Graph_t> {
 
                 std::vector<v_workw_t<Graph_t>> newSend = send;
                 std::vector<v_workw_t<Graph_t>> newRec = rec;
-                v_workw_t<Graph_t> EST =
-                    GetESTforProc(instance, schedule, node, j, procAvailableFrom[j], newSend, newRec);
+                v_workw_t<Graph_t> EST = GetESTforProc(instance, schedule, node, j, procAvailableFrom[j], newSend, newRec);
                 if (EST < bestEST) {
                     bestEST = EST;
                     bestProc = j;
@@ -246,6 +230,7 @@ class EtfScheduler : public Scheduler<Graph_t> {
                     bestRec = newRec;
                 }
             }
+        }
 
         send = bestSend;
         rec = bestRec;
@@ -273,7 +258,6 @@ class EtfScheduler : public Scheduler<Graph_t> {
      * @return A pair containing the return status and the computed BspSchedule object.
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &bsp_schedule) override {
-
         const auto &instance = bsp_schedule.getInstance();
 
         if constexpr (use_memory_constraint) {
@@ -286,14 +270,15 @@ class EtfScheduler : public Scheduler<Graph_t> {
 
         std::vector<vertex_idx_t<Graph_t>> predecProcessed(instance.numberOfVertices(), 0);
 
-        std::vector<v_workw_t<Graph_t>> finishTimes(instance.numberOfProcessors(), 0),
-            send(instance.numberOfProcessors(), 0), rec(instance.numberOfProcessors(), 0);
+        std::vector<v_workw_t<Graph_t>> finishTimes(instance.numberOfProcessors(), 0), send(instance.numberOfProcessors(), 0),
+            rec(instance.numberOfProcessors(), 0);
 
         std::vector<v_workw_t<Graph_t>> BL;
-        if (mode == BL_EST)
+        if (mode == BL_EST) {
             BL = ComputeBottomLevel(instance);
-        else
+        } else {
             BL = std::vector<v_workw_t<Graph_t>>(instance.numberOfVertices(), 0);
+        }
 
         std::set<tv_pair> ready;
 
@@ -302,12 +287,10 @@ class EtfScheduler : public Scheduler<Graph_t> {
         }
 
         while (!ready.empty()) {
-
             tv_pair best_tv(0, 0);
             unsigned best_proc = 0;
 
             if (mode == BL_EST) {
-
                 std::vector<vertex_idx_t<Graph_t>> nodeList{ready.begin()->second};
                 ready.erase(ready.begin());
                 best_tv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, best_proc);
@@ -315,8 +298,9 @@ class EtfScheduler : public Scheduler<Graph_t> {
 
             if (mode == ETF) {
                 std::vector<vertex_idx_t<Graph_t>> nodeList;
-                for (const auto &next : ready)
+                for (const auto &next : ready) {
                     nodeList.push_back(next.second);
+                }
                 best_tv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, best_proc);
                 ready.erase(tv_pair({0, best_tv.second}));
             }
@@ -334,12 +318,12 @@ class EtfScheduler : public Scheduler<Graph_t> {
 
             for (const auto &succ : instance.getComputationalDag().children(node)) {
                 ++predecProcessed[succ];
-                if (predecProcessed[succ] == instance.getComputationalDag().in_degree(succ))
+                if (predecProcessed[succ] == instance.getComputationalDag().in_degree(succ)) {
                     ready.insert({BL[succ], succ});
+                }
             }
 
             if constexpr (use_memory_constraint) {
-
                 if (not check_mem_feasibility(instance, ready)) {
                     return RETURN_STATUS::ERROR;
                 }
@@ -386,16 +370,16 @@ class EtfScheduler : public Scheduler<Graph_t> {
      */
     virtual std::string getScheduleName() const override {
         switch (mode) {
-        case ETF:
-            return "ETFGreedy";
+            case ETF:
+                return "ETFGreedy";
 
-        case BL_EST:
-            return "BL-ESTGreedy";
+            case BL_EST:
+                return "BL-ESTGreedy";
 
-        default:
-            return "UnknownModeGreedy";
+            default:
+                return "UnknownModeGreedy";
         }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp
index be196e28..c67389aa 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp
@@ -31,6 +31,7 @@ limitations under the License.
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 
 namespace osp {
+
 /**
  * @brief The GreedyBspScheduler class represents a scheduler that uses a greedy algorithm to compute schedules for
  * BspInstance.
@@ -39,16 +40,15 @@ namespace osp {
  * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm.
  * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case.
  */
-template<typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
 class GreedyBspScheduler : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "GreedyBspScheduler can only be used with computational DAGs.");
 
   private:
     using VertexType = vertex_idx_t<Graph_t>;
 
-    constexpr static bool use_memory_constraint =
-        is_memory_constraint_v<MemoryConstraint_t> or is_memory_constraint_schedule_v<MemoryConstraint_t>;
+    constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>
+                                                  or is_memory_constraint_schedule_v<MemoryConstraint_t>;
 
     static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
                   "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
@@ -56,17 +56,15 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
     MemoryConstraint_t memory_constraint;
 
     struct heap_node {
-
         VertexType node;
 
         double score;
 
         heap_node() : node(0), score(0) {}
+
         heap_node(VertexType node_arg, double score_arg) : node(node_arg), score(score_arg) {}
 
-        bool operator<(heap_node const &rhs) const {
-            return (score < rhs.score) || (score <= rhs.score and node < rhs.node);
-        }
+        bool operator<(heap_node const &rhs) const { return (score < rhs.score) || (score <= rhs.score and node < rhs.node); }
     };
 
     std::vector<boost::heap::fibonacci_heap<heap_node>> max_proc_score_heap;
@@ -80,29 +78,29 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
     float max_percent_idle_processors;
     bool increase_parallelism_in_new_superstep;
 
-    double computeScore(VertexType node, unsigned proc, const std::vector<std::vector<bool>> &procInHyperedge,
+    double computeScore(VertexType node,
+                        unsigned proc,
+                        const std::vector<std::vector<bool>> &procInHyperedge,
                         const BspInstance<Graph_t> &instance) const {
-
         double score = 0;
         for (const auto &pred : instance.getComputationalDag().parents(node)) {
-
             if (procInHyperedge[pred][proc]) {
-                score += static_cast<double>(instance.getComputationalDag().vertex_comm_weight(pred)) /
-                         static_cast<double>(instance.getComputationalDag().out_degree(pred));
+                score += static_cast<double>(instance.getComputationalDag().vertex_comm_weight(pred))
+                         / static_cast<double>(instance.getComputationalDag().out_degree(pred));
             }
         }
         return score;
     }
 
-    void Choose(const BspInstance<Graph_t> &instance, const std::vector<std::set<VertexType>> &procReady,
-                const std::vector<bool> &procFree, VertexType &node, unsigned &p) const {
-
+    void Choose(const BspInstance<Graph_t> &instance,
+                const std::vector<std::set<VertexType>> &procReady,
+                const std::vector<bool> &procFree,
+                VertexType &node,
+                unsigned &p) const {
         double max_score = -1.0;
 
         for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
-
             if (procFree[proc] && !procReady[proc].empty()) {
-
                 // select node
                 heap_node top_node = max_proc_score_heap[proc].top();
 
@@ -116,24 +114,21 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
         }
 
         for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
-            if (!procFree[proc] or max_all_proc_score_heap[proc].empty())
+            if (!procFree[proc] or max_all_proc_score_heap[proc].empty()) {
                 continue;
+            }
 
             heap_node top_node = max_all_proc_score_heap[proc].top();
 
             if (top_node.score > max_score) {
-
                 if constexpr (use_memory_constraint) {
-
                     if (memory_constraint.can_add(top_node.node, proc)) {
-
                         max_score = top_node.score;
                         node = top_node.node;
                         p = proc;
                     }
 
                 } else {
-
                     max_score = top_node.score;
                     node = top_node.node;
                     p = proc;
@@ -142,32 +137,36 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
         }
     };
 
-    bool CanChooseNode(const BspInstance<Graph_t> &instance, const std::set<VertexType> &allReady,
-                       const std::vector<std::set<VertexType>> &procReady, const std::vector<bool> &procFree) const {
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !procReady[i].empty())
+    bool CanChooseNode(const BspInstance<Graph_t> &instance,
+                       const std::set<VertexType> &allReady,
+                       const std::vector<std::set<VertexType>> &procReady,
+                       const std::vector<bool> &procFree) const {
+        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+            if (procFree[i] && !procReady[i].empty()) {
                 return true;
+            }
+        }
 
-        if (!allReady.empty())
-            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-                if (procFree[i])
+        if (!allReady.empty()) {
+            for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+                if (procFree[i]) {
                     return true;
+                }
+            }
+        }
 
         return false;
     };
 
-    bool check_mem_feasibility(const BspInstance<Graph_t> &instance, const std::set<VertexType> &allReady,
+    bool check_mem_feasibility(const BspInstance<Graph_t> &instance,
+                               const std::set<VertexType> &allReady,
                                const std::vector<std::set<VertexType>> &procReady) const {
-
         if constexpr (use_memory_constraint) {
-
             if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) {
-
                 unsigned num_empty_proc = 0;
 
                 for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
                     if (!procReady[i].empty()) {
-
                         const heap_node &top_node = max_proc_score_heap[i].top();
 
                         // todo check if this is correct
@@ -183,9 +182,8 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
                     return true;
                 }
 
-                if (!allReady.empty())
+                if (!allReady.empty()) {
                     for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-
                         const heap_node &top_node = max_all_proc_score_heap[i].top();
 
                         // todo check if this is correct
@@ -193,6 +191,7 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
                             return true;
                         }
                     }
+                }
 
                 return false;
             }
@@ -207,14 +206,16 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
         std::vector<unsigned> ready_nodes_per_type = nr_ready_nodes_per_type;
         std::vector<unsigned> procs_per_type = nr_procs_per_type;
-        for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type)
-            for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type)
+        for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) {
+            for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) {
                 if (instance.isCompatibleType(node_type, proc_type)) {
                     unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]);
                     nr_nodes += matched;
                     ready_nodes_per_type[node_type] -= matched;
                     procs_per_type[proc_type] -= matched;
                 }
+            }
+        }
 
         return nr_nodes;
     }
@@ -241,7 +242,6 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
      * @return A pair containing the return status and the computed BspSchedule.
      */
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         const auto &instance = schedule.getInstance();
 
         for (const auto &v : instance.getComputationalDag().vertices()) {
@@ -268,8 +268,7 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
         std::set<VertexType> ready;
 
-        std::vector<std::vector<bool>> procInHyperedge =
-            std::vector<std::vector<bool>>(N, std::vector<bool>(params_p, false));
+        std::vector<std::vector<bool>> procInHyperedge = std::vector<std::vector<bool>>(N, std::vector<bool>(params_p, false));
 
         std::vector<std::set<VertexType>> procReady(params_p);
         std::set<VertexType> allReady;
@@ -280,8 +279,9 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
         std::vector<unsigned> nr_ready_nodes_per_type(G.num_vertex_types(), 0);
         std::vector<unsigned> nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < params_p; ++proc)
+        for (unsigned proc = 0; proc < params_p; ++proc) {
             ++nr_procs_per_type[instance.getArchitecture().processorType(proc)];
+        }
 
         std::set<std::pair<v_workw_t<Graph_t>, VertexType>> finishTimes;
         finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
@@ -301,7 +301,6 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
         bool endSupStep = false;
         while (!ready.empty() || !finishTimes.empty()) {
-
             if (finishTimes.empty() && endSupStep) {
                 for (unsigned proc = 0; proc < params_p; ++proc) {
                     procReady[proc].clear();
@@ -322,9 +321,9 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
                 for (const auto &v : ready) {
                     for (unsigned proc = 0; proc < params_p; ++proc) {
-
-                        if (!instance.isCompatible(v, proc))
+                        if (!instance.isCompatible(v, proc)) {
                             continue;
+                        }
 
                         double score = computeScore(v, proc, procInHyperedge, instance);
                         heap_node new_node(v, score);
@@ -343,13 +342,11 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
             // Find new ready jobs
             while (!finishTimes.empty() && finishTimes.begin()->first == time) {
-
                 const VertexType node = finishTimes.begin()->second;
                 finishTimes.erase(finishTimes.begin());
 
                 if (node != std::numeric_limits<VertexType>::max()) {
                     for (const auto &succ : G.children(node)) {
-
                         ++nrPredecDone[succ];
                         if (nrPredecDone[succ] == G.in_degree(succ)) {
                             ready.insert(succ);
@@ -357,34 +354,33 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
 
                             bool canAdd = true;
                             for (const auto &pred : G.parents(succ)) {
-
-                                if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) &&
-                                    schedule.assignedSuperstep(pred) == supstepIdx) {
+                                if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node)
+                                    && schedule.assignedSuperstep(pred) == supstepIdx) {
                                     canAdd = false;
                                     break;
                                 }
                             }
 
                             if constexpr (use_memory_constraint) {
-
                                 if (canAdd) {
-                                    if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node)))
+                                    if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) {
                                         canAdd = false;
+                                    }
                                 }
                             }
 
-                            if (!instance.isCompatible(succ, schedule.assignedProcessor(node)))
+                            if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) {
                                 canAdd = false;
+                            }
 
                             if (canAdd) {
                                 procReady[schedule.assignedProcessor(node)].insert(succ);
 
-                                double score =
-                                    computeScore(succ, schedule.assignedProcessor(node), procInHyperedge, instance);
+                                double score = computeScore(succ, schedule.assignedProcessor(node), procInHyperedge, instance);
 
                                 heap_node new_node(succ, score);
-                                node_proc_heap_handles[schedule.assignedProcessor(node)][succ] =
-                                    max_proc_score_heap[schedule.assignedProcessor(node)].push(new_node);
+                                node_proc_heap_handles[schedule.assignedProcessor(node)][succ]
+                                    = max_proc_score_heap[schedule.assignedProcessor(node)].push(new_node);
                             }
                         }
                     }
@@ -393,8 +389,9 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
                 }
             }
 
-            if (endSupStep)
+            if (endSupStep) {
                 continue;
+            }
 
             // Assign new jobs to processors
             if (!CanChooseNode(instance, allReady, procReady, procFree)) {
@@ -402,7 +399,6 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
             }
 
             while (CanChooseNode(instance, allReady, procReady, procFree)) {
-
                 VertexType nextNode = std::numeric_limits<VertexType>::max();
                 unsigned nextProc = instance.numberOfProcessors();
                 Choose(instance, procReady, procFree, nextNode, nextProc);
@@ -413,14 +409,12 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
                 }
 
                 if (procReady[nextProc].find(nextNode) != procReady[nextProc].end()) {
-
                     procReady[nextProc].erase(nextNode);
 
                     max_proc_score_heap[nextProc].erase(node_proc_heap_handles[nextProc][nextNode]);
                     node_proc_heap_handles[nextProc].erase(nextNode);
 
                 } else {
-
                     allReady.erase(nextNode);
 
                     for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
@@ -461,7 +455,6 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
                 procInHyperedge[nextNode][nextProc] = true;
 
                 for (const auto &pred : G.parents(nextNode)) {
-
                     if (procInHyperedge[pred][nextProc]) {
                         continue;
                     }
@@ -469,21 +462,17 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
                     procInHyperedge[pred][nextProc] = true;
 
                     for (const auto &child : G.children(pred)) {
-
                         if (child != nextNode && procReady[nextProc].find(child) != procReady[nextProc].end()) {
-
-                            (*node_proc_heap_handles[nextProc][child]).score +=
-                                static_cast<double>(instance.getComputationalDag().vertex_comm_weight(pred)) /
-                                static_cast<double>(instance.getComputationalDag().out_degree(pred));
+                            (*node_proc_heap_handles[nextProc][child]).score
+                                += static_cast<double>(instance.getComputationalDag().vertex_comm_weight(pred))
+                                   / static_cast<double>(instance.getComputationalDag().out_degree(pred));
                             max_proc_score_heap[nextProc].update(node_proc_heap_handles[nextProc][child]);
                         }
 
-                        if (child != nextNode && allReady.find(child) != allReady.end() &&
-                            instance.isCompatible(child, nextProc)) {
-
-                            (*node_all_proc_heap_handles[nextProc][child]).score +=
-                                static_cast<double>(instance.getComputationalDag().vertex_comm_weight(pred)) /
-                                static_cast<double>(instance.getComputationalDag().out_degree(pred));
+                        if (child != nextNode && allReady.find(child) != allReady.end() && instance.isCompatible(child, nextProc)) {
+                            (*node_all_proc_heap_handles[nextProc][child]).score
+                                += static_cast<double>(instance.getComputationalDag().vertex_comm_weight(pred))
+                                   / static_cast<double>(instance.getComputationalDag().out_degree(pred));
                             max_all_proc_score_heap[nextProc].update(node_all_proc_heap_handles[nextProc][child]);
                         }
                     }
@@ -491,18 +480,16 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
             }
 
             if constexpr (use_memory_constraint) {
-
                 if (not check_mem_feasibility(instance, allReady, procReady)) {
-
                     return RETURN_STATUS::ERROR;
                 }
             }
 
-            if (free > static_cast<unsigned>(static_cast<float>(params_p) * max_percent_idle_processors) &&
-                ((!increase_parallelism_in_new_superstep) ||
-                 get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) >=
-                     std::min(std::min(params_p, static_cast<unsigned>(1.2 * (params_p - free))),
-                              params_p - free + (static_cast<unsigned>(0.5 * free))))) {
+            if (free > static_cast<unsigned>(static_cast<float>(params_p) * max_percent_idle_processors)
+                && ((!increase_parallelism_in_new_superstep)
+                    || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type)
+                           >= std::min(std::min(params_p, static_cast<unsigned>(1.2 * (params_p - free))),
+                                       params_p - free + (static_cast<unsigned>(0.5 * free))))) {
                 endSupStep = true;
             }
         }
@@ -522,4 +509,4 @@ class GreedyBspScheduler : public Scheduler<Graph_t> {
     std::string getScheduleName() const override { return "BspGreedy"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp
index 9b3880b9..57b043dc 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp
@@ -28,17 +28,15 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class GreedyChildren : public Scheduler<Graph_t> {
   private:
     bool ensure_enough_sources;
 
   public:
-
     GreedyChildren(bool ensure_enough_sources_ = true) : Scheduler<Graph_t>(), ensure_enough_sources(ensure_enough_sources_) {};
 
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &sched) override {
-
         using VertexType = vertex_idx_t<Graph_t>;
         const auto &instance = sched.getInstance();
 
@@ -87,8 +85,9 @@ class GreedyChildren : public Scheduler<Graph_t> {
                         }
                     }
 
-                    if (failed_to_allocate)
+                    if (failed_to_allocate) {
                         continue;
+                    }
 
                     sched.setAssignedSuperstep(node, superstep_counter);
                     if (processor_set) {
@@ -123,8 +122,9 @@ class GreedyChildren : public Scheduler<Graph_t> {
                     node_added = true;
                     break;
                 }
-                if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors())
+                if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) {
                     break;
+                }
             }
 
             superstep_counter++;
@@ -133,9 +133,7 @@ class GreedyChildren : public Scheduler<Graph_t> {
         return RETURN_STATUS::OSP_SUCCESS;
     }
 
-    std::string getScheduleName() const override {
-        return ensure_enough_sources ? "GreedyChildrenS" : "GreedyChildren";
-    }
+    std::string getScheduleName() const override { return ensure_enough_sources ? "GreedyChildrenS" : "GreedyChildren"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp
index 890f779c..9aff8fb3 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp
@@ -18,11 +18,12 @@ limitations under the License.
 
 #pragma once
 
+#include <string>
+#include <vector>
+
 #include "osp/bsp/model/cost/LazyCommunicationCost.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/bsp/scheduler/Serial.hpp"
-#include <string>
-#include <vector>
 
 namespace osp {
 
@@ -38,9 +39,8 @@ namespace osp {
  * @tparam Graph_t The graph type representing the computational DAG.
  * @tparam CostModel The cost model functor to evaluate schedules. Defaults to LazyCommunicationCost.
  */
-template<typename Graph_t, typename CostModel = LazyCommunicationCost<Graph_t>>
+template <typename Graph_t, typename CostModel = LazyCommunicationCost<Graph_t>>
 class GreedyMetaScheduler : public Scheduler<Graph_t> {
-
     Serial<Graph_t> serial_scheduler_;
     std::vector<Scheduler<Graph_t> *> schedulers_;
 
@@ -58,13 +58,16 @@ class GreedyMetaScheduler : public Scheduler<Graph_t> {
     ~GreedyMetaScheduler() override = default;
 
     void addSerialScheduler() { schedulers_.push_back(&serial_scheduler_); }
+
     void addScheduler(Scheduler<Graph_t> &s) { schedulers_.push_back(&s); }
+
     void resetScheduler() { schedulers_.clear(); }
 
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
         if (schedule.getInstance().getArchitecture().numberOfProcessors() == 1) {
-            if constexpr (verbose)
+            if constexpr (verbose) {
                 std::cout << "Using serial scheduler for P=1." << std::endl;
+            }
             serial_scheduler_.computeSchedule(schedule);
             return RETURN_STATUS::OSP_SUCCESS;
         }
@@ -76,14 +79,17 @@ class GreedyMetaScheduler : public Scheduler<Graph_t> {
             scheduler->computeSchedule(current_schedule);
             const v_workw_t<Graph_t> schedule_cost = CostModel()(current_schedule);
 
-            if constexpr (verbose)
-                std::cout << "Executed scheduler " << scheduler->getScheduleName() << ", costs: " << schedule_cost << ", nr. supersteps: " << current_schedule.numberOfSupersteps() << std::endl;
+            if constexpr (verbose) {
+                std::cout << "Executed scheduler " << scheduler->getScheduleName() << ", costs: " << schedule_cost
+                          << ", nr. supersteps: " << current_schedule.numberOfSupersteps() << std::endl;
+            }
 
             if (schedule_cost < best_schedule_cost) {
                 best_schedule_cost = schedule_cost;
                 schedule = current_schedule;
-                if constexpr (verbose)
+                if constexpr (verbose) {
                     std::cout << "New best schedule!" << std::endl;
+                }
             }
         }
 
@@ -93,4 +99,4 @@ class GreedyMetaScheduler : public Scheduler<Graph_t> {
     std::string getScheduleName() const override { return "GreedyMetaScheduler"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp
index bb88ad78..5f943110 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp
@@ -21,198 +21,202 @@ limitations under the License.
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
 
 namespace osp {
+
 /**
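- * @brief The GreedyReccomputer class applies a greedy algorithm to remove some of the communciation steps in
+ * @brief The GreedyRecomputer class applies a greedy algorithm to remove some of the communication steps in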
  * a BspSchedule by recomputation steps if this decreases the cost.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class GreedyRecomputer {
-
     static_assert(is_computational_dag_v<Graph_t>, "GreedyRecomputer can only be used with computational DAGs.");
-    
-private:
+
+  private:
     using vertex_idx = vertex_idx_t<Graph_t>;
     using cost_type = v_workw_t<Graph_t>;
     using KeyTriple = std::tuple<vertex_idx_t<Graph_t>, unsigned int, unsigned int>;
 
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "GreedyRecomputer requires work and comm. weights to have the same type.");
-
-
-public:
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "GreedyRecomputer requires work and comm. weights to have the same type.");
 
+  public:
     /**
      * @brief Default destructor for GreedyRecomputer.
      */
     virtual ~GreedyRecomputer() = default;
 
-    RETURN_STATUS computeRecompSchedule(BspScheduleCS<Graph_t> &initial_schedule, BspScheduleRecomp<Graph_t>& out_schedule) const;
-
+    RETURN_STATUS computeRecompSchedule(BspScheduleCS<Graph_t> &initial_schedule, BspScheduleRecomp<Graph_t> &out_schedule) const;
 };
 
-template<typename Graph_t>
-RETURN_STATUS GreedyRecomputer<Graph_t>::computeRecompSchedule(BspScheduleCS<Graph_t> &initial_schedule, BspScheduleRecomp<Graph_t>& out_schedule) const
-{
-    const vertex_idx& N = initial_schedule.getInstance().numberOfVertices();
-    const unsigned& P = initial_schedule.getInstance().numberOfProcessors();
-    const unsigned& S = initial_schedule.numberOfSupersteps();
-    const Graph_t& G = initial_schedule.getInstance().getComputationalDag();
+template <typename Graph_t>
+RETURN_STATUS GreedyRecomputer<Graph_t>::computeRecompSchedule(BspScheduleCS<Graph_t> &initial_schedule,
+                                                               BspScheduleRecomp<Graph_t> &out_schedule) const {
+    const vertex_idx &N = initial_schedule.getInstance().numberOfVertices();
+    const unsigned &P = initial_schedule.getInstance().numberOfProcessors();
+    const unsigned &S = initial_schedule.numberOfSupersteps();
+    const Graph_t &G = initial_schedule.getInstance().getComputationalDag();
 
     out_schedule = BspScheduleRecomp<Graph_t>(initial_schedule.getInstance());
     out_schedule.setNumberOfSupersteps(initial_schedule.numberOfSupersteps());
 
     // Initialize required data structures
-    std::vector<std::vector<cost_type>> work_cost(P, std::vector<cost_type>(S, 0)),
-                                        send_cost(P, std::vector<cost_type>(S, 0)),
-                                        rec_cost(P, std::vector<cost_type>(S, 0));
+    std::vector<std::vector<cost_type>> work_cost(P, std::vector<cost_type>(S, 0)), send_cost(P, std::vector<cost_type>(S, 0)),
+        rec_cost(P, std::vector<cost_type>(S, 0));
 
     std::vector<std::vector<unsigned>> first_computable(N, std::vector<unsigned>(P, 0U)),
-                                        first_present(N, std::vector<unsigned>(P, std::numeric_limits<unsigned>::max()));
-    
-    std::vector<std::vector<std::multiset<unsigned> > > needed_on_proc(N, std::vector<std::multiset<unsigned> >(P, {S}));
-    
+        first_present(N, std::vector<unsigned>(P, std::numeric_limits<unsigned>::max()));
+
+    std::vector<std::vector<std::multiset<unsigned>>> needed_on_proc(N, std::vector<std::multiset<unsigned>>(P, {S}));
+
     std::vector<cost_type> max_work(S, 0), max_comm(S, 0);
 
-    std::vector<std::set<KeyTriple> > comm_steps(S);                              
+    std::vector<std::set<KeyTriple>> comm_steps(S);
 
-    for(vertex_idx node = 0; node < N; ++node)
-    {
-      const unsigned& proc = initial_schedule.assignedProcessor(node);
-      const unsigned& step = initial_schedule.assignedSuperstep(node);
+    for (vertex_idx node = 0; node < N; ++node) {
+        const unsigned &proc = initial_schedule.assignedProcessor(node);
+        const unsigned &step = initial_schedule.assignedSuperstep(node);
 
-      work_cost[proc][step] += G.vertex_work_weight(node);
-      first_present[node][proc] = std::min(first_present[node][proc], step);
-      for(vertex_idx pred : G.parents(node))
-        needed_on_proc[pred][proc].insert(step);
-      
-      out_schedule.assignments(node).emplace_back(proc, step);
+        work_cost[proc][step] += G.vertex_work_weight(node);
+        first_present[node][proc] = std::min(first_present[node][proc], step);
+        for (vertex_idx pred : G.parents(node)) {
+            needed_on_proc[pred][proc].insert(step);
+        }
+
+        out_schedule.assignments(node).emplace_back(proc, step);
     }
-    for(const std::pair<KeyTriple, unsigned> item : initial_schedule.getCommunicationSchedule())
-    {
-      const vertex_idx& node = std::get<0>(item.first);
-      const unsigned& from_proc = std::get<1>(item.first);
-      const unsigned& to_proc = std::get<2>(item.first);
-      const unsigned& step = item.second;
-      send_cost[from_proc][step] += G.vertex_comm_weight(node) * 
-                                      initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc);
-      rec_cost[to_proc][step] += G.vertex_comm_weight(node) * 
-                                      initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc);
-
-      comm_steps[step].emplace(item.first);
-      needed_on_proc[node][from_proc].insert(step);
-      first_present[node][to_proc] = std::min(first_present[node][to_proc], step+1);
+    for (const std::pair<KeyTriple, unsigned> item : initial_schedule.getCommunicationSchedule()) {
+        const vertex_idx &node = std::get<0>(item.first);
+        const unsigned &from_proc = std::get<1>(item.first);
+        const unsigned &to_proc = std::get<2>(item.first);
+        const unsigned &step = item.second;
+        send_cost[from_proc][step] += G.vertex_comm_weight(node)
+                                      * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc);
+        rec_cost[to_proc][step] += G.vertex_comm_weight(node)
+                                   * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc);
+
+        comm_steps[step].emplace(item.first);
+        needed_on_proc[node][from_proc].insert(step);
+        first_present[node][to_proc] = std::min(first_present[node][to_proc], step + 1);
+    }
+    for (unsigned step = 0; step < S; ++step) {
+        for (unsigned proc = 0; proc < P; ++proc) {
+            max_work[step] = std::max(max_work[step], work_cost[proc][step]);
+            max_comm[step] = std::max(max_comm[step], send_cost[proc][step]);
+            max_comm[step] = std::max(max_comm[step], rec_cost[proc][step]);
+        }
     }
-    for(unsigned step = 0; step < S; ++step)
-      for(unsigned proc = 0; proc < P; ++proc)
-      {
-        max_work[step] =std::max(max_work[step], work_cost[proc][step]);
-        max_comm[step] =std::max(max_comm[step], send_cost[proc][step]);
-        max_comm[step] =std::max(max_comm[step], rec_cost[proc][step]);
-      }
-
-    for(vertex_idx node = 0; node < N; ++node)
-      for(const vertex_idx& pred : G.parents(node))
-        for(unsigned proc = 0; proc < P; ++proc)
-          first_computable[node][proc] = std::max(first_computable[node][proc], first_present[pred][proc]);
-    
+
+    for (vertex_idx node = 0; node < N; ++node) {
+        for (const vertex_idx &pred : G.parents(node)) {
+            for (unsigned proc = 0; proc < P; ++proc) {
+                first_computable[node][proc] = std::max(first_computable[node][proc], first_present[pred][proc]);
+            }
+        }
+    }
+
     // Find improvement steps
     bool still_improved = true;
-    while(still_improved)
-    {
-      still_improved = false;
-
-      for(unsigned step = 0; step < S; ++step)
-      {
-        std::vector<KeyTriple> to_erase;
-        for(const KeyTriple& entry : comm_steps[step])
-        {
-          const vertex_idx& node = std::get<0>(entry);
-          const unsigned& from_proc = std::get<1>(entry);
-          const unsigned& to_proc = std::get<2>(entry);
-
-          // check how much comm cost we save by removing comm schedule entry
-          cost_type comm_induced = G.vertex_comm_weight(node) * 
-                                      initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc);
-
-          cost_type new_max_comm = 0;
-          for(unsigned proc = 0; proc < P; ++proc)
-          {
-            if(proc == from_proc)
-              new_max_comm = std::max(new_max_comm, send_cost[proc][step]-comm_induced);
-            else
-              new_max_comm = std::max(new_max_comm, send_cost[proc][step]);
-            if(proc == to_proc)
-              new_max_comm = std::max(new_max_comm, rec_cost[proc][step]-comm_induced);
-            else
-              new_max_comm = std::max(new_max_comm, rec_cost[proc][step]);
-          }
-          if(new_max_comm == max_comm[step])
-            continue;
-
-          if(!initial_schedule.getInstance().isCompatible(node, to_proc))
-            continue;
-
-          cost_type decrease = max_comm[step] - new_max_comm;
-          if(max_comm[step] > 0 && new_max_comm == 0)
-            decrease += initial_schedule.getInstance().getArchitecture().synchronisationCosts();
-
-          // check how much it would increase the work cost instead
-          unsigned best_step = S; 
-          cost_type smallest_increase = std::numeric_limits<cost_type>::max();
-          for(unsigned comp_step = first_computable[node][to_proc]; comp_step <= *needed_on_proc[node][to_proc].begin(); ++comp_step)
-          {
-            cost_type increase = work_cost[to_proc][comp_step] + G.vertex_work_weight(node) > max_work[comp_step] ?
-                                work_cost[to_proc][comp_step] + G.vertex_work_weight(node) - max_work[comp_step] : 0 ;
-            
-            if(increase < smallest_increase)
-            {
-              best_step = comp_step;
-              smallest_increase = increase;
+    while (still_improved) {
+        still_improved = false;
+
+        for (unsigned step = 0; step < S; ++step) {
+            std::vector<KeyTriple> to_erase;
+            for (const KeyTriple &entry : comm_steps[step]) {
+                const vertex_idx &node = std::get<0>(entry);
+                const unsigned &from_proc = std::get<1>(entry);
+                const unsigned &to_proc = std::get<2>(entry);
+
+                // check how much comm cost we save by removing comm schedule entry
+                cost_type comm_induced = G.vertex_comm_weight(node)
+                                         * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc);
+
+                cost_type new_max_comm = 0;
+                for (unsigned proc = 0; proc < P; ++proc) {
+                    if (proc == from_proc) {
+                        new_max_comm = std::max(new_max_comm, send_cost[proc][step] - comm_induced);
+                    } else {
+                        new_max_comm = std::max(new_max_comm, send_cost[proc][step]);
+                    }
+                    if (proc == to_proc) {
+                        new_max_comm = std::max(new_max_comm, rec_cost[proc][step] - comm_induced);
+                    } else {
+                        new_max_comm = std::max(new_max_comm, rec_cost[proc][step]);
+                    }
+                }
+                if (new_max_comm == max_comm[step]) {
+                    continue;
+                }
+
+                if (!initial_schedule.getInstance().isCompatible(node, to_proc)) {
+                    continue;
+                }
+
+                cost_type decrease = max_comm[step] - new_max_comm;
+                if (max_comm[step] > 0 && new_max_comm == 0) {
+                    decrease += initial_schedule.getInstance().getArchitecture().synchronisationCosts();
+                }
+
+                // check how much it would increase the work cost instead
+                unsigned best_step = S;
+                cost_type smallest_increase = std::numeric_limits<cost_type>::max();
+                for (unsigned comp_step = first_computable[node][to_proc]; comp_step <= *needed_on_proc[node][to_proc].begin();
+                     ++comp_step) {
+                    cost_type increase = work_cost[to_proc][comp_step] + G.vertex_work_weight(node) > max_work[comp_step]
+                                             ? work_cost[to_proc][comp_step] + G.vertex_work_weight(node) - max_work[comp_step]
+                                             : 0;
+
+                    if (increase < smallest_increase) {
+                        best_step = comp_step;
+                        smallest_increase = increase;
+                    }
+                }
+
+                // check if this modification is beneficial
+                if (best_step == S || smallest_increase > decrease) {
+                    continue;
+                }
+
+                // execute the modification
+                to_erase.emplace_back(entry);
+                out_schedule.assignments(node).emplace_back(to_proc, best_step);
+
+                send_cost[from_proc][step] -= comm_induced;
+                rec_cost[to_proc][step] -= comm_induced;
+                max_comm[step] = new_max_comm;
+
+                work_cost[to_proc][best_step] += G.vertex_work_weight(node);
+                max_work[best_step] += smallest_increase;
+
+                // update movability bounds
+                for (const vertex_idx &pred : G.parents(node)) {
+                    needed_on_proc[pred][to_proc].insert(best_step);
+                }
+
+                needed_on_proc[node][from_proc].erase(needed_on_proc[node][from_proc].lower_bound(step));
+
+                first_present[node][to_proc] = best_step;
+                for (const vertex_idx &succ : G.children(node)) {
+                    for (const vertex_idx &pred : G.parents(node)) {
+                        first_computable[succ][to_proc] = std::max(first_computable[succ][to_proc], first_present[pred][to_proc]);
+                    }
+                }
+
+                still_improved = true;
+            }
+            for (const KeyTriple &entry : to_erase) {
+                comm_steps[step].erase(entry);
             }
-          }
-
-          // check if this modification is beneficial
-          if(best_step == S || smallest_increase > decrease)
-            continue;
-
-          // execute the modification
-          to_erase.emplace_back(entry);
-          out_schedule.assignments(node).emplace_back(to_proc, best_step);
-
-          send_cost[from_proc][step] -= comm_induced;
-          rec_cost[to_proc][step] -= comm_induced;
-          max_comm[step] = new_max_comm;
-
-          work_cost[to_proc][best_step] += G.vertex_work_weight(node);
-          max_work[best_step] += smallest_increase;
-
-          // update movability bounds
-          for(const vertex_idx& pred : G.parents(node))
-            needed_on_proc[pred][to_proc].insert(best_step);
-
-          needed_on_proc[node][from_proc].erase(needed_on_proc[node][from_proc].lower_bound(step));
-
-          first_present[node][to_proc] = best_step;
-          for(const vertex_idx& succ : G.children(node))
-          {
-            for(const vertex_idx& pred : G.parents(node))
-              first_computable[succ][to_proc] = std::max(first_computable[succ][to_proc], first_present[pred][to_proc]);
-          }
-
-          still_improved = true;
-
         }
-        for(const KeyTriple& entry : to_erase)
-          comm_steps[step].erase(entry);
-      }
     }
 
-    for(unsigned step = 0; step < S; ++step)
-      for(const KeyTriple& entry : comm_steps[step])
-        out_schedule.getCommunicationSchedule().emplace(entry, step);
+    for (unsigned step = 0; step < S; ++step) {
+        for (const KeyTriple &entry : comm_steps[step]) {
+            out_schedule.getCommunicationSchedule().emplace(entry, step);
+        }
+    }
 
     out_schedule.mergeSupersteps();
 
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp
index 7a6c454d..857e4e02 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp
@@ -42,16 +42,15 @@ namespace osp {
  *
  * It computes schedules for BspInstance using variance-based priorities.
  */
-template<typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
 class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "GreedyVarianceSspScheduler can only be used with computational DAGs.");
 
   private:
     using VertexType = vertex_idx_t<Graph_t>;
 
-    constexpr static bool use_memory_constraint =
-        is_memory_constraint_v<MemoryConstraint_t> or is_memory_constraint_schedule_v<MemoryConstraint_t>;
+    constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>
+                                                  or is_memory_constraint_schedule_v<MemoryConstraint_t>;
 
     static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
                   "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
@@ -75,28 +74,19 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
             }
             temp = std::log(temp) / 2 + max_priority;
 
-            double node_weight = std::log(
-                static_cast<double>(
-                    std::max(
-                        graph.vertex_work_weight(*r_iter),
-                        static_cast<v_workw_t<Graph_t>>(1)
-                    )
-                )
-            );
+            double node_weight
+                = std::log(static_cast<double>(std::max(graph.vertex_work_weight(*r_iter), static_cast<v_workw_t<Graph_t>>(1))));
             double larger_val = node_weight > temp ? node_weight : temp;
 
-            work_variance[*r_iter] =
-                std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val;
+            work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val;
         }
 
         return work_variance;
     }
 
-    std::vector<std::vector<std::vector<unsigned>>>
-    procTypesCompatibleWithNodeType_omit_procType(const BspInstance<Graph_t> &instance) const {
-
-        const std::vector<std::vector<unsigned>> procTypesCompatibleWithNodeType =
-            instance.getProcTypesCompatibleWithNodeType();
+    std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_omit_procType(
+        const BspInstance<Graph_t> &instance) const {
+        const std::vector<std::vector<unsigned>> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType();
 
         std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_skip(
             instance.getArchitecture().getNumberOfProcessorTypes(),
@@ -104,8 +94,9 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) {
             for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) {
                 for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) {
-                    if (procType == otherProcType)
+                    if (procType == otherProcType) {
                         continue;
+                    }
                     procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType);
                 }
             }
@@ -124,38 +115,42 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
                        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
                        const std::vector<bool> &procFree) const {
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !procReady[i].empty())
+        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+            if (procFree[i] && !procReady[i].empty()) {
                 return true;
+            }
+        }
 
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty())
+        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+            if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) {
                 return true;
+            }
+        }
 
         return false;
     }
 
     void Choose(const BspInstance<Graph_t> &instance,
-            const std::vector<double> &work_variance,
-            std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
-            std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
-            const std::vector<bool> &procFree,
-            VertexType &node, unsigned &p,
-            const bool endSupStep,
-            const v_workw_t<Graph_t> remaining_time,
-            const std::vector<std::vector<std::vector<unsigned>>> &procTypesCompatibleWithNodeType_skip_proctype) const 
-    {
+                const std::vector<double> &work_variance,
+                std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
+                std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
+                const std::vector<bool> &procFree,
+                VertexType &node,
+                unsigned &p,
+                const bool endSupStep,
+                const v_workw_t<Graph_t> remaining_time,
+                const std::vector<std::vector<std::vector<unsigned>>> &procTypesCompatibleWithNodeType_skip_proctype) const {
         double maxScore = -1;
         bool found_allocation = false;
 
         for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-            if (!procFree[i] || procReady[i].empty())
+            if (!procFree[i] || procReady[i].empty()) {
                 continue;
+            }
 
             auto it = procReady[i].begin();
             while (it != procReady[i].end()) {
-                if (endSupStep &&
-                    (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) {
+                if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) {
                     it = procReady[i].erase(it);
                     continue;
                 }
@@ -172,15 +167,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                             found_allocation = true;
 
                             if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) {
-                                const auto &compatibleTypes =
-                                    procTypesCompatibleWithNodeType_skip_proctype[procType]
-                                        [instance.getComputationalDag().vertex_type(node)];
+                                const auto &compatibleTypes
+                                    = procTypesCompatibleWithNodeType_skip_proctype[procType]
+                                                                                   [instance.getComputationalDag().vertex_type(node)];
 
                                 for (unsigned otherType : compatibleTypes) {
                                     for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) {
-                                        if (j != i &&
-                                            instance.getArchitecture().processorType(j) == otherType &&
-                                            j < procReady.size()) {
+                                        if (j != i && instance.getArchitecture().processorType(j) == otherType
+                                            && j < procReady.size()) {
                                             procReady[j].erase(std::make_pair(node, work_variance[node]));
                                         }
                                     }
@@ -195,15 +189,13 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                         found_allocation = true;
 
                         if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) {
-                            const auto &compatibleTypes =
-                                procTypesCompatibleWithNodeType_skip_proctype[procType]
-                                    [instance.getComputationalDag().vertex_type(node)];
+                            const auto &compatibleTypes
+                                = procTypesCompatibleWithNodeType_skip_proctype[procType]
+                                                                               [instance.getComputationalDag().vertex_type(node)];
 
                             for (unsigned otherType : compatibleTypes) {
                                 for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) {
-                                    if (j != i &&
-                                        instance.getArchitecture().processorType(j) == otherType &&
-                                        j < procReady.size()) {
+                                    if (j != i && instance.getArchitecture().processorType(j) == otherType && j < procReady.size()) {
                                         procReady[j].erase(std::make_pair(node, work_variance[node]));
                                     }
                                 }
@@ -218,20 +210,21 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
             }
         }
 
-        if (found_allocation)
+        if (found_allocation) {
             return;
+        }
 
         for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
             const unsigned procType = instance.getArchitecture().processorType(i);
-            if (!procFree[i] || procType >= allReady.size() || allReady[procType].empty())
+            if (!procFree[i] || procType >= allReady.size() || allReady[procType].empty()) {
                 continue;
+            }
 
             auto &readyList = allReady[procType];
             auto it = readyList.begin();
 
             while (it != readyList.end()) {
-                if (endSupStep &&
-                    (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) {
+                if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) {
                     it = readyList.erase(it);
                     continue;
                 }
@@ -244,13 +237,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                             node = it->first;
                             p = i;
 
-                            const auto &compatibleTypes =
-                                procTypesCompatibleWithNodeType_skip_proctype[procType]
-                                    [instance.getComputationalDag().vertex_type(node)];
+                            const auto &compatibleTypes
+                                = procTypesCompatibleWithNodeType_skip_proctype[procType]
+                                                                               [instance.getComputationalDag().vertex_type(node)];
 
                             for (unsigned otherType : compatibleTypes) {
-                                if (otherType < allReady.size())
+                                if (otherType < allReady.size()) {
                                     allReady[otherType].erase(std::make_pair(node, work_variance[node]));
+                                }
                             }
 
                             return;
@@ -259,15 +253,16 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                         node = it->first;
                         p = i;
 
-                        const auto &compatibleTypes =
-                            procTypesCompatibleWithNodeType_skip_proctype[procType]
-                                [instance.getComputationalDag().vertex_type(node)];
+                        const auto &compatibleTypes
+                            = procTypesCompatibleWithNodeType_skip_proctype[procType]
+                                                                           [instance.getComputationalDag().vertex_type(node)];
 
                         for (unsigned otherType : compatibleTypes) {
-                            if (otherType < allReady.size())
+                            if (otherType < allReady.size()) {
                                 allReady[otherType].erase(std::make_pair(node, work_variance[node]));
+                            }
                         }
-                        
+
                         return;
                     }
                 }
@@ -276,18 +271,13 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         }
     };
 
-
-    bool check_mem_feasibility(
-        const BspInstance<Graph_t> &instance,
-        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
-        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady) const 
-    {
+    bool check_mem_feasibility(const BspInstance<Graph_t> &instance,
+                               const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
+                               const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady) const {
         if constexpr (use_memory_constraint) {
-            if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) 
-            {
+            if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) {
                 for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
                     if (!procReady[i].empty()) {
-
                         const std::pair<VertexType, double> &node_pair = *procReady[i].begin();
                         VertexType top_node = node_pair.first;
 
@@ -298,12 +288,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                 }
 
                 for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-
-                    if (allReady[instance.getArchitecture().processorType(i)].empty())
+                    if (allReady[instance.getArchitecture().processorType(i)].empty()) {
                         continue;
+                    }
 
-                    const std::pair<VertexType, double> &node_pair =
-                        *allReady[instance.getArchitecture().processorType(i)].begin();
+                    const std::pair<VertexType, double> &node_pair = *allReady[instance.getArchitecture().processorType(i)].begin();
                     VertexType top_node = node_pair.first;
 
                     if (memory_constraint.can_add(top_node, i)) {
@@ -318,14 +307,12 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         return true;
     }
 
-    unsigned get_nr_parallelizable_nodes(
-        const BspInstance<Graph_t> &instance,
-        const unsigned &stale,
-        const std::vector<unsigned> &nr_old_ready_nodes_per_type,
-        const std::vector<unsigned> &nr_ready_nodes_per_type,
-        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
-        const std::vector<unsigned> &nr_procs_per_type) const 
-    {
+    unsigned get_nr_parallelizable_nodes(const BspInstance<Graph_t> &instance,
+                                         const unsigned &stale,
+                                         const std::vector<unsigned> &nr_old_ready_nodes_per_type,
+                                         const std::vector<unsigned> &nr_ready_nodes_per_type,
+                                         const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
+                                         const std::vector<unsigned> &nr_procs_per_type) const {
         unsigned nr_nodes = 0;
         unsigned num_proc_types = instance.getArchitecture().getNumberOfProcessorTypes();
 
@@ -348,8 +335,7 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         for (unsigned proc_type = 0; proc_type < num_proc_types; ++proc_type) {
             for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) {
                 if (instance.isCompatibleType(node_type, proc_type)) {
-                    unsigned matched = std::min(ready_nodes_per_type[node_type],
-                                                procs_per_type[proc_type]);
+                    unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]);
                     nr_nodes += matched;
                     ready_nodes_per_type[node_type] -= matched;
                     procs_per_type[proc_type] -= matched;
@@ -360,18 +346,17 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         return nr_nodes;
     }
 
-    public:
-
+  public:
     /**
-    * @brief Default constructor for GreedyVarianceSspScheduler.
-    */
+     * @brief Default constructor for GreedyVarianceSspScheduler.
+     */
     GreedyVarianceSspScheduler(float max_percent_idle_processors_ = 0.2f, bool increase_parallelism_in_new_superstep_ = true)
         : max_percent_idle_processors(max_percent_idle_processors_),
           increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {}
 
     /**
-    * @brief Default destructor for GreedyVarianceSspScheduler.
-    */
+     * @brief Default destructor for GreedyVarianceSspScheduler.
+     */
     virtual ~GreedyVarianceSspScheduler() = default;
 
     RETURN_STATUS computeSspSchedule(BspSchedule<Graph_t> &schedule, unsigned stale) {
@@ -398,12 +383,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
             instance.getArchitecture().getNumberOfProcessorTypes());
 
         const auto procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType();
-        const std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_skip_proctype =
-            procTypesCompatibleWithNodeType_omit_procType(instance);
+        const std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_skip_proctype
+            = procTypesCompatibleWithNodeType_omit_procType(instance);
 
         std::vector<unsigned> nr_old_ready_nodes_per_type(G.num_vertex_types(), 0);
-        std::vector<std::vector<unsigned>> nr_ready_stale_nodes_per_type(
-            stale, std::vector<unsigned>(G.num_vertex_types(), 0));
+        std::vector<std::vector<unsigned>> nr_ready_stale_nodes_per_type(stale, std::vector<unsigned>(G.num_vertex_types(), 0));
         std::vector<unsigned> nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
         for (auto proc = 0u; proc < P; ++proc) {
             ++nr_procs_per_type[instance.getArchitecture().processorType(proc)];
@@ -428,8 +412,10 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         std::set<std::pair<v_workw_t<Graph_t>, VertexType>> finishTimes;
         finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
 
-        std::vector<unsigned> number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
-        std::vector<unsigned> limit_of_number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
+        std::vector<unsigned> number_of_allocated_allReady_tasks_in_superstep(
+            instance.getArchitecture().getNumberOfProcessorTypes(), 0);
+        std::vector<unsigned> limit_of_number_of_allocated_allReady_tasks_in_superstep(
+            instance.getArchitecture().getNumberOfProcessorTypes(), 0);
 
         bool endSupStep = true;
         bool begin_outer_while = true;
@@ -437,17 +423,21 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
         unsigned successive_empty_supersteps = 0u;
 
         auto nonempty_ready = [&]() {
-            return std::any_of(ready.cbegin(), ready.cend(),
-                             [](const std::set<std::pair<VertexType, double>, VarianceCompare>& ready_set) { return !ready_set.empty(); });
+            return std::any_of(
+                ready.cbegin(), ready.cend(), [](const std::set<std::pair<VertexType, double>, VarianceCompare> &ready_set) {
+                    return !ready_set.empty();
+                });
         };
 
         while (!old_ready.empty() || nonempty_ready() || !finishTimes.empty()) {
             if (finishTimes.empty() && endSupStep) {
                 able_to_schedule_in_step = false;
-                number_of_allocated_allReady_tasks_in_superstep = std::vector<unsigned>(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
+                number_of_allocated_allReady_tasks_in_superstep
+                    = std::vector<unsigned>(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
 
-                for (unsigned i = 0; i < P; ++i)
+                for (unsigned i = 0; i < P; ++i) {
                     procReady[supstepIdx % stale][i].clear();
+                }
 
                 if (!begin_outer_while) {
                     supstepIdx++;
@@ -455,8 +445,9 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                     begin_outer_while = false;
                 }
 
-                 for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType)
+                for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) {
                     allReady[procType].clear();
+                }
 
                 old_ready.insert(ready[supstepIdx % stale].begin(), ready[supstepIdx % stale].end());
                 ready[supstepIdx % stale].clear();
@@ -474,17 +465,19 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
 
                 if constexpr (use_memory_constraint) {
                     if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) {
-                        for (unsigned proc = 0; proc < P; proc++)
+                        for (unsigned proc = 0; proc < P; proc++) {
                             memory_constraint.reset(proc);
+                        }
                     }
                 }
 
                 for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) {
                     unsigned equal_split = (static_cast<unsigned>(allReady[procType].size()) + stale - 1) / stale;
                     unsigned at_least_for_long_step = 3 * nr_procs_per_type[procType];
-                    limit_of_number_of_allocated_allReady_tasks_in_superstep[procType] = std::max(at_least_for_long_step, equal_split);
+                    limit_of_number_of_allocated_allReady_tasks_in_superstep[procType]
+                        = std::max(at_least_for_long_step, equal_split);
                 }
-            
+
                 endSupStep = false;
                 finishTimes.emplace(0, std::numeric_limits<VertexType>::max());
             }
@@ -500,14 +493,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                 if (node != std::numeric_limits<VertexType>::max()) {
                     const unsigned proc_of_node = schedule.assignedProcessor(node);
 
-                    for (const auto& succ : G.children(node)) {
+                    for (const auto &succ : G.children(node)) {
                         nrPredecRemain[succ]--;
                         if (nrPredecRemain[succ] == 0) {
                             ready[supstepIdx % stale].emplace(succ, work_variances[succ]);
                             nr_ready_stale_nodes_per_type[supstepIdx % stale][G.vertex_type(succ)]++;
 
                             unsigned earliest_add = supstepIdx;
-                            for (const auto& pred : G.parents(succ)) {
+                            for (const auto &pred : G.parents(succ)) {
                                 if (schedule.assignedProcessor(pred) != proc_of_node) {
                                     earliest_add = std::max(earliest_add, stale + schedule.assignedSuperstep(pred));
                                 }
@@ -521,13 +514,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                                         memory_ok = memory_constraint.can_add(succ, proc_of_node);
                                     }
                                 }
-                                for (unsigned step_to_add = earliest_add;
-                                    step_to_add < supstepIdx + stale; ++step_to_add) {
+                                for (unsigned step_to_add = earliest_add; step_to_add < supstepIdx + stale; ++step_to_add) {
                                     if ((step_to_add == supstepIdx) && !memory_ok) {
-                                        continue; 
+                                        continue;
                                     }
-                                    procReady[step_to_add % stale][proc_of_node].emplace(
-                                        succ, work_variances[succ]);
+                                    procReady[step_to_add % stale][proc_of_node].emplace(succ, work_variances[succ]);
                                 }
                             }
                         }
@@ -547,29 +538,37 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                 VertexType nextNode = std::numeric_limits<VertexType>::max();
                 unsigned nextProc = P;
 
-                Choose( instance, work_variances, allReady, 
-                        procReady[supstepIdx % stale], procFree, 
-                        nextNode, nextProc, endSupStep, max_finish_time - time, procTypesCompatibleWithNodeType_skip_proctype);
+                Choose(instance,
+                       work_variances,
+                       allReady,
+                       procReady[supstepIdx % stale],
+                       procFree,
+                       nextNode,
+                       nextProc,
+                       endSupStep,
+                       max_finish_time - time,
+                       procTypesCompatibleWithNodeType_skip_proctype);
 
                 if (nextNode == std::numeric_limits<VertexType>::max() || nextProc == P) {
                     endSupStep = true;
                     break;
                 }
 
-                if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, work_variances[nextNode])) !=
-                    procReady[supstepIdx % stale][nextProc].end()) {
+                if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, work_variances[nextNode]))
+                    != procReady[supstepIdx % stale][nextProc].end()) {
                     for (size_t i = 0; i < stale; i++) {
                         procReady[i][nextProc].erase(std::make_pair(nextNode, work_variances[nextNode]));
                     }
                 } else {
-                    for(unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(nextNode)]) {
+                    for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(nextNode)]) {
                         allReady[procType].erase(std::make_pair(nextNode, work_variances[nextNode]));
                     }
                     nr_old_ready_nodes_per_type[G.vertex_type(nextNode)]--;
                     const unsigned nextProcType = instance.getArchitecture().processorType(nextProc);
                     number_of_allocated_allReady_tasks_in_superstep[nextProcType]++;
-                    
-                    if (number_of_allocated_allReady_tasks_in_superstep[nextProcType] >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) {
+
+                    if (number_of_allocated_allReady_tasks_in_superstep[nextProcType]
+                        >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) {
                         allReady[nextProcType].clear();
                     }
                 }
@@ -603,38 +602,35 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
                 --free;
             }
 
-            if (able_to_schedule_in_step)
+            if (able_to_schedule_in_step) {
                 successive_empty_supersteps = 0;
-            else if (++successive_empty_supersteps > 100 + stale)
+            } else if (++successive_empty_supersteps > 100 + stale) {
                 return RETURN_STATUS::ERROR;
+            }
 
-            if (free > (P * max_percent_idle_processors) &&
-                ((!increase_parallelism_in_new_superstep) ||
-                get_nr_parallelizable_nodes(
-                    instance, stale, nr_old_ready_nodes_per_type,
-                    nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale],
-                    procReady[(supstepIdx + 1) % stale],
-                    nr_procs_per_type) >= std::min(
-                                            std::min(P, static_cast<unsigned>(1.2 * (P - free))),
-                                            P - free + static_cast<unsigned>(0.5 * free)))) 
-            {
+            if (free > (P * max_percent_idle_processors)
+                && ((!increase_parallelism_in_new_superstep)
+                    || get_nr_parallelizable_nodes(instance,
+                                                   stale,
+                                                   nr_old_ready_nodes_per_type,
+                                                   nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale],
+                                                   procReady[(supstepIdx + 1) % stale],
+                                                   nr_procs_per_type)
+                           >= std::min(std::min(P, static_cast<unsigned>(1.2 * (P - free))),
+                                       P - free + static_cast<unsigned>(0.5 * free)))) {
                 endSupStep = true;
             }
         }
 
         assert(schedule.satisfiesPrecedenceConstraints());
-        //schedule.setAutoCommunicationSchedule();
+        // schedule.setAutoCommunicationSchedule();
 
         return RETURN_STATUS::OSP_SUCCESS;
     }
 
-    RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-        return computeSspSchedule(schedule, 1U);
-    }
+    RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override { return computeSspSchedule(schedule, 1U); }
 
-    RETURN_STATUS computeSchedule(MaxBspSchedule<Graph_t> &schedule) override {
-        return computeSspSchedule(schedule, 2U);
-    }
+    RETURN_STATUS computeSchedule(MaxBspSchedule<Graph_t> &schedule) override { return computeSspSchedule(schedule, 2U); }
 
     std::string getScheduleName() const override {
         if constexpr (use_memory_constraint) {
@@ -643,7 +639,6 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler<Graph_t> {
             return "GreedyVarianceSsp";
         }
     }
-
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp
index 241aef58..981b6dfa 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp
@@ -21,10 +21,10 @@ limitations under the License.
 #include <chrono>
 #include <climits>
 #include <list>
-#include <queue>
 #include <map>
-#include <unordered_set>
+#include <queue>
 #include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "MemoryConstraintModules.hpp"
@@ -34,7 +34,7 @@ limitations under the License.
 
 namespace osp {
 
-template<typename weight_t>
+template <typename weight_t>
 struct GrowLocalAutoCores_Params {
     unsigned minSuperstepSize = 20;
     weight_t syncCostMultiplierMinSuperstepWeight = 1;
@@ -50,31 +50,30 @@ struct GrowLocalAutoCores_Params {
  * The getScheduleName() method returns the name of the schedule, which is "GreedyBspGrowLocalAutoCores" in this
  * case.
  */
-template<typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
 class GrowLocalAutoCores : public Scheduler<Graph_t> {
-
   private:
     GrowLocalAutoCores_Params<v_workw_t<Graph_t>> params;
 
-    constexpr static bool use_memory_constraint =
-        is_memory_constraint_v<MemoryConstraint_t> or is_memory_constraint_schedule_v<MemoryConstraint_t>;
+    constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>
+                                                  or is_memory_constraint_schedule_v<MemoryConstraint_t>;
 
     static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
                   "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
 
-    static_assert(not use_memory_constraint or not (std::is_same_v<MemoryConstraint_t, persistent_transient_memory_constraint<Graph_t>> or std::is_same_v<MemoryConstraint_t, global_memory_constraint<Graph_t>>), 
-                  "MemoryConstraint_t must not be persistent_transient_memory_constraint or global_memory_constraint. Not supported in GrowLocalAutoCores.");
-               
+    static_assert(not use_memory_constraint
+                      or not(std::is_same_v<MemoryConstraint_t, persistent_transient_memory_constraint<Graph_t>>
+                             or std::is_same_v<MemoryConstraint_t, global_memory_constraint<Graph_t>>),
+                  "MemoryConstraint_t must not be persistent_transient_memory_constraint or global_memory_constraint. Not "
+                  "supported in GrowLocalAutoCores.");
 
     MemoryConstraint_t local_memory_constraint;
- 
 
   public:
     /**
      * @brief Default constructor for GreedyBspGrowLocalAutoCores.
      */
-    GrowLocalAutoCores(
-        GrowLocalAutoCores_Params<v_workw_t<Graph_t>> params_ = GrowLocalAutoCores_Params<v_workw_t<Graph_t>>())
+    GrowLocalAutoCores(GrowLocalAutoCores_Params<v_workw_t<Graph_t>> params_ = GrowLocalAutoCores_Params<v_workw_t<Graph_t>>())
         : params(params_) {}
 
     /**
@@ -91,7 +90,6 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
      * @return A pair containing the return status and the computed BspSchedule.
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         using vertex_idx = typename Graph_t::vertex_idx;
         const auto &instance = schedule.getInstance();
 
@@ -115,7 +113,7 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
         const unsigned P = instance.numberOfProcessors();
         const auto &G = instance.getComputationalDag();
 
-        std::unordered_set<vertex_idx> ready;  
+        std::unordered_set<vertex_idx> ready;
 
         std::vector<vertex_idx> allReady;
         std::vector<std::vector<vertex_idx>> procReady(P);
@@ -136,16 +134,13 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
         std::vector<vertex_idx> new_ready;
         std::vector<vertex_idx> best_new_ready;
 
-        const v_workw_t<Graph_t> minWeightParallelCheck =
-            params.syncCostMultiplierParallelCheck * instance.synchronisationCosts();
-        const v_workw_t<Graph_t> minSuperstepWeight =
-            params.syncCostMultiplierMinSuperstepWeight * instance.synchronisationCosts();
+        const v_workw_t<Graph_t> minWeightParallelCheck = params.syncCostMultiplierParallelCheck * instance.synchronisationCosts();
+        const v_workw_t<Graph_t> minSuperstepWeight = params.syncCostMultiplierMinSuperstepWeight * instance.synchronisationCosts();
 
         double desiredParallelism = static_cast<double>(P);
 
         vertex_idx total_assigned = 0;
         while (total_assigned < N) {
-
             unsigned limit = params.minSuperstepSize;
             double best_score = 0;
             double best_parallelism = 0;
@@ -153,7 +148,6 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
             bool continueSuperstepAttempts = true;
 
             while (continueSuperstepAttempts) {
-
                 for (unsigned p = 0; p < P; p++) {
                     new_assignments[p].clear();
                     procReady[p].clear();
@@ -295,8 +289,8 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
 
                 bool accept_step = false;
 
-                double score = static_cast<double>(total_weight_assigned) /
-                               static_cast<double>(weight_limit + instance.synchronisationCosts());
+                double score = static_cast<double>(total_weight_assigned)
+                               / static_cast<double>(weight_limit + instance.synchronisationCosts());
                 double parallelism = 0;
                 if (weight_limit > 0) {
                     parallelism = static_cast<double>(total_weight_assigned) / static_cast<double>(weight_limit);
@@ -377,8 +371,8 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
                 }
             }
 
-            desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) +
-                                 (0.1 * static_cast<double>(P)); // weights should sum up to one
+            desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism)
+                                 + (0.1 * static_cast<double>(P));    // weights should sum up to one
 
             ++supstep;
         }
@@ -398,4 +392,4 @@ class GrowLocalAutoCores : public Scheduler<Graph_t> {
     virtual std::string getScheduleName() const override { return "GrowLocalAutoCores"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp
index b5b4ea95..5569195d 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 #pragma once
 
+#include <omp.h>
+
 #include <climits>
 #include <list>
 #include <map>
@@ -25,12 +27,10 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include <omp.h>
-
 // #define TIME_THREADS_GROW_LOCAL_PARALLEL
 #ifdef TIME_THREADS_GROW_LOCAL_PARALLEL
-#include <chrono>
-#include <iostream>
+#    include <chrono>
+#    include <iostream>
 #endif
 
 #include "osp/auxiliary/misc.hpp"
@@ -41,14 +41,14 @@ namespace osp {
 
 static constexpr unsigned CacheLineSize = 64;
 
-template<typename vert_t, typename weight_t>
+template <typename vert_t, typename weight_t>
 struct GrowLocalAutoCoresParallel_Params {
     vert_t minSuperstepSize = 20;
     weight_t syncCostMultiplierMinSuperstepWeight = 1;
     weight_t syncCostMultiplierParallelCheck = 4;
 
-    unsigned numThreads = 0;           // 0 for auto
-    unsigned maxNumThreads = UINT_MAX; // used when auto num threads
+    unsigned numThreads = 0;              // 0 for auto
+    unsigned maxNumThreads = UINT_MAX;    // used when auto num threads
 };
 
 /**
@@ -60,7 +60,7 @@ struct GrowLocalAutoCoresParallel_Params {
  * The getScheduleName() method returns the name of the schedule, which is "GrowLocalAutoCoresParallel" in this
  * case.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
     static_assert(is_directed_graph_v<Graph_t>);
     static_assert(has_vertex_weights_v<Graph_t>);
@@ -74,8 +74,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
     /**
      * @brief Default constructor for GrowLocalAutoCoresParallel.
      */
-    GrowLocalAutoCoresParallel(
-        GrowLocalAutoCoresParallel_Params<vertex_idx_t<Graph_t>, v_workw_t<Graph_t>> params_ = GrowLocalAutoCoresParallel_Params<vertex_idx_t<Graph_t>, v_workw_t<Graph_t>>())
+    GrowLocalAutoCoresParallel(GrowLocalAutoCoresParallel_Params<vertex_idx_t<Graph_t>, v_workw_t<Graph_t>> params_
+                               = GrowLocalAutoCoresParallel_Params<vertex_idx_t<Graph_t>, v_workw_t<Graph_t>>())
         : params(params_) {}
 
     /**
@@ -83,8 +83,12 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
      */
     virtual ~GrowLocalAutoCoresParallel() = default;
 
-    void computePartialSchedule(BspSchedule<Graph_t> &schedule, const std::vector<VertexType> &topOrder, const std::vector<VertexType> &posInTopOrder, const VertexType startNode, const VertexType endNode, unsigned &supstep) const {
-
+    void computePartialSchedule(BspSchedule<Graph_t> &schedule,
+                                const std::vector<VertexType> &topOrder,
+                                const std::vector<VertexType> &posInTopOrder,
+                                const VertexType startNode,
+                                const VertexType endNode,
+                                unsigned &supstep) const {
 #ifdef TIME_THREADS_GROW_LOCAL_PARALLEL
         double startTime = omp_get_wtime();
 #endif
@@ -255,8 +259,9 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
                         } else if (readyIter != ready.end()) {
                             chosen_node = *readyIter;
                             readyIter++;
-                        } else
+                        } else {
                             break;
+                        }
 
                         new_assignments[proc].push_back(chosen_node);
                         schedule.setAssignedProcessor(chosen_node, proc);
@@ -310,13 +315,15 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
 
                 bool accept_step = false;
 
-                double score = static_cast<double>(total_weight_assigned) / static_cast<double>(weight_limit + instance.synchronisationCosts());
+                double score = static_cast<double>(total_weight_assigned)
+                               / static_cast<double>(weight_limit + instance.synchronisationCosts());
                 double parallelism = 0;
                 if (weight_limit > 0) {
                     parallelism = static_cast<double>(total_weight_assigned) / static_cast<double>(weight_limit);
                 }
 
-                if (score > 0.97 * best_score) { // It is possible to make this less strict, i.e. score > 0.98 * best_score. The purpose of this would be to encourage larger supersteps.
+                if (score > 0.97 * best_score) {    // It is possible to make this less strict, i.e. score > 0.98 * best_score.
+                                                    // The purpose of this would be to encourage larger supersteps.
                     best_score = std::max(best_score, score);
                     best_parallelism = parallelism;
                     accept_step = true;
@@ -457,7 +464,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
                 }
             }
 
-            desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) + (0.1 * static_cast<double>(P)); // weights should sum up to one
+            desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism)
+                                 + (0.1 * static_cast<double>(P));    // weights should sum up to one
 
             ++supstep;
         }
@@ -468,17 +476,26 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
         if (omp_get_thread_num() < 10) {
             padd = " ";
         }
-        std::string outputString = "Thread: " + padd + std::to_string(omp_get_thread_num()) + "\t Time: " + std::to_string(endTime - startTime) + "\n";
+        std::string outputString
+            = "Thread: " + padd + std::to_string(omp_get_thread_num()) + "\t Time: " + std::to_string(endTime - startTime) + "\n";
         std::cout << outputString;
 #endif
     }
 
-    void incrementScheduleSupersteps(BspSchedule<Graph_t> &schedule, const VertexType startNode, const VertexType endNode, const unsigned incr) const {
+    void incrementScheduleSupersteps(BspSchedule<Graph_t> &schedule,
+                                     const VertexType startNode,
+                                     const VertexType endNode,
+                                     const unsigned incr) const {
         for (VertexType node = startNode; node < endNode; node++) {
             schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, schedule.assignedSuperstep(node) + incr);
         }
     }
-    void incrementScheduleSupersteps_TopOrder(BspSchedule<Graph_t> &schedule, const std::vector<VertexType> &topOrder, const VertexType startIndex, const VertexType endIndex, const unsigned incr) const {
+
+    void incrementScheduleSupersteps_TopOrder(BspSchedule<Graph_t> &schedule,
+                                              const std::vector<VertexType> &topOrder,
+                                              const VertexType startIndex,
+                                              const VertexType endIndex,
+                                              const unsigned incr) const {
         for (VertexType index = startIndex; index < endIndex; index++) {
             const VertexType node = topOrder[index];
             schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, schedule.assignedSuperstep(node) + incr);
@@ -486,7 +503,6 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
     }
 
     RETURN_STATUS computeScheduleParallel(BspSchedule<Graph_t> &schedule, unsigned int numThreads) const {
-
         const BspInstance<Graph_t> &instance = schedule.getInstance();
         const Graph_t &graph = instance.getComputationalDag();
 
@@ -524,11 +540,13 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
             }
         }
 
-#pragma omp parallel num_threads(numThreads) default(none) shared(schedule, topOrder, posInTopOrder, superstepsThread, supstepIncr, numThreads, startNodes, incr)
+#pragma omp parallel num_threads(numThreads) default(none)                                                 \
+    shared(schedule, topOrder, posInTopOrder, superstepsThread, supstepIncr, numThreads, startNodes, incr)
         {
 #pragma omp for schedule(static, 1)
             for (unsigned thr = 0; thr < numThreads; thr++) {
-                computePartialSchedule(schedule, topOrder, posInTopOrder, startNodes[thr], startNodes[thr + 1], superstepsThread[thr * UnsignedPadding]);
+                computePartialSchedule(
+                    schedule, topOrder, posInTopOrder, startNodes[thr], startNodes[thr + 1], superstepsThread[thr * UnsignedPadding]);
             }
 
 #pragma omp master
@@ -547,7 +565,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
                 if constexpr (has_vertices_in_top_order_v<Graph_t>) {
                     incrementScheduleSupersteps(schedule, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]);
                 } else {
-                    incrementScheduleSupersteps_TopOrder(schedule, topOrder, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]);
+                    incrementScheduleSupersteps_TopOrder(
+                        schedule, topOrder, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]);
                 }
             }
         }
@@ -566,11 +585,11 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
      * @return A pair containing the return status and the computed BspSchedule.
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         unsigned numThreads = params.numThreads;
         if (numThreads == 0) {
             // numThreads = static_cast<unsigned>(std::sqrt( static_cast<double>((schedule.getInstance().numberOfVertices() / 1000000)))) + 1;
-            numThreads = static_cast<unsigned>(std::log2(static_cast<double>((schedule.getInstance().numberOfVertices() / 1000)))) + 1;
+            numThreads = static_cast<unsigned>(    // clamp: log2(0) is -inf and casting that to unsigned is undefined
+                std::log2(std::max(1.0, static_cast<double>(schedule.getInstance().numberOfVertices() / 1000)))) + 1;
         }
         numThreads = std::min(numThreads, params.maxNumThreads);
         if (numThreads == 0) {
@@ -590,4 +609,4 @@ class GrowLocalAutoCoresParallel : public Scheduler<Graph_t> {
     virtual std::string getScheduleName() const override { return "GrowLocalAutoCoresParallel"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp b/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp
index ce1f0503..f56ed2f2 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp
@@ -29,19 +29,19 @@ namespace osp {
  * This trait checks if a type has the required methods for a memory constraint.
  *
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_memory_constraint : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_memory_constraint<
-    T, std::void_t<decltype(std::declval<T>().initialize(std::declval<BspInstance<typename T::Graph_impl_t>>())),
-                   decltype(std::declval<T>().can_add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
-                                                      std::declval<unsigned>())),
-                   decltype(std::declval<T>().add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
-                                                  std::declval<unsigned>())),
-                   decltype(std::declval<T>().reset(std::declval<unsigned>())), decltype(T())>> : std::true_type {};
-
-template<typename T>
+    T,
+    std::void_t<decltype(std::declval<T>().initialize(std::declval<BspInstance<typename T::Graph_impl_t>>())),
+                decltype(std::declval<T>().can_add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().reset(std::declval<unsigned>())),
+                decltype(T())>> : std::true_type {};
+
+template <typename T>
 inline constexpr bool is_memory_constraint_v = is_memory_constraint<T>::value;
 
 /**
@@ -57,9 +57,8 @@ struct no_memory_constraint {
  *
  * @tparam Graph_t The graph type.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct local_memory_constraint {
-
     using Graph_impl_t = Graph_t;
 
     const BspInstance<Graph_t> *instance;
@@ -78,20 +77,19 @@ struct local_memory_constraint {
     }
 
     inline bool can_add(const vertex_idx_t<Graph_t> &v, const unsigned proc) const {
-        return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) <=
-               instance->getArchitecture().memoryBound(proc);
+        return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v)
+               <= instance->getArchitecture().memoryBound(proc);
     }
 
     inline void add(const vertex_idx_t<Graph_t> &v, const unsigned proc) {
         current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v);
     }
 
-    inline bool can_add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight,
-                        const v_memw_t<Graph_t>&) const {
+    inline bool can_add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight, const v_memw_t<Graph_t> &) const {
         return current_proc_memory[proc] + custom_mem_weight <= instance->getArchitecture().memoryBound(proc);
     }
 
-    inline void add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight, const v_memw_t<Graph_t>&) {
+    inline void add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight, const v_memw_t<Graph_t> &) {
         current_proc_memory[proc] += custom_mem_weight;
     }
 
@@ -109,12 +107,10 @@ struct local_memory_constraint {
  *
  * @tparam Graph_t The graph type.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct persistent_transient_memory_constraint {
-
-    static_assert(
-        std::is_convertible_v<v_commw_t<Graph_t>, v_memw_t<Graph_t>>,
-        "persistent_transient_memory_constraint requires that memory and communication weights are convertible.");
+    static_assert(std::is_convertible_v<v_commw_t<Graph_t>, v_memw_t<Graph_t>>,
+                  "persistent_transient_memory_constraint requires that memory and communication weights are convertible.");
 
     using Graph_impl_t = Graph_t;
 
@@ -137,31 +133,26 @@ struct persistent_transient_memory_constraint {
     }
 
     inline bool can_add(const vertex_idx_t<Graph_t> &v, const unsigned proc) const {
-
-        return (
-            current_proc_persistent_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) +
-                std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)) <=
-            instance->getArchitecture().memoryBound(proc));
+        return (current_proc_persistent_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v)
+                    + std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v))
+                <= instance->getArchitecture().memoryBound(proc));
     }
 
     inline void add(const vertex_idx_t<Graph_t> &v, const unsigned proc) {
-
         current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v);
-        current_proc_transient_memory[proc] =
-            std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v));
+        current_proc_transient_memory[proc]
+            = std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v));
     }
 
-    inline bool can_add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight,
+    inline bool can_add(const unsigned proc,
+                        const v_memw_t<Graph_t> &custom_mem_weight,
                         const v_commw_t<Graph_t> &custom_comm_weight) const {
-
-        return (current_proc_persistent_memory[proc] + custom_mem_weight +
-                    std::max(current_proc_transient_memory[proc], custom_comm_weight) <=
-                instance->getArchitecture().memoryBound(proc));
+        return (current_proc_persistent_memory[proc] + custom_mem_weight
+                    + std::max(current_proc_transient_memory[proc], custom_comm_weight)
+                <= instance->getArchitecture().memoryBound(proc));
     }
 
-    inline void add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight,
-                    const v_commw_t<Graph_t> &custom_comm_weight ) {
-
+    inline void add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight, const v_commw_t<Graph_t> &custom_comm_weight) {
         current_proc_persistent_memory[proc] += custom_mem_weight;
         current_proc_transient_memory[proc] = std::max(current_proc_transient_memory[proc], custom_comm_weight);
     }
@@ -169,9 +160,8 @@ struct persistent_transient_memory_constraint {
     inline void reset(const unsigned) {}
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct global_memory_constraint {
-
     using Graph_impl_t = Graph_t;
 
     const BspInstance<Graph_t> *instance;
@@ -190,16 +180,15 @@ struct global_memory_constraint {
     }
 
     inline bool can_add(const vertex_idx_t<Graph_t> &v, const unsigned proc) const {
-        return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) <=
-               instance->getArchitecture().memoryBound(proc);
+        return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v)
+               <= instance->getArchitecture().memoryBound(proc);
     }
 
     inline void add(const vertex_idx_t<Graph_t> &v, const unsigned proc) {
         current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v);
     }
 
-    inline bool can_add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight,
-                        const v_commw_t<Graph_t> &) const {
+    inline bool can_add(const unsigned proc, const v_memw_t<Graph_t> &custom_mem_weight, const v_commw_t<Graph_t> &) const {
         return current_proc_memory[proc] + custom_mem_weight <= instance->getArchitecture().memoryBound(proc);
     }
 
@@ -210,25 +199,23 @@ struct global_memory_constraint {
     inline void reset(const unsigned) {}
 };
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_memory_constraint_schedule : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_memory_constraint_schedule<
-    T, std::void_t<decltype(std::declval<T>().initialize(std::declval<BspSchedule<typename T::Graph_impl_t>>(),
-                                                         std::declval<unsigned>())),
-                   decltype(std::declval<T>().can_add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
-                                                      std::declval<unsigned>())),
-                   decltype(std::declval<T>().add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
-                                                  std::declval<unsigned>())),
-                   decltype(std::declval<T>().reset(std::declval<unsigned>())), decltype(T())>> : std::true_type {};
-
-template<typename T>
+    T,
+    std::void_t<decltype(std::declval<T>().initialize(std::declval<BspSchedule<typename T::Graph_impl_t>>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().can_add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().add(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().reset(std::declval<unsigned>())),
+                decltype(T())>> : std::true_type {};
+
+template <typename T>
 inline constexpr bool is_memory_constraint_schedule_v = is_memory_constraint_schedule<T>::value;
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct local_in_out_memory_constraint {
-
     static_assert(std::is_convertible_v<v_commw_t<Graph_t>, v_memw_t<Graph_t>>,
                   "local_in_out_memory_constraint requires that memory and communication weights are convertible.");
 
@@ -255,14 +242,12 @@ struct local_in_out_memory_constraint {
     }
 
     inline bool can_add(const vertex_idx_t<Graph_t> &v, const unsigned proc) const {
-
-        v_memw_t<Graph_t> inc_memory = instance->getComputationalDag().vertex_mem_weight(v) +
-                                       instance->getComputationalDag().vertex_comm_weight(v);
+        v_memw_t<Graph_t> inc_memory
+            = instance->getComputationalDag().vertex_mem_weight(v) + instance->getComputationalDag().vertex_comm_weight(v);
 
         for (const auto &pred : instance->getComputationalDag().parents(v)) {
-
-            if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) &&
-                schedule->assignedSuperstep(pred) == *current_superstep) {
+            if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v)
+                && schedule->assignedSuperstep(pred) == *current_superstep) {
                 inc_memory -= instance->getComputationalDag().vertex_comm_weight(pred);
             }
         }
@@ -271,14 +256,12 @@ struct local_in_out_memory_constraint {
     }
 
     inline void add(const vertex_idx_t<Graph_t> &v, const unsigned proc) {
-
-        current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v) +
-                                     instance->getComputationalDag().vertex_comm_weight(v);
+        current_proc_memory[proc]
+            += instance->getComputationalDag().vertex_mem_weight(v) + instance->getComputationalDag().vertex_comm_weight(v);
 
         for (const auto &pred : instance->getComputationalDag().parents(v)) {
-
-            if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) &&
-                schedule->assignedSuperstep(pred) == *current_superstep) {
+            if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v)
+                && schedule->assignedSuperstep(pred) == *current_superstep) {
                 current_proc_memory[proc] -= instance->getComputationalDag().vertex_comm_weight(pred);
             }
         }
@@ -287,9 +270,8 @@ struct local_in_out_memory_constraint {
     inline void reset(const unsigned proc) { current_proc_memory[proc] = 0; }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct local_inc_edges_memory_constraint {
-
     using Graph_impl_t = Graph_t;
 
     const BspInstance<Graph_t> *instance;
@@ -316,13 +298,11 @@ struct local_inc_edges_memory_constraint {
     }
 
     inline bool can_add(const vertex_idx_t<Graph_t> &v, const unsigned proc) const {
-
         v_commw_t<Graph_t> inc_memory = instance->getComputationalDag().vertex_comm_weight(v);
 
         for (const auto &pred : instance->getComputationalDag().parents(v)) {
-
-            if (schedule->assignedSuperstep(pred) != *current_superstep &&
-                current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) {
+            if (schedule->assignedSuperstep(pred) != *current_superstep
+                && current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) {
                 inc_memory += instance->getComputationalDag().vertex_comm_weight(pred);
             }
         }
@@ -331,11 +311,9 @@ struct local_inc_edges_memory_constraint {
     }
 
     inline void add(const vertex_idx_t<Graph_t> &v, const unsigned proc) {
-
         current_proc_memory[proc] += instance->getComputationalDag().vertex_comm_weight(v);
 
         for (const auto &pred : instance->getComputationalDag().parents(v)) {
-
             if (schedule->assignedSuperstep(pred) != *current_superstep) {
                 const auto pair = current_proc_predec[proc].insert(pred);
                 if (pair.second) {
@@ -351,12 +329,10 @@ struct local_inc_edges_memory_constraint {
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct local_sources_inc_edges_memory_constraint {
-
-    static_assert(
-        std::is_convertible_v<v_commw_t<Graph_t>, v_memw_t<Graph_t>>,
-        "local_sources_inc_edges_memory_constraint requires that memory and communication weights are convertible.");
+    static_assert(std::is_convertible_v<v_commw_t<Graph_t>, v_memw_t<Graph_t>>,
+                  "local_sources_inc_edges_memory_constraint requires that memory and communication weights are convertible.");
 
     using Graph_impl_t = Graph_t;
 
@@ -384,7 +360,6 @@ struct local_sources_inc_edges_memory_constraint {
     }
 
     inline bool can_add(const vertex_idx_t<Graph_t> &v, const unsigned proc) const {
-
         v_memw_t<Graph_t> inc_memory = 0;
 
         if (is_source(v, instance->getComputationalDag())) {
@@ -392,9 +367,8 @@ struct local_sources_inc_edges_memory_constraint {
         }
 
         for (const auto &pred : instance->getComputationalDag().parents(v)) {
-
-            if (schedule->assignedSuperstep(v) != *current_superstep &&
-                current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) {
+            if (schedule->assignedSuperstep(pred) != *current_superstep    // check the parent's superstep, as add() does
+                && current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) {
                 inc_memory += instance->getComputationalDag().vertex_comm_weight(pred);
             }
         }
@@ -403,13 +377,11 @@ struct local_sources_inc_edges_memory_constraint {
     }
 
     inline void add(const vertex_idx_t<Graph_t> &v, const unsigned proc) {
-
         if (is_source(v, instance->getComputationalDag())) {
             current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v);
         }
 
         for (const auto &pred : instance->getComputationalDag().parents(v)) {
-
             if (schedule->assignedSuperstep(pred) != *current_superstep) {
                 const auto pair = current_proc_predec[proc].insert(pred);
                 if (pair.second) {
@@ -425,4 +397,4 @@ struct local_sources_inc_edges_memory_constraint {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp b/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp
index bf7e160c..795290fd 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp
@@ -28,20 +28,17 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class RandomGreedy : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "RandomGreedy can only be used with computational DAGs.");
 
   private:
     bool ensure_enough_sources;
 
   public:
-
     RandomGreedy(bool ensure_enough_sources_ = true) : Scheduler<Graph_t>(), ensure_enough_sources(ensure_enough_sources_) {};
 
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &sched) override {
-
         using VertexType = vertex_idx_t<Graph_t>;
 
         const auto &instance = sched.getInstance();
@@ -72,7 +69,6 @@ class RandomGreedy : public Scheduler<Graph_t> {
             bool few_sources = next.size() < instance.numberOfProcessors() ? true : false;
             unsigned fail_counter = 0;
             while (!next.empty() && fail_counter < 20) {
-
                 std::uniform_int_distribution<VertexType> rand_node_idx(0, next.size() - 1);
                 VertexType node_ind = rand_node_idx(g);
                 const auto &node = next[node_ind];
@@ -81,14 +77,12 @@ class RandomGreedy : public Scheduler<Graph_t> {
                 unsigned processor_to_be_allocated = 0;
 
                 for (const auto &par : graph.parents(node)) {
-                    if (processor_set &&
-                        (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend()) &&
-                        (sched.assignedProcessor(par) != processor_to_be_allocated)) {
+                    if (processor_set && (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())
+                        && (sched.assignedProcessor(par) != processor_to_be_allocated)) {
                         failed_to_allocate = true;
                         break;
                     }
-                    if ((!processor_set) &&
-                        (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())) {
+                    if ((!processor_set) && (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())) {
                         processor_set = true;
                         processor_to_be_allocated = sched.assignedProcessor(par);
                     }
@@ -108,8 +102,7 @@ class RandomGreedy : public Scheduler<Graph_t> {
 
                     assert(std::distance(processor_weights.begin(), min_iter) >= 0);
 
-                    sched.setAssignedProcessor(
-                        node, static_cast<unsigned>(std::distance(processor_weights.begin(), min_iter)));
+                    sched.setAssignedProcessor(node, static_cast<unsigned>(std::distance(processor_weights.begin(), min_iter)));
                 }
 
                 nodes_assigned_this_superstep.emplace(node);
@@ -127,8 +120,9 @@ class RandomGreedy : public Scheduler<Graph_t> {
                 next.erase(it);
                 next.insert(next.end(), new_nodes.cbegin(), new_nodes.cend());
 
-                if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors())
+                if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) {
                     break;
+                }
             }
 
             superstep_counter++;
@@ -140,4 +134,4 @@ class RandomGreedy : public Scheduler<Graph_t> {
     std::string getScheduleName() const override { return ensure_enough_sources ? "RandomGreedyS" : "RandomGreedy"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp b/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp
index ff245d1d..91541ca6 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp
@@ -45,16 +45,15 @@ namespace osp {
  * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm.
  * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case.
  */
-template<typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename MemoryConstraint_t = no_memory_constraint>
 class VarianceFillup : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "VarianceFillup can only be used with computational DAGs.");
 
   private:
     using VertexType = vertex_idx_t<Graph_t>;
 
-    constexpr static bool use_memory_constraint =
-        is_memory_constraint_v<MemoryConstraint_t> or is_memory_constraint_schedule_v<MemoryConstraint_t>;
+    constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>
+                                                  or is_memory_constraint_schedule_v<MemoryConstraint_t>;
 
     static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
                   "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
@@ -65,7 +64,6 @@ class VarianceFillup : public Scheduler<Graph_t> {
     bool increase_parallelism_in_new_superstep;
 
     std::vector<double> compute_work_variance(const Graph_t &graph) const {
-
         std::vector<double> work_variance(graph.num_vertices(), 0.0);
 
         const std::vector<VertexType> top_order = GetTopOrder(graph);
@@ -81,21 +79,19 @@ class VarianceFillup : public Scheduler<Graph_t> {
             }
             temp = std::log(temp) / 2 + max_priority;
 
-            double node_weight = std::log( static_cast<double>( std::max(graph.vertex_work_weight(*r_iter), static_cast<v_workw_t<Graph_t>>(1)) ) );
+            double node_weight
+                = std::log(static_cast<double>(std::max(graph.vertex_work_weight(*r_iter), static_cast<v_workw_t<Graph_t>>(1))));
             double larger_val = node_weight > temp ? node_weight : temp;
 
-            work_variance[*r_iter] =
-                std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val;
+            work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val;
         }
 
         return work_variance;
     }
 
-    std::vector<std::vector<std::vector<unsigned>>>
-    procTypesCompatibleWithNodeType_omit_procType(const BspInstance<Graph_t> &instance) const {
-
-        const std::vector<std::vector<unsigned>> procTypesCompatibleWithNodeType =
-            instance.getProcTypesCompatibleWithNodeType();
+    std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_omit_procType(
+        const BspInstance<Graph_t> &instance) const {
+        const std::vector<std::vector<unsigned>> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType();
 
         std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_skip(
             instance.getArchitecture().getNumberOfProcessorTypes(),
@@ -103,8 +99,9 @@ class VarianceFillup : public Scheduler<Graph_t> {
         for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) {
             for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) {
                 for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) {
-                    if (procType == otherProcType)
+                    if (procType == otherProcType) {
                         continue;
+                    }
                     procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType);
                 }
             }
@@ -119,17 +116,13 @@ class VarianceFillup : public Scheduler<Graph_t> {
         }
     };
 
-    bool check_mem_feasibility(
-        const BspInstance<Graph_t> &instance,
-        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
-        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady) const {
-
+    bool check_mem_feasibility(const BspInstance<Graph_t> &instance,
+                               const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
+                               const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady) const {
         if constexpr (use_memory_constraint) {
             if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) {
-
                 for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
                     if (!procReady[i].empty()) {
-
                         const std::pair<VertexType, double> &node_pair = *procReady[i].begin();
                         VertexType top_node = node_pair.first;
 
@@ -140,12 +133,11 @@ class VarianceFillup : public Scheduler<Graph_t> {
                 }
 
                 for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
-
-                    if (allReady[instance.getArchitecture().processorType(i)].empty())
+                    if (allReady[instance.getArchitecture().processorType(i)].empty()) {
                         continue;
+                    }
 
-                    const std::pair<VertexType, double> &node_pair =
-                        *allReady[instance.getArchitecture().processorType(i)].begin();
+                    const std::pair<VertexType, double> &node_pair = *allReady[instance.getArchitecture().processorType(i)].begin();
                     VertexType top_node = node_pair.first;
 
                     if (memory_constraint.can_add(top_node, i)) {
@@ -160,21 +152,22 @@ class VarianceFillup : public Scheduler<Graph_t> {
         return true;
     };
 
-    void
-    Choose(const BspInstance<Graph_t> &instance, const std::vector<double> &work_variance,
-           std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
-           std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
-           const std::vector<bool> &procFree, VertexType &node, unsigned &p, const bool endSupStep,
-           const v_workw_t<Graph_t> remaining_time,
-           const std::vector<std::vector<std::vector<unsigned>>> &procTypesCompatibleWithNodeType_skip_proctype) const {
-
+    void Choose(const BspInstance<Graph_t> &instance,
+                const std::vector<double> &work_variance,
+                std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
+                std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
+                const std::vector<bool> &procFree,
+                VertexType &node,
+                unsigned &p,
+                const bool endSupStep,
+                const v_workw_t<Graph_t> remaining_time,
+                const std::vector<std::vector<std::vector<unsigned>>> &procTypesCompatibleWithNodeType_skip_proctype) const {
         double maxScore = -1;
         for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
             if (procFree[i] && !procReady[i].empty()) {
                 // select node
                 for (auto node_pair_it = procReady[i].begin(); node_pair_it != procReady[i].end();) {
-                    if (endSupStep &&
-                        (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) {
+                    if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) {
                         node_pair_it = procReady[i].erase(node_pair_it);
                         continue;
                     }
@@ -207,31 +200,27 @@ class VarianceFillup : public Scheduler<Graph_t> {
                     const double &score = it->second;
 
                     if (score > maxScore) {
-
                         if constexpr (use_memory_constraint) {
-
                             if (memory_constraint.can_add(it->first, i)) {
-
                                 node = it->first;
                                 p = i;
 
                                 allReady[instance.getArchitecture().processorType(i)].erase(it);
-                                for (unsigned procType : procTypesCompatibleWithNodeType_skip_proctype
-                                         [instance.getArchitecture().processorType(i)]
-                                         [instance.getComputationalDag().vertex_type(node)]) {
+                                for (unsigned procType :
+                                     procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType(
+                                         i)][instance.getComputationalDag().vertex_type(node)]) {
                                     allReady[procType].erase(std::make_pair(node, work_variance[node]));
                                 }
                                 return;
                             }
                         } else {
-
                             node = it->first;
                             p = i;
 
                             allReady[instance.getArchitecture().processorType(i)].erase(it);
                             for (unsigned procType :
-                                 procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType(
-                                     i)][instance.getComputationalDag().vertex_type(node)]) {
+                                 procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType(i)]
+                                                                              [instance.getComputationalDag().vertex_type(node)]) {
                                 allReady[procType].erase(std::make_pair(node, work_variance[node]));
                             }
                             return;
@@ -247,13 +236,17 @@ class VarianceFillup : public Scheduler<Graph_t> {
                        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &allReady,
                        const std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> &procReady,
                        const std::vector<bool> &procFree) const {
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !procReady[i].empty())
+        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+            if (procFree[i] && !procReady[i].empty()) {
                 return true;
+            }
+        }
 
-        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i)
-            if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty())
+        for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) {
+            if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) {
                 return true;
+            }
+        }
 
         return false;
     }
@@ -265,14 +258,16 @@ class VarianceFillup : public Scheduler<Graph_t> {
 
         std::vector<unsigned> ready_nodes_per_type = nr_ready_nodes_per_type;
         std::vector<unsigned> procs_per_type = nr_procs_per_type;
-        for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type)
-            for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type)
+        for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) {
+            for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) {
                 if (instance.isCompatibleType(node_type, proc_type)) {
                     unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]);
                     nr_nodes += matched;
                     ready_nodes_per_type[node_type] -= matched;
                     procs_per_type[proc_type] -= matched;
                 }
+            }
+        }
 
         return nr_nodes;
     }
@@ -299,7 +294,6 @@ class VarianceFillup : public Scheduler<Graph_t> {
      * @return A pair containing the return status and the computed BspSchedule.
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         const auto &instance = schedule.getInstance();
 
         for (const auto &v : instance.getComputationalDag().vertices()) {
@@ -325,15 +319,15 @@ class VarianceFillup : public Scheduler<Graph_t> {
         std::vector<std::set<std::pair<VertexType, double>, VarianceCompare>> allReady(
             instance.getArchitecture().getNumberOfProcessorTypes());
 
-        const std::vector<std::vector<unsigned>> procTypesCompatibleWithNodeType =
-            instance.getProcTypesCompatibleWithNodeType();
-        const std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_skip_proctype =
-            procTypesCompatibleWithNodeType_omit_procType(instance);
+        const std::vector<std::vector<unsigned>> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType();
+        const std::vector<std::vector<std::vector<unsigned>>> procTypesCompatibleWithNodeType_skip_proctype
+            = procTypesCompatibleWithNodeType_omit_procType(instance);
 
         std::vector<unsigned> nr_ready_nodes_per_type(G.num_vertex_types(), 0);
         std::vector<unsigned> nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < params_p; ++proc)
+        for (unsigned proc = 0; proc < params_p; ++proc) {
             ++nr_procs_per_type[instance.getArchitecture().processorType(proc)];
+        }
 
         std::vector<VertexType> nrPredecRemain(N);
         for (VertexType node = 0; node < N; node++) {
@@ -342,8 +336,9 @@ class VarianceFillup : public Scheduler<Graph_t> {
             if (num_parents == 0) {
                 ready.insert(std::make_pair(node, work_variances[node]));
                 ++nr_ready_nodes_per_type[G.vertex_type(node)];
-                for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)])
+                for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) {
                     allReady[procType].insert(std::make_pair(node, work_variances[node]));
+                }
             }
         }
 
@@ -364,14 +359,15 @@ class VarianceFillup : public Scheduler<Graph_t> {
                     }
                 }
 
-                for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes();
-                     ++procType)
+                for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) {
                     allReady[procType].clear();
+                }
 
                 for (const auto &nodeAndValuePair : ready) {
                     const auto node = nodeAndValuePair.first;
-                    for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)])
+                    for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) {
                         allReady[procType].insert(allReady[procType].end(), nodeAndValuePair);
+                    }
                 }
 
                 ++supstepIdx;
@@ -396,21 +392,23 @@ class VarianceFillup : public Scheduler<Graph_t> {
 
                             bool canAdd = true;
                             for (const auto &pred : G.parents(succ)) {
-                                if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) &&
-                                    schedule.assignedSuperstep(pred) == supstepIdx)
+                                if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node)
+                                    && schedule.assignedSuperstep(pred) == supstepIdx) {
                                     canAdd = false;
+                                }
                             }
 
                             if constexpr (use_memory_constraint) {
-
                                 if (canAdd) {
-                                    if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node)))
+                                    if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) {
                                         canAdd = false;
+                                    }
                                 }
                             }
 
-                            if (!instance.isCompatible(succ, schedule.assignedProcessor(node)))
+                            if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) {
                                 canAdd = false;
+                            }
 
                             if (canAdd) {
                                 procReady[schedule.assignedProcessor(node)].emplace(succ, work_variances[succ]);
@@ -427,11 +425,18 @@ class VarianceFillup : public Scheduler<Graph_t> {
                 endSupStep = true;
             }
             while (CanChooseNode(instance, allReady, procReady, procFree)) {
-
                 VertexType nextNode = std::numeric_limits<VertexType>::max();
                 unsigned nextProc = params_p;
-                Choose(instance, work_variances, allReady, procReady, procFree, nextNode, nextProc, endSupStep,
-                       max_finish_time - time, procTypesCompatibleWithNodeType_skip_proctype);
+                Choose(instance,
+                       work_variances,
+                       allReady,
+                       procReady,
+                       procFree,
+                       nextNode,
+                       nextProc,
+                       endSupStep,
+                       max_finish_time - time,
+                       procTypesCompatibleWithNodeType_skip_proctype);
 
                 if (nextNode == std::numeric_limits<VertexType>::max() || nextProc == params_p) {
                     endSupStep = true;
@@ -465,19 +470,18 @@ class VarianceFillup : public Scheduler<Graph_t> {
             }
 
             if constexpr (use_memory_constraint) {
-
                 if (not check_mem_feasibility(instance, allReady, procReady)) {
-
                     return RETURN_STATUS::ERROR;
                 }
             }
 
-            if (free > params_p * max_percent_idle_processors &&
-                ((!increase_parallelism_in_new_superstep) ||
-                 get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) >=
-                     std::min(std::min(params_p, static_cast<unsigned>(1.2 * (params_p - free))),
-                              params_p - free + (static_cast<unsigned>(0.5 * free)))))
+            if (free > params_p * max_percent_idle_processors
+                && ((!increase_parallelism_in_new_superstep)
+                    || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type)
+                           >= std::min(std::min(params_p, static_cast<unsigned>(1.2 * (params_p - free))),
+                                       params_p - free + (static_cast<unsigned>(0.5 * free))))) {
                 endSupStep = true;
+            }
         }
 
         assert(schedule.satisfiesPrecedenceConstraints());
@@ -493,7 +497,6 @@ class VarianceFillup : public Scheduler<Graph_t> {
      * @return The name of the schedule.
      */
     virtual std::string getScheduleName() const override {
-
         if constexpr (use_memory_constraint) {
             return "VarianceGreedyFillupMemory";
         } else {
@@ -502,4 +505,4 @@ class VarianceFillup : public Scheduler<Graph_t> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp
index 1b8d72b9..862c9bb2 100644
--- a/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp
+++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp
@@ -33,9 +33,8 @@ namespace osp {
  * a BSP schedule, with the assignment of vertices to processors and supersteps fixed.
  */
 
-template<typename Graph_t>
+template <typename Graph_t>
 class CoptCommScheduleOptimizer {
-
     static_assert(is_computational_dag_v<Graph_t>, "CoptFullScheduler can only be used with computational DAGs.");
 
     bool ignore_latency = false;
@@ -43,21 +42,19 @@ class CoptCommScheduleOptimizer {
     unsigned int timeLimitSeconds = 600;
 
   protected:
-
     VarArray superstep_has_comm;
     VarArray max_comm_superstep_var;
     std::vector<std::vector<std::vector<VarArray>>> comm_processor_to_processor_superstep_node_var;
 
-    void setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t>& schedule, Model& model);
+    void setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t> &schedule, Model &model);
 
-    void setInitialSolution(BspScheduleCS<Graph_t>& schedule, Model &model);
+    void setInitialSolution(BspScheduleCS<Graph_t> &schedule, Model &model);
 
     bool canShrinkResultingSchedule(unsigned number_of_supersteps) const;
 
-    void updateCommSchedule(BspScheduleCS<Graph_t>& schedule) const;
+    void updateCommSchedule(BspScheduleCS<Graph_t> &schedule) const;
 
   public:
-
     using KeyTriple = std::tuple<vertex_idx_t<Graph_t>, unsigned int, unsigned int>;
     virtual ~CoptCommScheduleOptimizer() = default;
 
@@ -66,14 +63,14 @@ class CoptCommScheduleOptimizer {
     virtual std::string getScheduleName() const { return "ILPCommunication"; }
 
     virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; }
+
     inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; }
+
     virtual void setIgnoreLatency(bool ignore_latency_) { ignore_latency = ignore_latency_; }
 };
 
-
-template<typename Graph_t>
-RETURN_STATUS CoptCommScheduleOptimizer<Graph_t>::improveSchedule(BspScheduleCS<Graph_t>& schedule) {
-
+template <typename Graph_t>
+RETURN_STATUS CoptCommScheduleOptimizer<Graph_t>::improveSchedule(BspScheduleCS<Graph_t> &schedule) {
     Envr env;
     Model model = env.CreateModel("bsp_schedule_cs");
 
@@ -86,11 +83,11 @@ RETURN_STATUS CoptCommScheduleOptimizer<Graph_t>::improveSchedule(BspScheduleCS<
 
     model.Solve();
 
-    if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL))
-    {
+    if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
         updateCommSchedule(schedule);
-        if (canShrinkResultingSchedule(schedule.numberOfSupersteps()))
-           schedule.shrinkByMergingSupersteps();
+        if (canShrinkResultingSchedule(schedule.numberOfSupersteps())) {
+            schedule.shrinkByMergingSupersteps();
+        }
     }
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
@@ -98,38 +95,37 @@ RETURN_STATUS CoptCommScheduleOptimizer<Graph_t>::improveSchedule(BspScheduleCS<
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
         return RETURN_STATUS::ERROR;
     } else {
-        if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL))
+        if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
             return RETURN_STATUS::BEST_FOUND;
-        else
+        } else {
             return RETURN_STATUS::TIMEOUT;
+        }
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool CoptCommScheduleOptimizer<Graph_t>::canShrinkResultingSchedule(unsigned number_of_supersteps) const {
-
     for (unsigned step = 0; step < number_of_supersteps - 1; step++) {
-
-        if (superstep_has_comm[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) <= 0.01)
+        if (superstep_has_comm[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) <= 0.01) {
             return true;
+        }
     }
     return false;
 }
 
-template<typename Graph_t>
-void CoptCommScheduleOptimizer<Graph_t>::updateCommSchedule(BspScheduleCS<Graph_t>& schedule) const {
-
-    std::map<KeyTriple, unsigned int>& cs = schedule.getCommunicationSchedule();
+template <typename Graph_t>
+void CoptCommScheduleOptimizer<Graph_t>::updateCommSchedule(BspScheduleCS<Graph_t> &schedule) const {
+    std::map<KeyTriple, unsigned int> &cs = schedule.getCommunicationSchedule();
     cs.clear();
 
     for (const auto &node : schedule.getInstance().vertices()) {
-
         for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) {
             for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) {
                 if (p_from != p_to) {
                     for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) {
                         if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)].Get(
-                                COPT_DBLINFO_VALUE) >= .99) {
+                                COPT_DBLINFO_VALUE)
+                            >= .99) {
                             cs[std::make_tuple(node, p_from, p_to)] = step;
                         }
                     }
@@ -139,34 +135,32 @@ void CoptCommScheduleOptimizer<Graph_t>::updateCommSchedule(BspScheduleCS<Graph_
     }
 }
 
-template<typename Graph_t>
-void CoptCommScheduleOptimizer<Graph_t>::setInitialSolution(BspScheduleCS<Graph_t>& schedule, Model &model){
-
-    const Graph_t& DAG = schedule.getInstance().getComputationalDag();
-    const BspArchitecture<Graph_t>& arch = schedule.getInstance().getArchitecture();
-    const unsigned& num_processors = schedule.getInstance().numberOfProcessors();
-    const unsigned& num_supersteps = schedule.numberOfSupersteps();
+template <typename Graph_t>
+void CoptCommScheduleOptimizer<Graph_t>::setInitialSolution(BspScheduleCS<Graph_t> &schedule, Model &model) {
+    const Graph_t &DAG = schedule.getInstance().getComputationalDag();
+    const BspArchitecture<Graph_t> &arch = schedule.getInstance().getArchitecture();
+    const unsigned &num_processors = schedule.getInstance().numberOfProcessors();
+    const unsigned &num_supersteps = schedule.numberOfSupersteps();
     const auto &cs = schedule.getCommunicationSchedule();
 
-    std::vector<std::vector<unsigned> > first_at(DAG.num_vertices(), std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
-    for (const auto &node : DAG.vertices())
-            first_at[node][schedule.assignedProcessor(node)] = schedule.assignedSuperstep(node);
-
+    std::vector<std::vector<unsigned>> first_at(DAG.num_vertices(),
+                                                std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
     for (const auto &node : DAG.vertices()) {
+        first_at[node][schedule.assignedProcessor(node)] = schedule.assignedSuperstep(node);
+    }
 
+    for (const auto &node : DAG.vertices()) {
         for (unsigned p1 = 0; p1 < num_processors; p1++) {
-
             for (unsigned p2 = 0; p2 < num_processors; p2++) {
-
-                if(p1 == p2)
+                if (p1 == p2) {
                     continue;
+                }
 
                 for (unsigned step = 0; step < num_supersteps; step++) {
-
                     const auto &key = std::make_tuple(node, p1, p2);
                     if (cs.find(key) != cs.end() && cs.at(key) == step) {
                         model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node)], 1);
-                        first_at[node][p2] = std::min(first_at[node][p2], step+1);
+                        first_at[node][p2] = std::min(first_at[node][p2], step + 1);
                     } else {
                         model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node)], 0);
                     }
@@ -175,37 +169,38 @@ void CoptCommScheduleOptimizer<Graph_t>::setInitialSolution(BspScheduleCS<Graph_
         }
     }
 
-    for (const auto &node : DAG.vertices())
-        for (unsigned proc = 0; proc < num_processors; proc++)
-                for (unsigned step = 0; step < num_supersteps; step++)
-                {
-                    if(step >= first_at[node][proc])
-                        model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
-                                                                                        [static_cast<int>(node)], 1);
-                    else
-                        model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
-                                                                                        [static_cast<int>(node)], 0);
+    for (const auto &node : DAG.vertices()) {
+        for (unsigned proc = 0; proc < num_processors; proc++) {
+            for (unsigned step = 0; step < num_supersteps; step++) {
+                if (step >= first_at[node][proc]) {
+                    model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast<int>(node)], 1);
+                } else {
+                    model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast<int>(node)], 0);
                 }
+            }
+        }
+    }
 
-    if(!ignore_latency)
-    {
+    if (!ignore_latency) {
         std::vector<unsigned> comm_phase_used(num_supersteps, 0);
-        for (auto const &[key, val] : cs)
+        for (auto const &[key, val] : cs) {
             comm_phase_used[val] = 1;
-        for (unsigned step = 0; step < num_supersteps; step++)
+        }
+        for (unsigned step = 0; step < num_supersteps; step++) {
             model.SetMipStart(superstep_has_comm[static_cast<int>(step)], comm_phase_used[step]);
+        }
     }
 
     std::vector<std::vector<v_commw_t<Graph_t>>> send(num_supersteps, std::vector<v_commw_t<Graph_t>>(num_processors, 0));
     std::vector<std::vector<v_commw_t<Graph_t>>> rec(num_supersteps, std::vector<v_commw_t<Graph_t>>(num_processors, 0));
 
     for (const auto &[key, val] : cs) {
-        send[val][std::get<1>(key)] += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key));
+        send[val][std::get<1>(key)]
+            += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key));
         rec[val][std::get<2>(key)] += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key));
     }
 
     for (unsigned step = 0; step < num_supersteps; step++) {
-
         v_commw_t<Graph_t> max_comm = 0;
         for (unsigned proc = 0; proc < num_processors; proc++) {
             max_comm = std::max(max_comm, send[step][proc]);
@@ -219,9 +214,8 @@ void CoptCommScheduleOptimizer<Graph_t>::setInitialSolution(BspScheduleCS<Graph_
     model.SetIntParam(COPT_INTPARAM_MIPSTARTMODE, 2);
 }
 
-template<typename Graph_t>
-void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t>& schedule, Model& model) {
-
+template <typename Graph_t>
+void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t> &schedule, Model &model) {
     const unsigned &max_number_supersteps = schedule.numberOfSupersteps();
     const unsigned &num_processors = schedule.getInstance().numberOfProcessors();
     const unsigned num_vertices = static_cast<unsigned>(schedule.getInstance().numberOfVertices());
@@ -235,17 +229,14 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
 
     // communicate node from p1 to p2 at superstep
 
-    comm_processor_to_processor_superstep_node_var = std::vector<std::vector<std::vector<VarArray>>>(num_processors,
-                                        std::vector<std::vector<VarArray>>(num_processors,  std::vector<VarArray>(max_number_supersteps)));
+    comm_processor_to_processor_superstep_node_var = std::vector<std::vector<std::vector<VarArray>>>(
+        num_processors, std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps)));
 
     for (unsigned p1 = 0; p1 < num_processors; p1++) {
-
         for (unsigned p2 = 0; p2 < num_processors; p2++) {
-
             for (unsigned step = 0; step < max_number_supersteps; step++) {
-
-                comm_processor_to_processor_superstep_node_var[p1][p2][step] = model.AddVars(static_cast<int>(num_vertices),
-                                                        COPT_BINARY, "comm_processor_to_processor_superstep_node");
+                comm_processor_to_processor_superstep_node_var[p1][p2][step]
+                    = model.AddVars(static_cast<int>(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node");
             }
         }
     }
@@ -253,16 +244,12 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
     if (!ignore_latency) {
         unsigned M = num_processors * num_processors * num_vertices;
         for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) {
-
             Expr expr;
 
             for (unsigned p1 = 0; p1 < num_processors; p1++) {
-
                 for (unsigned p2 = 0; p2 < num_processors; p2++) {
-
                     if (p1 != p2) {
                         for (unsigned node = 0; node < num_vertices; node++) {
-
                             expr += comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node)];
                         }
                     }
@@ -275,25 +262,25 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
     // precedence constraint: if task is computed then all of its predecessors must have been present
     // and vertex is present where it was computed
     for (unsigned node = 0; node < num_vertices; node++) {
-
         const unsigned &processor = schedule.assignedProcessor(node);
         const unsigned &superstep = schedule.assignedSuperstep(node);
         Expr expr;
         unsigned num_com_edges = 0;
         for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node)) {
-
             if (schedule.assignedProcessor(node) != schedule.assignedProcessor(pred)) {
                 num_com_edges += 1;
                 expr += comm_processor_to_processor_superstep_node_var[processor][processor][superstep][static_cast<int>(pred)];
 
                 model.AddConstr(
-                    comm_processor_to_processor_superstep_node_var[schedule.assignedProcessor(pred)][schedule.assignedProcessor(pred)]
-                                                                  [schedule.assignedSuperstep(pred)][static_cast<int>(pred)] == 1);
+                    comm_processor_to_processor_superstep_node_var[schedule.assignedProcessor(pred)][schedule.assignedProcessor(
+                        pred)][schedule.assignedSuperstep(pred)][static_cast<int>(pred)]
+                    == 1);
             }
         }
 
-        if (num_com_edges > 0)
+        if (num_com_edges > 0) {
             model.AddConstr(expr >= num_com_edges);
+        }
     }
 
     // combines two constraints: node can only be communicated if it is present; and node is present if it was computed
@@ -301,15 +288,15 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
         for (unsigned int processor = 0; processor < num_processors; processor++) {
             for (unsigned int node = 0; node < num_vertices; node++) {
-
-                if (processor == schedule.assignedProcessor(node) && step >= schedule.assignedSuperstep(node))
+                if (processor == schedule.assignedProcessor(node) && step >= schedule.assignedSuperstep(node)) {
                     continue;
+                }
 
                 Expr expr1, expr2;
                 if (step > 0) {
-
                     for (unsigned int p_from = 0; p_from < num_processors; p_from++) {
-                        expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast<int>(node)];
+                        expr1
+                            += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast<int>(node)];
                     }
                 }
 
@@ -324,26 +311,23 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
 
     for (unsigned step = 0; step < max_number_supersteps; step++) {
         for (unsigned processor = 0; processor < num_processors; processor++) {
-
             Expr expr1, expr2;
             for (unsigned node = 0; node < num_vertices; node++) {
-
                 for (unsigned p_to = 0; p_to < num_processors; p_to++) {
                     if (processor != p_to) {
-                        expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) *
-                                schedule.getInstance().sendCosts(processor, p_to) *
-                                comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast<int>(node)];
+                        expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node)
+                                 * schedule.getInstance().sendCosts(processor, p_to)
+                                 * comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast<int>(node)];
                     }
                 }
 
                 for (unsigned int p_from = 0; p_from < num_processors; p_from++) {
                     if (processor != p_from) {
-                        expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) *
-                                schedule.getInstance().sendCosts(p_from, processor) *
-                                comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast<int>(node)];
+                        expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node)
+                                 * schedule.getInstance().sendCosts(p_from, processor)
+                                 * comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast<int>(node)];
                     }
                 }
-
             }
 
             model.AddConstr(max_comm_superstep_var[static_cast<int>(step)] >= expr1);
@@ -357,13 +341,11 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
     Expr expr;
 
     if (!ignore_latency) {
-
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
-            expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
-                    schedule.getInstance().synchronisationCosts() * superstep_has_comm[static_cast<int>(step)];
+            expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast<int>(step)]
+                    + schedule.getInstance().synchronisationCosts() * superstep_has_comm[static_cast<int>(step)];
         }
     } else {
-
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
             expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast<int>(step)];
         }
@@ -371,4 +353,4 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
     model.SetObjective(expr - schedule.getInstance().synchronisationCosts(), COPT_MINIMIZE);
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp
index 45b58ca3..fdd3f5c1 100644
--- a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp
+++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp
@@ -55,9 +55,8 @@ namespace osp {
  * supersteps, enable/disable writing intermediate solutions, and get information about the best gap, objective value,
  * and bound found by the solver.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class CoptFullScheduler : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "CoptFullScheduler can only be used with computational DAGs.");
 
   private:
@@ -77,7 +76,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     std::string solution_file_prefix;
 
     class WriteSolutionCallback : public CallbackBase {
-
       private:
         unsigned counter;
         unsigned max_number_solution;
@@ -86,9 +84,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
       public:
         WriteSolutionCallback()
-            : counter(0), max_number_solution(500), best_obj(COPT_INFINITY), allow_recomputation_cb(false),
-              write_solutions_path_cb(""), solution_file_prefix_cb(""), instance_ptr(),
-              node_to_processor_superstep_var_ptr(), comm_processor_to_processor_superstep_node_var_ptr() {}
+            : counter(0),
+              max_number_solution(500),
+              best_obj(COPT_INFINITY),
+              allow_recomputation_cb(false),
+              write_solutions_path_cb(""),
+              solution_file_prefix_cb(""),
+              instance_ptr(),
+              node_to_processor_superstep_var_ptr(),
+              comm_processor_to_processor_superstep_node_var_ptr() {}
 
         bool allow_recomputation_cb;
         std::string write_solutions_path_cb;
@@ -99,54 +103,40 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         std::vector<std::vector<std::vector<VarArray>>> *comm_processor_to_processor_superstep_node_var_ptr;
 
         void callback() override {
-
-            if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution &&
-                GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
-
+            if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
                 try {
-
                     if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) {
-
                         best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ);
 
                         if (allow_recomputation_cb) {
-
                             auto sched = constructBspScheduleRecompFromCallback();
                             DotFileWriter sched_writer;
-                            sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" +
-                                                                   std::to_string(counter) + "_schedule.dot",
+                            sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb
+                                                                   + "_" + std::to_string(counter) + "_schedule.dot",
                                                                sched);
 
                         } else {
-
                             BspSchedule<Graph_t> sched = constructBspScheduleFromCallback();
                             DotFileWriter sched_writer;
-                            sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" +
-                                                            std::to_string(counter) + "_schedule.dot",
+                            sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_"
+                                                            + std::to_string(counter) + "_schedule.dot",
                                                         sched);
                         }
                         counter++;
                     }
 
-                } catch (const std::exception &e) {
-                }
+                } catch (const std::exception &e) {}
             }
         }
 
         BspScheduleCS<Graph_t> constructBspScheduleFromCallback() {
-
             BspScheduleCS<Graph_t> schedule(*instance_ptr);
 
             for (const auto &node : instance_ptr->vertices()) {
-
                 for (unsigned int processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) {
-
-                    for (unsigned step = 0;
-                         step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) {
-
-                        if (GetSolution(
-                                (*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >=
-                            .99) {
+                    for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size());
+                         step++) {
+                        if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >= .99) {
                             schedule.setAssignedProcessor(node, processor);
                             schedule.setAssignedSuperstep(node, step);
                         }
@@ -155,16 +145,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             }
 
             for (const auto &node : instance_ptr->vertices()) {
-
                 for (unsigned int p_from = 0; p_from < instance_ptr->numberOfProcessors(); p_from++) {
                     for (unsigned int p_to = 0; p_to < instance_ptr->numberOfProcessors(); p_to++) {
                         if (p_from != p_to) {
                             for (int step = 0; step < (*node_to_processor_superstep_var_ptr)[0][0].Size(); step++) {
                                 if (GetSolution(
-                                        (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][static_cast<
-                                            unsigned>(step)][static_cast<int>(node)]) >= .99) {
-                                    schedule.addCommunicationScheduleEntry(node, p_from, p_to,
-                                                                           static_cast<unsigned>(step));
+                                        (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][static_cast<unsigned>(
+                                            step)][static_cast<int>(node)])
+                                    >= .99) {
+                                    schedule.addCommunicationScheduleEntry(node, p_from, p_to, static_cast<unsigned>(step));
                                 }
                             }
                         }
@@ -176,16 +165,13 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         }
 
         BspScheduleRecomp<Graph_t> constructBspScheduleRecompFromCallback() {
-
             unsigned number_of_supersteps = 0;
             BspScheduleRecomp<Graph_t> schedule(*instance_ptr);
 
             for (unsigned int node = 0; node < instance_ptr->numberOfVertices(); node++) {
-
                 for (unsigned int processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) {
-
-                    for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) {
-
+                    for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size());
+                         step++) {
                         if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >= .99) {
                             schedule.assignments(node).emplace_back(processor, step);
 
@@ -200,15 +186,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             schedule.setNumberOfSupersteps(number_of_supersteps);
 
             for (unsigned int node = 0; node < instance_ptr->numberOfVertices(); node++) {
-
                 for (unsigned int p_from = 0; p_from < instance_ptr->numberOfProcessors(); p_from++) {
                     for (unsigned int p_to = 0; p_to < instance_ptr->numberOfProcessors(); p_to++) {
                         if (p_from != p_to) {
-                            for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) {
-                                if (GetSolution(
-                                        (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][step][static_cast<int>(node)]) >=
-                                    .99) {
-
+                            for (unsigned step = 0;
+                                 step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size());
+                                 step++) {
+                                if (GetSolution((*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][step]
+                                                                                                     [static_cast<int>(node)])
+                                    >= .99) {
                                     schedule.addCommunicationScheduleEntry(node, p_from, p_to, step);
                                 }
                             }
@@ -234,26 +220,20 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     VarArray max_work_superstep_var;
 
     void constructBspScheduleFromSolution(BspScheduleCS<Graph_t> &schedule, bool cleanup_ = false) {
-
         const auto &instance = schedule.getInstance();
 
         unsigned number_of_supersteps = 0;
 
         for (unsigned step = 0; step < max_number_supersteps; step++) {
-
             if (superstep_used_var[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                 number_of_supersteps++;
             }
         }
 
         for (const auto &node : instance.vertices()) {
-
             for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
-                    if (node_to_processor_superstep_var[node][processor][static_cast<int>(step)].Get(
-                            COPT_DBLINFO_VALUE) >= .99) {
+                    if (node_to_processor_superstep_var[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                         schedule.setAssignedProcessor(node, processor);
                         schedule.setAssignedSuperstep(node, step);
                     }
@@ -261,19 +241,19 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             }
         }
 
-        if (is_max_bsp && number_of_supersteps > 0) // can ignore last 2 comm phases in this case
+        if (is_max_bsp && number_of_supersteps > 0) {    // can ignore last 2 comm phases in this case
             --number_of_supersteps;
+        }
 
         schedule.getCommunicationSchedule().clear();
         for (const auto &node : instance.vertices()) {
-
             for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
                 for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
                     if (p_from != p_to) {
                         for (unsigned int step = 0; step < number_of_supersteps - 1; step++) {
-                            if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step]
-                                                                              [static_cast<int>(node)]
-                                                                                  .Get(COPT_DBLINFO_VALUE) >= .99) {
+                            if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)].Get(
+                                    COPT_DBLINFO_VALUE)
+                                >= .99) {
                                 schedule.addCommunicationScheduleEntry(node, p_from, p_to, step);
                             }
                         }
@@ -292,7 +272,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         unsigned number_of_supersteps = 0;
 
         for (unsigned step = 0; step < max_number_supersteps; step++) {
-
             if (superstep_used_var[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                 number_of_supersteps++;
             }
@@ -301,11 +280,8 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         schedule.setNumberOfSupersteps(number_of_supersteps);
 
         for (unsigned node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
-
             for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) {
-
                 for (unsigned step = 0; step < number_of_supersteps - 1; step++) {
-
                     if (node_to_processor_superstep_var[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                         schedule.assignments(node).emplace_back(processor, step);
                     }
@@ -315,13 +291,13 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
         schedule.getCommunicationSchedule().clear();
         for (unsigned int node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
-
             for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) {
                 for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) {
                     if (p_from != p_to) {
                         for (unsigned int step = 0; step < max_number_supersteps; step++) {
                             if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)].Get(
-                                    COPT_DBLINFO_VALUE) >= .99) {
+                                    COPT_DBLINFO_VALUE)
+                                >= .99) {
                                 schedule.addCommunicationScheduleEntry(node, p_from, p_to, step);
                             }
                         }
@@ -337,36 +313,39 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     }
 
     void loadInitialSchedule(Model &model, const BspInstance<Graph_t> &instance) {
-
-        if (use_initial_schedule_recomp &&
-            (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() ||
-             instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() ||
-             instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) {
+        if (use_initial_schedule_recomp
+            && (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps()
+                || instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors()
+                || instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) {
             throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not "
                                         "agree with those of the initial schedule's instance!");
         }
 
-        if (!use_initial_schedule_recomp & use_initial_schedule &&
-            (max_number_supersteps < initial_schedule->numberOfSupersteps() ||
-             instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
-             instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
+        if (!use_initial_schedule_recomp & use_initial_schedule
+            && (max_number_supersteps < initial_schedule->numberOfSupersteps()
+                || instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors()
+                || instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
             throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not "
                                         "agree with those of the initial schedule's instance!");
         }
 
-        const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() : initial_schedule->getInstance().getComputationalDag();
+        const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag()
+                                                      : initial_schedule->getInstance().getComputationalDag();
 
-        const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture() : initial_schedule->getInstance().getArchitecture();
+        const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture()
+                                                       : initial_schedule->getInstance().getArchitecture();
 
-        const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() : initial_schedule->getInstance().numberOfProcessors();
+        const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors()
+                                                                     : initial_schedule->getInstance().numberOfProcessors();
 
-        const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() : initial_schedule->numberOfSupersteps();
+        const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps()
+                                                                     : initial_schedule->numberOfSupersteps();
 
-        const auto &cs = use_initial_schedule_recomp ? initial_schedule_recomp->getCommunicationSchedule() : initial_schedule->getCommunicationSchedule();
+        const auto &cs = use_initial_schedule_recomp ? initial_schedule_recomp->getCommunicationSchedule()
+                                                     : initial_schedule->getCommunicationSchedule();
 
         assert(max_number_supersteps <= static_cast<unsigned>(std::numeric_limits<int>::max()));
         for (unsigned step = 0; step < max_number_supersteps; step++) {
-
             if (step < num_supersteps) {
                 model.SetMipStart(superstep_used_var[static_cast<int>(step)], 1);
 
@@ -380,49 +359,43 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
         std::vector<std::set<std::pair<unsigned, unsigned>>> computed(DAG.num_vertices());
         for (const auto &node : DAG.vertices()) {
-            if (use_initial_schedule_recomp)
-                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node))
+            if (use_initial_schedule_recomp) {
+                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node)) {
                     computed[node].emplace(assignment);
-            else
+                }
+            } else {
                 computed[node].emplace(initial_schedule->assignedProcessor(node), initial_schedule->assignedSuperstep(node));
+            }
         }
 
-        std::vector<std::vector<unsigned>> first_at(DAG.num_vertices(), std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
+        std::vector<std::vector<unsigned>> first_at(DAG.num_vertices(),
+                                                    std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
         for (const auto &node : DAG.vertices()) {
             if (use_initial_schedule_recomp) {
-                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node))
+                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node)) {
                     first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second);
+                }
             } else {
-                first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)],
-                                                                                     initial_schedule->assignedSuperstep(node));
+                first_at[node][initial_schedule->assignedProcessor(node)] = std::min(
+                    first_at[node][initial_schedule->assignedProcessor(node)], initial_schedule->assignedSuperstep(node));
             }
         }
 
         unsigned staleness = is_max_bsp ? 2 : 1;
         for (const auto &node : DAG.vertices()) {
-
             for (unsigned p1 = 0; p1 < num_processors; p1++) {
-
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
                     for (unsigned p2 = 0; p2 < num_processors; p2++) {
-
                         if (p1 != p2) {
-
                             const auto &key = std::make_tuple(node, p1, p2);
                             if (cs.find(key) != cs.end()) {
-
                                 if (cs.at(key) == step) {
                                     model.SetMipStart(
-                                        comm_processor_to_processor_superstep_node_var[p1][p2][step]
-                                                                                      [static_cast<int>(node)],
-                                        1);
+                                        comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node)], 1);
                                     first_at[node][p2] = std::min(first_at[node][p2], step + staleness);
                                 } else {
                                     model.SetMipStart(
-                                        comm_processor_to_processor_superstep_node_var[p1][p2][step]
-                                                                                      [static_cast<int>(node)],
-                                        0);
+                                        comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node)], 0);
                                 }
                             }
                         }
@@ -431,39 +404,35 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             }
         }
 
-        for (const auto &node : DAG.vertices())
-            for (unsigned proc = 0; proc < num_processors; proc++)
+        for (const auto &node : DAG.vertices()) {
+            for (unsigned proc = 0; proc < num_processors; proc++) {
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-                    if (step >= first_at[node][proc])
-                        model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
-                                                                                        [static_cast<int>(node)],
-                                          1);
-                    else
-                        model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
-                                                                                        [static_cast<int>(node)],
-                                          0);
+                    if (step >= first_at[node][proc]) {
+                        model.SetMipStart(
+                            comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast<int>(node)], 1);
+                    } else {
+                        model.SetMipStart(
+                            comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast<int>(node)], 0);
+                    }
                 }
+            }
+        }
 
         for (const auto &node : DAG.vertices()) {
-
             for (unsigned proc = 0; proc < num_processors; proc++) {
-
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
                     if (computed[node].find(std::make_pair(proc, step)) != computed[node].end()) {
                         model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast<int>(step)], 1);
 
                     } else {
-
                         model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast<int>(step)], 0);
                     }
                 }
             }
         }
 
-        std::vector<std::vector<v_workw_t<Graph_t>>> work(
-            max_number_supersteps,
-            std::vector<v_workw_t<Graph_t>>(num_processors, 0));
+        std::vector<std::vector<v_workw_t<Graph_t>>> work(max_number_supersteps,
+                                                          std::vector<v_workw_t<Graph_t>>(num_processors, 0));
 
         if (use_initial_schedule_recomp) {
             for (const auto &node : initial_schedule_recomp->getInstance().vertices()) {
@@ -472,28 +441,23 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
                 }
             }
         } else {
-            for (const auto &node : initial_schedule->getInstance().vertices())
-                work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] +=
-                    DAG.vertex_work_weight(node);
+            for (const auto &node : initial_schedule->getInstance().vertices()) {
+                work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)]
+                    += DAG.vertex_work_weight(node);
+            }
         }
 
-        std::vector<std::vector<v_commw_t<Graph_t>>> send(
-            max_number_supersteps,
-            std::vector<v_commw_t<Graph_t>>(num_processors, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> send(max_number_supersteps,
+                                                          std::vector<v_commw_t<Graph_t>>(num_processors, 0));
 
-        std::vector<std::vector<v_commw_t<Graph_t>>> rec(
-            max_number_supersteps,
-            std::vector<v_commw_t<Graph_t>>(num_processors, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> rec(max_number_supersteps, std::vector<v_commw_t<Graph_t>>(num_processors, 0));
 
         for (const auto &[key, val] : cs) {
+            send[val][std::get<1>(key)]
+                += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key));
 
-            send[val][std::get<1>(key)] +=
-                DAG.vertex_comm_weight(std::get<0>(key)) *
-                arch.sendCosts(std::get<1>(key), std::get<2>(key));
-
-            rec[val][std::get<2>(key)] +=
-                DAG.vertex_comm_weight(std::get<0>(key)) *
-                arch.sendCosts(std::get<1>(key), std::get<2>(key));
+            rec[val][std::get<2>(key)]
+                += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key));
         }
 
         for (unsigned step = 0; step < max_number_supersteps; step++) {
@@ -523,7 +487,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     }
 
     void setupVariablesConstraintsObjective(const BspInstance<Graph_t> &instance, Model &model) {
-
         /*
        Variables
        */
@@ -539,7 +502,8 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             // variables indicating if there is any communication in superstep
             superstep_has_comm = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_has_comm");
             // variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp
-            mergeable_superstep_penalty = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "mergeable_superstep_penalty");
+            mergeable_superstep_penalty
+                = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "mergeable_superstep_penalty");
         }
 
         // variables for assigments of nodes to processor and superstep
@@ -547,11 +511,9 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             instance.numberOfVertices(), std::vector<VarArray>(instance.numberOfProcessors()));
 
         for (const auto &node : instance.vertices()) {
-
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
-                node_to_processor_superstep_var[node][processor] =
-                    model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep");
+                node_to_processor_superstep_var[node][processor]
+                    = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep");
             }
         }
 
@@ -559,14 +521,12 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         Constraints
           */
         if (use_memory_constraint) {
-
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
                     Expr expr;
                     for (const auto &node : instance.vertices()) {
-                        expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)] *
-                                instance.getComputationalDag().vertex_mem_weight(node);
+                        expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)]
+                                * instance.getComputationalDag().vertex_mem_weight(node);
                     }
 
                     model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor));
@@ -583,24 +543,20 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
         // superstep is used at all
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
-
             Expr expr;
             for (const auto &node : instance.vertices()) {
-
                 for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
                     expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
                 }
             }
-            model.AddConstr(expr <= static_cast<double>(instance.numberOfVertices() * instance.numberOfProcessors()) *
-                                        superstep_used_var[static_cast<int>(step)]);
+            model.AddConstr(expr <= static_cast<double>(instance.numberOfVertices() * instance.numberOfProcessors())
+                                        * superstep_used_var[static_cast<int>(step)]);
         }
 
         // nodes are assigend depending on whether recomputation is allowed or not
         for (const auto &node : instance.vertices()) {
-
             Expr expr;
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 for (unsigned int step = 0; step < max_number_supersteps; step++) {
                     expr += node_to_processor_superstep_var[node][processor].GetVar(static_cast<int>(step));
                 }
@@ -608,42 +564,36 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
             model.AddConstr(allow_recomputation ? expr >= .99 : expr == 1);
         }
-        if (allow_recomputation)
+        if (allow_recomputation) {
             std::cout << "setting up constraints with recomputation: " << allow_recomputation << std::endl;
+        }
 
         comm_processor_to_processor_superstep_node_var = std::vector<std::vector<std::vector<VarArray>>>(
             instance.numberOfProcessors(),
-            std::vector<std::vector<VarArray>>(instance.numberOfProcessors(),
-                                               std::vector<VarArray>(max_number_supersteps)));
+            std::vector<std::vector<VarArray>>(instance.numberOfProcessors(), std::vector<VarArray>(max_number_supersteps)));
 
         for (unsigned int p1 = 0; p1 < instance.numberOfProcessors(); p1++) {
-
             for (unsigned int p2 = 0; p2 < instance.numberOfProcessors(); p2++) {
                 for (unsigned int step = 0; step < max_number_supersteps; step++) {
-
-                    comm_processor_to_processor_superstep_node_var[p1][p2][step] =
-                        model.AddVars(static_cast<int>(instance.numberOfVertices()), COPT_BINARY,
-                                      "comm_processor_to_processor_superstep_node");
+                    comm_processor_to_processor_superstep_node_var[p1][p2][step] = model.AddVars(
+                        static_cast<int>(instance.numberOfVertices()), COPT_BINARY, "comm_processor_to_processor_superstep_node");
                 }
             }
         }
 
         // precedence constraint: if task is computed then all of its predecessors must have been present
         for (const auto &node : instance.vertices()) {
-
             if (instance.getComputationalDag().in_degree(node) > 0) {
                 for (unsigned int step = 0; step < max_number_supersteps; step++) {
                     for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                         Expr expr;
                         for (const auto &parent : instance.getComputationalDag().parents(node)) {
                             expr += comm_processor_to_processor_superstep_node_var[processor][processor][step]
                                                                                   [static_cast<int>(parent)];
                         }
 
-                        model.AddConstr(expr >=
-                                        static_cast<double>(instance.getComputationalDag().in_degree(node)) *
-                                            node_to_processor_superstep_var[node][processor][static_cast<int>(step)]);
+                        model.AddConstr(expr >= static_cast<double>(instance.getComputationalDag().in_degree(node))
+                                                    * node_to_processor_superstep_var[node][processor][static_cast<int>(step)]);
                     }
                 }
             }
@@ -654,10 +604,8 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
                 for (const auto &node : instance.vertices()) {
-
                     Expr expr1, expr2;
                     if (step > 0) {
-
                         for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
                             if (!is_max_bsp || p_from == processor) {
                                 expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1]
@@ -672,8 +620,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
                     expr1 += node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
 
                     for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
-                        expr2 += comm_processor_to_processor_superstep_node_var[processor][p_to][step]
-                                                                               [static_cast<int>(node)];
+                        expr2 += comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast<int>(node)];
                     }
 
                     model.AddConstr(instance.numberOfProcessors() * (expr1) >= expr2);
@@ -688,36 +635,38 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
                 for (const auto &node : instance.vertices()) {
                     for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
                         for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
-                            if (p_from != p_to)
+                            if (p_from != p_to) {
                                 expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)];
+                            }
                         }
                     }
                 }
-                model.AddConstr(static_cast<unsigned>(instance.numberOfProcessors() * instance.numberOfProcessors() * instance.numberOfVertices()) *
-                                    superstep_has_comm[static_cast<int>(step)] >=
-                                expr);
+                model.AddConstr(static_cast<unsigned>(instance.numberOfProcessors() * instance.numberOfProcessors()
+                                                      * instance.numberOfVertices())
+                                    * superstep_has_comm[static_cast<int>(step)]
+                                >= expr);
             }
 
             // if step i and (i+1) has no comm, and (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize
-            for (unsigned int step = 0; step < max_number_supersteps - 2; step++)
-                model.AddConstr(superstep_used_var[static_cast<int>(step + 2)] - superstep_has_comm[static_cast<int>(step)] - superstep_has_comm[static_cast<int>(step + 1)] <= mergeable_superstep_penalty[static_cast<int>(step)]);
+            for (unsigned int step = 0; step < max_number_supersteps - 2; step++) {
+                model.AddConstr(superstep_used_var[static_cast<int>(step + 2)] - superstep_has_comm[static_cast<int>(step)]
+                                    - superstep_has_comm[static_cast<int>(step + 1)]
+                                <= mergeable_superstep_penalty[static_cast<int>(step)]);
+            }
         }
 
-        max_comm_superstep_var =
-            model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_comm_superstep");
+        max_comm_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_comm_superstep");
         // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_comm_superstep");
 
-        max_work_superstep_var =
-            model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_work_superstep");
+        max_work_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_work_superstep");
         // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep");
 
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 Expr expr;
                 for (unsigned int node = 0; node < instance.numberOfVertices(); node++) {
-                    expr += instance.getComputationalDag().vertex_work_weight(node) *
-                            node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
+                    expr += instance.getComputationalDag().vertex_work_weight(node)
+                            * node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
                 }
 
                 model.AddConstr(max_work_superstep_var[static_cast<int>(step)] >= expr);
@@ -726,15 +675,12 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 Expr expr;
                 for (const auto &node : instance.vertices()) {
                     for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
                         if (processor != p_to) {
-                            expr += instance.getComputationalDag().vertex_comm_weight(node) *
-                                    instance.sendCosts(processor, p_to) *
-                                    comm_processor_to_processor_superstep_node_var[processor][p_to][step]
-                                                                                  [static_cast<int>(node)];
+                            expr += instance.getComputationalDag().vertex_comm_weight(node) * instance.sendCosts(processor, p_to)
+                                    * comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast<int>(node)];
                         }
                     }
                 }
@@ -745,15 +691,13 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 Expr expr;
                 for (const auto &node : instance.vertices()) {
                     for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
                         if (processor != p_from) {
-                            expr += instance.getComputationalDag().vertex_comm_weight(node) *
-                                    instance.sendCosts(p_from, processor) *
-                                    comm_processor_to_processor_superstep_node_var[p_from][processor][step]
-                                                                                  [static_cast<int>(node)];
+                            expr
+                                += instance.getComputationalDag().vertex_comm_weight(node) * instance.sendCosts(p_from, processor)
+                                   * comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast<int>(node)];
                         }
                     }
                 }
@@ -782,17 +726,19 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             VarArray max_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_superstep");
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
                 model.AddConstr(max_superstep_var[static_cast<int>(step)] >= max_work_superstep_var[static_cast<int>(step)]);
-                if (step > 0)
-                    model.AddConstr(max_superstep_var[static_cast<int>(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step - 1)]);
+                if (step > 0) {
+                    model.AddConstr(max_superstep_var[static_cast<int>(step)]
+                                    >= instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step - 1)]);
+                }
                 expr += max_superstep_var[static_cast<int>(step)];
                 expr += instance.synchronisationCosts() * superstep_has_comm[static_cast<int>(step)];
                 expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast<int>(step)];
             }
         } else {
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
-                expr += max_work_superstep_var[static_cast<int>(step)] +
-                        instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
-                        instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
+                expr += max_work_superstep_var[static_cast<int>(step)]
+                        + instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step)]
+                        + instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
             }
             expr -= instance.synchronisationCosts();
         }
@@ -814,18 +760,14 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         computeScheduleBase(schedule, model);
 
         if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
             constructBspScheduleFromSolution(schedule, true);
             return RETURN_STATUS::OSP_SUCCESS;
 
         } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
             return RETURN_STATUS::ERROR;
 
         } else {
-
             if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
                 constructBspScheduleFromSolution(schedule, true);
                 return RETURN_STATUS::BEST_FOUND;
 
@@ -837,29 +779,36 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
   public:
     CoptFullScheduler(unsigned steps = 5)
-        : allow_recomputation(false), use_memory_constraint(false), use_initial_schedule(false),
-          write_solutions_found(false), initial_schedule(0), max_number_supersteps(steps) {
-
+        : allow_recomputation(false),
+          use_memory_constraint(false),
+          use_initial_schedule(false),
+          write_solutions_found(false),
+          initial_schedule(0),
+          max_number_supersteps(steps) {
         // solution_callback.comm_processor_to_processor_superstep_node_var_ptr =
         //     &comm_processor_to_processor_superstep_node_var;
         // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
     }
 
     CoptFullScheduler(const BspScheduleCS<Graph_t> &schedule)
-        : allow_recomputation(false), use_memory_constraint(false), use_initial_schedule(true),
-          write_solutions_found(false), initial_schedule(&schedule),
+        : allow_recomputation(false),
+          use_memory_constraint(false),
+          use_initial_schedule(true),
+          write_solutions_found(false),
+          initial_schedule(&schedule),
           max_number_supersteps(schedule.numberOfSupersteps()) {
-
         // solution_callback.comm_processor_to_processor_superstep_node_var_ptr =
         //     &comm_processor_to_processor_superstep_node_var;
         // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
     }
 
     CoptFullScheduler(const BspScheduleRecomp<Graph_t> &schedule)
-        : allow_recomputation(true), use_memory_constraint(false), use_initial_schedule_recomp(true),
-          write_solutions_found(false), initial_schedule_recomp(&schedule),
-          max_number_supersteps(schedule.numberOfSupersteps()) {
-    }
+        : allow_recomputation(true),
+          use_memory_constraint(false),
+          use_initial_schedule_recomp(true),
+          write_solutions_found(false),
+          initial_schedule_recomp(&schedule),
+          max_number_supersteps(schedule.numberOfSupersteps()) {}
 
     virtual ~CoptFullScheduler() = default;
 
@@ -874,7 +823,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
      *         agree with those of the initial schedule's instance
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         BspScheduleCS<Graph_t> schedule_cs(schedule.getInstance());
         RETURN_STATUS status = computeScheduleCS(schedule_cs);
         if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) {
@@ -886,13 +834,11 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     }
 
     virtual RETURN_STATUS computeScheduleWithTimeLimit(BspSchedule<Graph_t> &schedule, unsigned timeLimit) {
-
         timeLimitSeconds = timeLimit;
         return computeSchedule(schedule);
     }
 
     virtual RETURN_STATUS computeMaxBspSchedule(MaxBspSchedule<Graph_t> &schedule) {
-
         MaxBspScheduleCS<Graph_t> schedule_cs(schedule.getInstance());
         RETURN_STATUS status = computeMaxBspScheduleCS(schedule_cs);
         if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) {
@@ -916,7 +862,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     }
 
     virtual RETURN_STATUS computeScheduleRecomp(BspScheduleRecomp<Graph_t> &schedule) {
-
         allow_recomputation = true;
         is_max_bsp = false;
 
@@ -932,18 +877,14 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         computeScheduleBase(schedule, model);
 
         if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
             constructBspScheduleRecompFromSolution(schedule, true);
             return RETURN_STATUS::OSP_SUCCESS;
 
         } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
             return RETURN_STATUS::ERROR;
 
         } else {
-
             if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
                 constructBspScheduleRecompFromSolution(schedule, true);
                 return RETURN_STATUS::BEST_FOUND;
 
@@ -954,7 +895,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     };
 
     virtual void computeScheduleBase(const BspScheduleRecomp<Graph_t> &schedule, Model &model) {
-
         if (timeLimitSeconds > 0) {
             model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds);
         }
@@ -971,11 +911,9 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2);
 
         if (write_solutions_found) {
-
             WriteSolutionCallback solution_callback;
             solution_callback.instance_ptr = &schedule.getInstance();
-            solution_callback.comm_processor_to_processor_superstep_node_var_ptr =
-                &comm_processor_to_processor_superstep_node_var;
+            solution_callback.comm_processor_to_processor_superstep_node_var_ptr = &comm_processor_to_processor_superstep_node_var;
             solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
             solution_callback.solution_file_prefix_cb = solution_file_prefix;
             solution_callback.write_solutions_path_cb = write_solutions_path;
@@ -997,7 +935,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
      * @param schedule The provided schedule.
      */
     inline void setInitialSolutionFromBspSchedule(const BspScheduleCS<Graph_t> &schedule) {
-
         initial_schedule = &schedule;
 
         max_number_supersteps = schedule.numberOfSupersteps();
@@ -1022,7 +959,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
      */
     void setMaxNumberOfSupersteps(unsigned max) {
         if (use_initial_schedule && max < initial_schedule->numberOfSupersteps()) {
-
             throw std::invalid_argument("Invalid Argument while setting "
                                         "max number of supersteps to a value "
                                         "which is less than the number of "
@@ -1090,4 +1026,4 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
     virtual std::string getScheduleName() const override { return "FullIlp"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp
index db9a01f3..e0369177 100644
--- a/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp
+++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp
@@ -32,9 +32,8 @@ namespace osp {
  * a BSP schedule, from a starting superstep to and ending superstep.
  */
 
-template<typename Graph_t>
+template <typename Graph_t>
 class CoptPartialScheduler {
-
     static_assert(is_computational_dag_v<Graph_t>, "CoptPartialScheduler can only be used with computational DAGs.");
 
     using KeyTriple = std::tuple<vertex_idx_t<Graph_t>, unsigned int, unsigned int>;
@@ -42,7 +41,6 @@ class CoptPartialScheduler {
     unsigned int timeLimitSeconds = 600;
 
   protected:
-
     unsigned start_superstep = 1, end_superstep = 3;
 
     std::vector<vertex_idx_t<Graph_t>> node_global_ID;
@@ -51,9 +49,9 @@ class CoptPartialScheduler {
     std::vector<vertex_idx_t<Graph_t>> source_global_ID;
     std::unordered_map<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t>> source_local_ID;
 
-    std::vector<std::pair<unsigned, unsigned> > node_needed_after_on_proc, source_needed_after_on_proc;
-    std::vector<std::tuple<vertex_idx_t<Graph_t>, unsigned, unsigned, unsigned> > fixed_comm_steps;
-    std::set<std::pair<unsigned, unsigned> > source_present_before;
+    std::vector<std::pair<unsigned, unsigned>> node_needed_after_on_proc, source_needed_after_on_proc;
+    std::vector<std::tuple<vertex_idx_t<Graph_t>, unsigned, unsigned, unsigned>> fixed_comm_steps;
+    std::set<std::pair<unsigned, unsigned>> source_present_before;
 
     unsigned max_number_supersteps;
 
@@ -66,30 +64,33 @@ class CoptPartialScheduler {
 
     bool has_fixed_comm_in_preceding_step;
 
-    void setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t>& schedule, Model& model);
+    void setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t> &schedule, Model &model);
 
-    void setInitialSolution(const BspScheduleCS<Graph_t>& schedule, Model &model);
+    void setInitialSolution(const BspScheduleCS<Graph_t> &schedule, Model &model);
 
-    void updateSchedule(BspScheduleCS<Graph_t>& schedule) const;
+    void updateSchedule(BspScheduleCS<Graph_t> &schedule) const;
 
-    void setupVertexMaps(const BspScheduleCS<Graph_t>& schedule);
+    void setupVertexMaps(const BspScheduleCS<Graph_t> &schedule);
 
   public:
-
     virtual RETURN_STATUS improveSchedule(BspScheduleCS<Graph_t> &schedule);
 
     virtual std::string getScheduleName() const { return "ILPPartial"; }
 
     virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; }
+
     inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; }
-    virtual void setStartAndEndSuperstep(unsigned start_, unsigned end_) { start_superstep = start_; end_superstep = end_; }
+
+    virtual void setStartAndEndSuperstep(unsigned start_, unsigned end_) {
+        start_superstep = start_;
+        end_superstep = end_;
+    }
 
     virtual ~CoptPartialScheduler() = default;
 };
 
-template<typename Graph_t>
-RETURN_STATUS CoptPartialScheduler<Graph_t>::improveSchedule(BspScheduleCS<Graph_t>& schedule) {
-
+template <typename Graph_t>
+RETURN_STATUS CoptPartialScheduler<Graph_t>::improveSchedule(BspScheduleCS<Graph_t> &schedule) {
     Envr env;
     Model model = env.CreateModel("bsp_schedule_partial");
 
@@ -104,86 +105,92 @@ RETURN_STATUS CoptPartialScheduler<Graph_t>::improveSchedule(BspScheduleCS<Graph
 
     model.Solve();
 
-    if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL))
+    if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
         updateSchedule(schedule);
+    }
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
         return RETURN_STATUS::OSP_SUCCESS;
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
         return RETURN_STATUS::ERROR;
     } else {
-        if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL))
+        if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
             return RETURN_STATUS::BEST_FOUND;
-        else
+        } else {
             return RETURN_STATUS::TIMEOUT;
+        }
     }
 }
 
-template<typename Graph_t>
-void CoptPartialScheduler<Graph_t>::setInitialSolution(const BspScheduleCS<Graph_t>& schedule, Model &model){
-
-    const Graph_t& DAG = schedule.getInstance().getComputationalDag();
-    const unsigned& num_processors = schedule.getInstance().numberOfProcessors();
+template <typename Graph_t>
+void CoptPartialScheduler<Graph_t>::setInitialSolution(const BspScheduleCS<Graph_t> &schedule, Model &model) {
+    const Graph_t &DAG = schedule.getInstance().getComputationalDag();
+    const unsigned &num_processors = schedule.getInstance().numberOfProcessors();
     const auto &cs = schedule.getCommunicationSchedule();
 
-    for (const vertex_idx_t<Graph_t> &node : DAG.vertices())
-    {
-        if(node_local_ID.find(node) == node_local_ID.end())
+    for (const vertex_idx_t<Graph_t> &node : DAG.vertices()) {
+        if (node_local_ID.find(node) == node_local_ID.end()) {
             continue;
-        for (unsigned proc = 0; proc < num_processors; proc++)
-            for(unsigned step = 0; step < max_number_supersteps; ++step)
-            {
-                if (schedule.assignedProcessor(node) == proc && schedule.assignedSuperstep(node) == start_superstep + step)
+        }
+        for (unsigned proc = 0; proc < num_processors; proc++) {
+            for (unsigned step = 0; step < max_number_supersteps; ++step) {
+                if (schedule.assignedProcessor(node) == proc && schedule.assignedSuperstep(node) == start_superstep + step) {
                     model.SetMipStart(node_to_processor_superstep_var[node_local_ID[node]][proc][static_cast<int>(step)], 1);
-                else
+                } else {
                     model.SetMipStart(node_to_processor_superstep_var[node_local_ID[node]][proc][static_cast<int>(step)], 0);
+                }
             }
+        }
     }
 
-    for (unsigned index = 0; index < fixed_comm_steps.size(); ++index)
+    for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) {
         model.SetMipStart(keep_fixed_comm_step[static_cast<int>(index)], 1);
+    }
 
     for (const auto &node : DAG.vertices()) {
-
-        if(node_local_ID.find(node) == node_local_ID.end())
+        if (node_local_ID.find(node) == node_local_ID.end()) {
             continue;
+        }
 
         for (unsigned p1 = 0; p1 < num_processors; p1++) {
-
             for (unsigned p2 = 0; p2 < num_processors; p2++) {
-
-                if(p1 == p2)
+                if (p1 == p2) {
                     continue;
+                }
 
                 for (unsigned step = 0; step < max_number_supersteps && step <= end_superstep - start_superstep; step++) {
-
                     const auto &key = std::make_tuple(node, p1, p2);
-                    if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step)
-                        model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node_local_ID[node])], 1);
-                    else
-                        model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node_local_ID[node])], 0);
+                    if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step) {
+                        model.SetMipStart(
+                            comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node_local_ID[node])], 1);
+                    } else {
+                        model.SetMipStart(
+                            comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast<int>(node_local_ID[node])], 0);
+                    }
                 }
             }
         }
     }
 
     for (const auto &source : DAG.vertices()) {
-
-        if(source_local_ID.find(source) == source_local_ID.end())
+        if (source_local_ID.find(source) == source_local_ID.end()) {
             continue;
+        }
 
-        for (unsigned proc = 0; proc < num_processors; proc++)
-        {
-            if(proc == schedule.assignedProcessor(source))
+        for (unsigned proc = 0; proc < num_processors; proc++) {
+            if (proc == schedule.assignedProcessor(source)) {
                 continue;
+            }
 
             for (unsigned step = 0; step < max_number_supersteps + 1 && step <= end_superstep - start_superstep + 1; step++) {
-
                 const auto &key = std::make_tuple(source, schedule.assignedProcessor(source), proc);
-                if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step - 1)
-                    model.SetMipStart(comm_to_processor_superstep_source_var[proc][step][static_cast<int>(source_local_ID[source])], 1);
-                else if(step > 0)
-                    model.SetMipStart(comm_to_processor_superstep_source_var[proc][step][static_cast<int>(source_local_ID[source])], 0);
+                if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step - 1) {
+                    model.SetMipStart(
+                        comm_to_processor_superstep_source_var[proc][step][static_cast<int>(source_local_ID[source])], 1);
+                } else if (step > 0) {
+                    model.SetMipStart(
+                        comm_to_processor_superstep_source_var[proc][step][static_cast<int>(source_local_ID[source])], 0);
+                }
             }
         }
     }
@@ -192,33 +199,33 @@ void CoptPartialScheduler<Graph_t>::setInitialSolution(const BspScheduleCS<Graph
     model.SetIntParam(COPT_INTPARAM_MIPSTARTMODE, 2);
 }
 
-template<typename Graph_t>
-void CoptPartialScheduler<Graph_t>::updateSchedule(BspScheduleCS<Graph_t>& schedule) const {
-
+template <typename Graph_t>
+void CoptPartialScheduler<Graph_t>::updateSchedule(BspScheduleCS<Graph_t> &schedule) const {
     unsigned number_of_supersteps = 0;
 
-    while (number_of_supersteps < max_number_supersteps &&
-           superstep_used_var[static_cast<int>(number_of_supersteps)].Get(COPT_DBLINFO_VALUE) >= .99) {
+    while (number_of_supersteps < max_number_supersteps
+           && superstep_used_var[static_cast<int>(number_of_supersteps)].Get(COPT_DBLINFO_VALUE) >= .99) {
         number_of_supersteps++;
     }
 
     const int offset = static_cast<int>(number_of_supersteps) - static_cast<int>(end_superstep - start_superstep + 1);
 
-    for (vertex_idx_t<Graph_t> node = 0; node < schedule.getInstance().numberOfVertices(); node++)
-        if(schedule.assignedSuperstep(node) > end_superstep)
+    for (vertex_idx_t<Graph_t> node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
+        if (schedule.assignedSuperstep(node) > end_superstep) {
             schedule.setAssignedSuperstep(node, static_cast<unsigned>(static_cast<int>(schedule.assignedSuperstep(node)) + offset));
+        }
+    }
 
     for (vertex_idx_t<Graph_t> node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
-
-        if(node_local_ID.find(node) == node_local_ID.end())
+        if (node_local_ID.find(node) == node_local_ID.end()) {
             continue;
+        }
 
         for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) {
-
             for (unsigned step = 0; step < max_number_supersteps; step++) {
-
-                if (node_to_processor_superstep_var[node_local_ID.at(node)][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99)
-                {
+                if (node_to_processor_superstep_var[node_local_ID.at(node)][processor][static_cast<int>(step)].Get(
+                        COPT_DBLINFO_VALUE)
+                    >= .99) {
                     schedule.setAssignedSuperstep(node, start_superstep + step);
                     schedule.setAssignedProcessor(node, processor);
                 }
@@ -226,36 +233,38 @@ void CoptPartialScheduler<Graph_t>::updateSchedule(BspScheduleCS<Graph_t>& sched
         }
     }
 
-    std::map<KeyTriple, unsigned int>& commSchedule = schedule.getCommunicationSchedule();
+    std::map<KeyTriple, unsigned int> &commSchedule = schedule.getCommunicationSchedule();
 
     std::vector<KeyTriple> toErase;
-    for (const auto &[key, val] : schedule.getCommunicationSchedule())
-    {
-        if (val > end_superstep)
+    for (const auto &[key, val] : schedule.getCommunicationSchedule()) {
+        if (val > end_superstep) {
             commSchedule[key] = static_cast<unsigned>(static_cast<int>(val) + offset);
-        else if (static_cast<int>(val) >= static_cast<int>(start_superstep) - 1)
+        } else if (static_cast<int>(val) >= static_cast<int>(start_superstep) - 1) {
             toErase.push_back(key);
+        }
     }
-    for(const KeyTriple& key : toErase)
+    for (const KeyTriple &key : toErase) {
         commSchedule.erase(key);
+    }
 
-    for (unsigned index = 0; index < fixed_comm_steps.size(); ++index)
-    {
-        const auto& entry = fixed_comm_steps[index];
-        if (keep_fixed_comm_step[static_cast<int>(index)].Get(COPT_DBLINFO_VALUE) >= .99 &&
-            std::get<3>(entry) < start_superstep + number_of_supersteps)
+    for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) {
+        const auto &entry = fixed_comm_steps[index];
+        if (keep_fixed_comm_step[static_cast<int>(index)].Get(COPT_DBLINFO_VALUE) >= .99
+            && std::get<3>(entry) < start_superstep + number_of_supersteps) {
             commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = std::get<3>(entry);
-        else
-            commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = start_superstep-1;
+        } else {
+            commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = start_superstep - 1;
+        }
     }
 
     for (vertex_idx_t<Graph_t> node = 0; node < node_global_ID.size(); node++) {
-
         for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) {
             for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) {
                 if (p_from != p_to) {
                     for (unsigned int step = 0; step < max_number_supersteps; step++) {
-                        if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)].Get(COPT_DBLINFO_VALUE) >= .99) {
+                        if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)].Get(
+                                COPT_DBLINFO_VALUE)
+                            >= .99) {
                             commSchedule[std::make_tuple(node_global_ID[node], p_from, p_to)] = start_superstep + step;
                             break;
                         }
@@ -266,13 +275,14 @@ void CoptPartialScheduler<Graph_t>::updateSchedule(BspScheduleCS<Graph_t>& sched
     }
 
     for (vertex_idx_t<Graph_t> source = 0; source < source_global_ID.size(); source++) {
-
         for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) {
             if (source_present_before.find(std::make_pair(source, p_to)) == source_present_before.end()) {
                 for (unsigned int step = 0; step < max_number_supersteps + 1; step++) {
-                    if (comm_to_processor_superstep_source_var[p_to][step][static_cast<int>(source)].Get(COPT_DBLINFO_VALUE) >= .99) {
-                        commSchedule[std::make_tuple(source_global_ID[source], schedule.assignedProcessor(source_global_ID[source]), p_to)] =
-                            start_superstep - 1 + step;
+                    if (comm_to_processor_superstep_source_var[p_to][step][static_cast<int>(source)].Get(COPT_DBLINFO_VALUE)
+                        >= .99) {
+                        commSchedule[std::make_tuple(
+                            source_global_ID[source], schedule.assignedProcessor(source_global_ID[source]), p_to)]
+                            = start_superstep - 1 + step;
                         break;
                     }
                 }
@@ -282,13 +292,10 @@ void CoptPartialScheduler<Graph_t>::updateSchedule(BspScheduleCS<Graph_t>& sched
 
     schedule.cleanCommSchedule();
     schedule.shrinkByMergingSupersteps();
-
 };
 
-
-template<typename Graph_t>
-void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t>& schedule, Model& model) {
-
+template <typename Graph_t>
+void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const BspScheduleCS<Graph_t> &schedule, Model &model) {
     const vertex_idx_t<Graph_t> num_vertices = static_cast<vertex_idx_t<Graph_t>>(node_global_ID.size());
     const vertex_idx_t<Graph_t> num_sources = static_cast<vertex_idx_t<Graph_t>>(source_global_ID.size());
     const unsigned num_processors = schedule.getInstance().numberOfProcessors();
@@ -298,53 +305,53 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     */
     // variables indicating if superstep is used at all
     superstep_used_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_used");
-    VarArray superstep_has_comm = model.AddVars(static_cast<int>(max_number_supersteps+1), COPT_BINARY, "superstep_has_comm");
+    VarArray superstep_has_comm = model.AddVars(static_cast<int>(max_number_supersteps + 1), COPT_BINARY, "superstep_has_comm");
     VarArray has_comm_at_end = model.AddVars(1, COPT_BINARY, "has_comm_at_end");
 
     // variables for assigments of nodes to processor and superstep
     node_to_processor_superstep_var = std::vector<std::vector<VarArray>>(num_vertices, std::vector<VarArray>(num_processors));
 
     for (unsigned int node = 0; node < num_vertices; node++) {
-
         for (unsigned int processor = 0; processor < num_processors; processor++) {
-
-            node_to_processor_superstep_var[node][processor] =
-                model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep");
+            node_to_processor_superstep_var[node][processor]
+                = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep");
         }
     }
 
     // communicate node from p1 to p2 at superstep
 
-    comm_processor_to_processor_superstep_node_var = std::vector<std::vector<std::vector<VarArray>>>(num_processors,
-        std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps)));
+    comm_processor_to_processor_superstep_node_var = std::vector<std::vector<std::vector<VarArray>>>(
+        num_processors, std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps)));
 
     for (unsigned int p1 = 0; p1 < num_processors; p1++) {
         for (unsigned int p2 = 0; p2 < num_processors; p2++) {
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
-
-                comm_processor_to_processor_superstep_node_var[p1][p2][step] =
-                    model.AddVars(static_cast<int>(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node");
+                comm_processor_to_processor_superstep_node_var[p1][p2][step]
+                    = model.AddVars(static_cast<int>(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node");
             }
         }
     }
 
     // communicate nodes in supersteps smaller than start_superstep
-    comm_to_processor_superstep_source_var = std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps + 1));
-    std::vector<std::vector<VarArray>> present_on_processor_superstep_source_var = std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps));
+    comm_to_processor_superstep_source_var
+        = std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps + 1));
+    std::vector<std::vector<VarArray>> present_on_processor_superstep_source_var
+        = std::vector<std::vector<VarArray>>(num_processors, std::vector<VarArray>(max_number_supersteps));
 
     for (unsigned int proc = 0; proc < num_processors; proc++) {
         for (unsigned int step = 0; step < max_number_supersteps + 1; step++) {
+            comm_to_processor_superstep_source_var[proc][step]
+                = model.AddVars(static_cast<int>(num_sources), COPT_BINARY, "comm_to_processor_superstep_source");
 
-            comm_to_processor_superstep_source_var[proc][step] =
-                model.AddVars(static_cast<int>(num_sources), COPT_BINARY, "comm_to_processor_superstep_source");
-
-            if(step < max_number_supersteps)
-                present_on_processor_superstep_source_var[proc][step] =
-                    model.AddVars(static_cast<int>(num_sources), COPT_BINARY, "present_on_processor_superstep_source");
+            if (step < max_number_supersteps) {
+                present_on_processor_superstep_source_var[proc][step]
+                    = model.AddVars(static_cast<int>(num_sources), COPT_BINARY, "present_on_processor_superstep_source");
+            }
         }
     }
 
-    VarArray max_comm_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps + 1), COPT_INTEGER, "max_comm_superstep");
+    VarArray max_comm_superstep_var
+        = model.AddVars(static_cast<int>(max_number_supersteps + 1), COPT_INTEGER, "max_comm_superstep");
 
     VarArray max_work_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_work_superstep");
 
@@ -363,63 +370,75 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
 
     // check whether superstep is used at all (work or comm), and whether superstep has any communication at all
     unsigned large_constant_work = static_cast<unsigned>(num_vertices) * num_processors;
-    unsigned large_constant_comm = static_cast<unsigned>(num_vertices+num_sources) * num_processors * num_processors + static_cast<unsigned>(fixed_comm_steps.size());
+    unsigned large_constant_comm = static_cast<unsigned>(num_vertices + num_sources) * num_processors * num_processors
+                                   + static_cast<unsigned>(fixed_comm_steps.size());
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
-
         Expr expr_work, expr_comm;
         for (vertex_idx_t<Graph_t> node = 0; node < num_vertices; node++) {
-
             for (unsigned int processor = 0; processor < num_processors; processor++) {
                 expr_work += node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
 
-                for (unsigned int p_other = 0; p_other < num_processors; p_other++)
-                    if(processor != p_other)
-                        expr_comm += comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast<int>(node)];
+                for (unsigned int p_other = 0; p_other < num_processors; p_other++) {
+                    if (processor != p_other) {
+                        expr_comm
+                            += comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast<int>(node)];
+                    }
+                }
+            }
+        }
+        for (vertex_idx_t<Graph_t> source = 0; source < num_sources; source++) {
+            for (unsigned int processor = 0; processor < num_processors; processor++) {
+                if (source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) {
+                    expr_comm += comm_to_processor_superstep_source_var[processor][step + 1][static_cast<int>(source)];
+                }
             }
         }
-        for (vertex_idx_t<Graph_t> source = 0; source < num_sources; source++)
-            for (unsigned int processor = 0; processor < num_processors; processor++)
-                if(source_present_before.find(std::make_pair(source, processor)) == source_present_before.end())
-                    expr_comm += comm_to_processor_superstep_source_var[processor][step+1][static_cast<int>(source)];
 
-        for (unsigned index = 0; index < fixed_comm_steps.size(); ++index)
-            if(std::get<3>(fixed_comm_steps[index]) == start_superstep + step)
+        for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) {
+            if (std::get<3>(fixed_comm_steps[index]) == start_superstep + step) {
                 expr_comm += keep_fixed_comm_step[static_cast<int>(index)];
+            }
+        }
 
-        model.AddConstr(expr_comm <= large_constant_comm * superstep_has_comm[static_cast<int>(step+1)]);
+        model.AddConstr(expr_comm <= large_constant_comm * superstep_has_comm[static_cast<int>(step + 1)]);
         model.AddConstr(expr_work <= large_constant_work * superstep_used_var[static_cast<int>(step)]);
-        model.AddConstr(superstep_has_comm[static_cast<int>(step+1)] <= superstep_used_var[static_cast<int>(step)]);
+        model.AddConstr(superstep_has_comm[static_cast<int>(step + 1)] <= superstep_used_var[static_cast<int>(step)]);
     }
 
     // check communication usage in edge case: comm phase before the segment
-    if(has_fixed_comm_in_preceding_step)
+    if (has_fixed_comm_in_preceding_step) {
         model.AddConstr(superstep_has_comm[0] == 1);
-    else {
+    } else {
         Expr expr_comm_0;
-        for (vertex_idx_t<Graph_t> source = 0; source < num_sources; source++)
-            for (unsigned int processor = 0; processor < num_processors; processor++)
-                if(source_present_before.find(std::make_pair(source, processor)) == source_present_before.end())
+        for (vertex_idx_t<Graph_t> source = 0; source < num_sources; source++) {
+            for (unsigned int processor = 0; processor < num_processors; processor++) {
+                if (source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) {
                     expr_comm_0 += comm_to_processor_superstep_source_var[processor][0][static_cast<int>(source)];
-        for (unsigned index = 0; index < fixed_comm_steps.size(); ++index)
+                }
+            }
+        }
+        for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) {
             expr_comm_0 += 1 - keep_fixed_comm_step[static_cast<int>(index)];
-        model.AddConstr(expr_comm_0 <= (static_cast<unsigned>(num_sources) * num_processors + static_cast<unsigned>(fixed_comm_steps.size())) * superstep_has_comm[0]);
+        }
+        model.AddConstr(expr_comm_0
+                        <= (static_cast<unsigned>(num_sources) * num_processors + static_cast<unsigned>(fixed_comm_steps.size()))
+                               * superstep_has_comm[0]);
     }
 
     // check if there is any communication at the end of the subschedule
-    for (unsigned int step = 0; step < max_number_supersteps - 1; step++)
-    {
-        model.AddConstr(superstep_used_var[static_cast<int>(step)] - superstep_used_var[static_cast<int>(step + 1)] +
-                        superstep_has_comm[static_cast<int>(step+1)] - 1 <= has_comm_at_end[0]);
+    for (unsigned int step = 0; step < max_number_supersteps - 1; step++) {
+        model.AddConstr(superstep_used_var[static_cast<int>(step)] - superstep_used_var[static_cast<int>(step + 1)]
+                            + superstep_has_comm[static_cast<int>(step + 1)] - 1
+                        <= has_comm_at_end[0]);
     }
-    model.AddConstr(superstep_used_var[static_cast<int>(max_number_supersteps - 1)] +
-                        superstep_has_comm[static_cast<int>(max_number_supersteps)] - 1 <= has_comm_at_end[0]);
+    model.AddConstr(superstep_used_var[static_cast<int>(max_number_supersteps - 1)]
+                        + superstep_has_comm[static_cast<int>(max_number_supersteps)] - 1
+                    <= has_comm_at_end[0]);
 
     // nodes are assigend
     for (vertex_idx_t<Graph_t> node = 0; node < num_vertices; node++) {
-
         Expr expr;
         for (unsigned int processor = 0; processor < num_processors; processor++) {
-
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
                 expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
             }
@@ -432,26 +451,24 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     for (vertex_idx_t<Graph_t> node = 0; node < num_vertices; node++) {
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
             for (unsigned int processor = 0; processor < num_processors; processor++) {
-
                 Expr expr;
                 unsigned num_terms = 0;
-                for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node_global_ID[node]))
-                {
-                    if(node_local_ID.find(pred) != node_local_ID.end())
-                    {
+                for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node_global_ID[node])) {
+                    if (node_local_ID.find(pred) != node_local_ID.end()) {
                         ++num_terms;
-                        expr += comm_processor_to_processor_superstep_node_var[processor][processor][step][static_cast<int>(node_local_ID[pred])];
-                    }
-                    else if(source_local_ID.find(pred) != source_local_ID.end() &&
-                            source_present_before.find(std::make_pair(source_local_ID[pred], processor)) == source_present_before.end())
-                    {
+                        expr += comm_processor_to_processor_superstep_node_var[processor][processor][step]
+                                                                              [static_cast<int>(node_local_ID[pred])];
+                    } else if (source_local_ID.find(pred) != source_local_ID.end()
+                               && source_present_before.find(std::make_pair(source_local_ID[pred], processor))
+                                      == source_present_before.end()) {
                         ++num_terms;
                         expr += present_on_processor_superstep_source_var[processor][step][static_cast<int>(source_local_ID[pred])];
                     }
                 }
 
-                if(num_terms > 0)
+                if (num_terms > 0) {
                     model.AddConstr(expr >= num_terms * node_to_processor_superstep_var[node][processor][static_cast<int>(step)]);
+                }
             }
         }
     }
@@ -461,12 +478,11 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
         for (unsigned int processor = 0; processor < num_processors; processor++) {
             for (vertex_idx_t<Graph_t> node = 0; node < num_vertices; node++) {
-
                 Expr expr1, expr2;
                 if (step > 0) {
-
                     for (unsigned int p_from = 0; p_from < num_processors; p_from++) {
-                        expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast<int>(node)];
+                        expr1
+                            += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast<int>(node)];
                     }
                 }
 
@@ -486,13 +502,14 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
         for (unsigned int processor = 0; processor < num_processors; processor++) {
             for (vertex_idx_t<Graph_t> source_node = 0; source_node < num_sources; source_node++) {
-
-                if(source_present_before.find(std::make_pair(source_node, processor)) != source_present_before.end())
+                if (source_present_before.find(std::make_pair(source_node, processor)) != source_present_before.end()) {
                     continue;
+                }
 
                 Expr expr1 = comm_to_processor_superstep_source_var[processor][step][static_cast<int>(source_node)];
-                if (step > 0)
-                    expr1 += present_on_processor_superstep_source_var[processor][step-1][static_cast<int>(source_node)];
+                if (step > 0) {
+                    expr1 += present_on_processor_superstep_source_var[processor][step - 1][static_cast<int>(source_node)];
+                }
 
                 Expr expr2 = present_on_processor_superstep_source_var[processor][step][static_cast<int>(source_node)];
 
@@ -502,30 +519,31 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     }
 
     // boundary conditions at the end
-    for(const std::pair<vertex_idx_t<Graph_t>, unsigned> node_and_proc : node_needed_after_on_proc)
-    {
+    for (const std::pair<vertex_idx_t<Graph_t>, unsigned> node_and_proc : node_needed_after_on_proc) {
         Expr expr;
-        for (unsigned int p_from = 0; p_from < num_processors; p_from++)
-            expr += comm_processor_to_processor_superstep_node_var[p_from][node_and_proc.second][max_number_supersteps - 1][static_cast<int>(node_and_proc.first)];
+        for (unsigned int p_from = 0; p_from < num_processors; p_from++) {
+            expr += comm_processor_to_processor_superstep_node_var[p_from][node_and_proc.second][max_number_supersteps - 1]
+                                                                  [static_cast<int>(node_and_proc.first)];
+        }
 
         model.AddConstr(expr >= 1);
     }
 
-    for(const std::pair<vertex_idx_t<Graph_t>, unsigned> source_and_proc : source_needed_after_on_proc)
-    {
-        Expr expr = present_on_processor_superstep_source_var[source_and_proc.second][max_number_supersteps - 1][static_cast<int>(source_and_proc.first)];
-        expr += comm_to_processor_superstep_source_var[source_and_proc.second][max_number_supersteps][static_cast<int>(source_and_proc.first)];
+    for (const std::pair<vertex_idx_t<Graph_t>, unsigned> source_and_proc : source_needed_after_on_proc) {
+        Expr expr = present_on_processor_superstep_source_var[source_and_proc.second][max_number_supersteps - 1]
+                                                             [static_cast<int>(source_and_proc.first)];
+        expr += comm_to_processor_superstep_source_var[source_and_proc.second][max_number_supersteps]
+                                                      [static_cast<int>(source_and_proc.first)];
         model.AddConstr(expr >= 1);
     }
 
     // cost calculation - work
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
         for (unsigned int processor = 0; processor < num_processors; processor++) {
-
             Expr expr;
             for (unsigned int node = 0; node < num_vertices; node++) {
-                expr += schedule.getInstance().getComputationalDag().vertex_work_weight(node_global_ID[node]) *
-                        node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
+                expr += schedule.getInstance().getComputationalDag().vertex_work_weight(node_global_ID[node])
+                        * node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
             }
 
             model.AddConstr(max_work_superstep_var[static_cast<int>(step)] >= expr);
@@ -535,51 +553,49 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     // cost calculation - comm
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
         for (unsigned int processor = 0; processor < num_processors; processor++) {
-
             Expr expr1, expr2;
             for (vertex_idx_t<Graph_t> node = 0; node < num_vertices; node++) {
                 for (unsigned int p_other = 0; p_other < num_processors; p_other++) {
                     if (processor != p_other) {
-                        expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) *
-                                schedule.getInstance().sendCosts(processor, p_other) *
-                                comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast<int>(node)];
-                        expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) *
-                                schedule.getInstance().sendCosts(p_other, processor) *
-                                comm_processor_to_processor_superstep_node_var[p_other][processor][step][static_cast<int>(node)];
+                        expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node])
+                                 * schedule.getInstance().sendCosts(processor, p_other)
+                                 * comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast<int>(node)];
+                        expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node])
+                                 * schedule.getInstance().sendCosts(p_other, processor)
+                                 * comm_processor_to_processor_superstep_node_var[p_other][processor][step][static_cast<int>(node)];
                     }
                 }
             }
 
             for (vertex_idx_t<Graph_t> source = 0; source < num_sources; source++) {
                 const unsigned origin_proc = schedule.assignedProcessor(source_global_ID[source]);
-                if(origin_proc == processor)
-                {
-                    for (unsigned int p_other = 0; p_other < num_processors; p_other++)
-                    {
-                        expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) *
-                        schedule.getInstance().sendCosts(processor, p_other) *
-                        comm_to_processor_superstep_source_var[p_other][step + 1][static_cast<int>(source)];
+                if (origin_proc == processor) {
+                    for (unsigned int p_other = 0; p_other < num_processors; p_other++) {
+                        expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source])
+                                 * schedule.getInstance().sendCosts(processor, p_other)
+                                 * comm_to_processor_superstep_source_var[p_other][step + 1][static_cast<int>(source)];
                     }
                 }
-                expr2 +=
-                    schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) *
-                    schedule.getInstance().sendCosts(origin_proc, processor) *
-                    comm_to_processor_superstep_source_var[processor][step + 1][static_cast<int>(source)];
+                expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source])
+                         * schedule.getInstance().sendCosts(origin_proc, processor)
+                         * comm_to_processor_superstep_source_var[processor][step + 1][static_cast<int>(source)];
             }
 
-            for (unsigned index = 0; index < fixed_comm_steps.size(); ++index)
-            {
-                const auto& entry = fixed_comm_steps[index];
-                if(std::get<3>(entry) != start_superstep + step)
+            for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) {
+                const auto &entry = fixed_comm_steps[index];
+                if (std::get<3>(entry) != start_superstep + step) {
                     continue;
-                if(std::get<1>(entry) == processor)
-                    expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) *
-                        schedule.getInstance().sendCosts(processor, std::get<2>(entry)) *
-                        keep_fixed_comm_step[static_cast<int>(index)];
-                if(std::get<2>(entry) == processor)
-                    expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) *
-                        schedule.getInstance().sendCosts(std::get<1>(entry), processor) *
-                        keep_fixed_comm_step[static_cast<int>(index)];
+                }
+                if (std::get<1>(entry) == processor) {
+                    expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry))
+                             * schedule.getInstance().sendCosts(processor, std::get<2>(entry))
+                             * keep_fixed_comm_step[static_cast<int>(index)];
+                }
+                if (std::get<2>(entry) == processor) {
+                    expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry))
+                             * schedule.getInstance().sendCosts(std::get<1>(entry), processor)
+                             * keep_fixed_comm_step[static_cast<int>(index)];
+                }
             }
 
             model.AddConstr(max_comm_superstep_var[static_cast<int>(step + 1)] >= expr1);
@@ -589,36 +605,33 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
 
     // cost calculation - first comm phase handled separately
     for (unsigned int processor = 0; processor < num_processors; processor++) {
-
         Expr expr1, expr2;
         for (vertex_idx_t<Graph_t> source = 0; source < num_sources; source++) {
             const unsigned origin_proc = schedule.assignedProcessor(source_global_ID[source]);
-            if(origin_proc == processor)
-            {
-                for (unsigned int p_other = 0; p_other < num_processors; p_other++)
-                {
-                    expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) *
-                    schedule.getInstance().sendCosts(processor, p_other) *
-                    comm_to_processor_superstep_source_var[p_other][0][static_cast<int>(source)];
+            if (origin_proc == processor) {
+                for (unsigned int p_other = 0; p_other < num_processors; p_other++) {
+                    expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source])
+                             * schedule.getInstance().sendCosts(processor, p_other)
+                             * comm_to_processor_superstep_source_var[p_other][0][static_cast<int>(source)];
                 }
             }
-            expr2 +=
-                schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) *
-                schedule.getInstance().sendCosts(origin_proc, processor) *
-                comm_to_processor_superstep_source_var[processor][0][static_cast<int>(source)];
+            expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source])
+                     * schedule.getInstance().sendCosts(origin_proc, processor)
+                     * comm_to_processor_superstep_source_var[processor][0][static_cast<int>(source)];
         }
 
-        for (unsigned index = 0; index < fixed_comm_steps.size(); ++index)
-        {
-            const auto& entry = fixed_comm_steps[index];
-            if(std::get<1>(entry) == processor)
-                expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) *
-                    schedule.getInstance().sendCosts(processor, std::get<2>(entry)) *
-                    (1-keep_fixed_comm_step[static_cast<int>(index)]);
-            if(std::get<2>(entry) == processor)
-                expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) *
-                    schedule.getInstance().sendCosts(std::get<1>(entry), processor) *
-                    (1-keep_fixed_comm_step[static_cast<int>(index)]);
+        for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) {
+            const auto &entry = fixed_comm_steps[index];
+            if (std::get<1>(entry) == processor) {
+                expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry))
+                         * schedule.getInstance().sendCosts(processor, std::get<2>(entry))
+                         * (1 - keep_fixed_comm_step[static_cast<int>(index)]);
+            }
+            if (std::get<2>(entry) == processor) {
+                expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry))
+                         * schedule.getInstance().sendCosts(std::get<1>(entry), processor)
+                         * (1 - keep_fixed_comm_step[static_cast<int>(index)]);
+            }
         }
 
         model.AddConstr(max_comm_superstep_var[0] >= expr1);
@@ -631,8 +644,9 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     Expr expr;
 
     for (unsigned int step = 0; step < max_number_supersteps; step++) {
-        expr += max_work_superstep_var[static_cast<int>(step)] + schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast<int>(step + 1)] +
-                schedule.getInstance().synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
+        expr += max_work_superstep_var[static_cast<int>(step)]
+                + schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast<int>(step + 1)]
+                + schedule.getInstance().synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
     }
 
     expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[0];
@@ -642,9 +656,8 @@ void CoptPartialScheduler<Graph_t>::setupVariablesConstraintsObjective(const Bsp
     model.SetObjective(expr - schedule.getInstance().synchronisationCosts(), COPT_MINIMIZE);
 };
 
-template<typename Graph_t>
-void CoptPartialScheduler<Graph_t>::setupVertexMaps(const BspScheduleCS<Graph_t>& schedule) {
-
+template <typename Graph_t>
+void CoptPartialScheduler<Graph_t>::setupVertexMaps(const BspScheduleCS<Graph_t> &schedule) {
     node_local_ID.clear();
     node_global_ID.clear();
     source_local_ID.clear();
@@ -655,28 +668,23 @@ void CoptPartialScheduler<Graph_t>::setupVertexMaps(const BspScheduleCS<Graph_t>
     fixed_comm_steps.clear();
     source_present_before.clear();
 
-    std::vector<std::vector<unsigned> > first_at = schedule.getFirstPresence();
+    std::vector<std::vector<unsigned>> first_at = schedule.getFirstPresence();
 
     max_number_supersteps = end_superstep - start_superstep + 3;
 
     for (unsigned node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
-
         if (schedule.assignedSuperstep(node) >= start_superstep && schedule.assignedSuperstep(node) <= end_superstep) {
-
             node_local_ID[node] = static_cast<vertex_idx_t<Graph_t>>(node_global_ID.size());
             node_global_ID.push_back(node);
 
             for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node)) {
-
                 if (schedule.assignedSuperstep(pred) < start_superstep) {
-
                     if (source_local_ID.find(pred) == source_local_ID.end()) {
                         source_local_ID[pred] = static_cast<vertex_idx_t<Graph_t>>(source_global_ID.size());
                         source_global_ID.push_back(pred);
                     }
 
                 } else if (schedule.assignedSuperstep(pred) > end_superstep) {
-
                     throw std::invalid_argument("Initial Schedule might be invalid?!");
                 }
             }
@@ -684,77 +692,83 @@ void CoptPartialScheduler<Graph_t>::setupVertexMaps(const BspScheduleCS<Graph_t>
     }
 
     // find where the sources are already present before the segment
-    for(const auto& source_and_ID : source_local_ID)
-    {
+    for (const auto &source_and_ID : source_local_ID) {
         vertex_idx_t<Graph_t> source = source_and_ID.first;
-        for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
-            if(first_at[source][proc] < start_superstep)
+        for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) {
+            if (first_at[source][proc] < start_superstep) {
                 source_present_before.emplace(std::make_pair(source_and_ID.second, proc));
+            }
+        }
     }
 
     // collect values that are needed by the end of the segment
-    for(const auto& source_and_ID : source_local_ID)
-    {
+    for (const auto &source_and_ID : source_local_ID) {
         vertex_idx_t<Graph_t> source = source_and_ID.first;
 
         std::set<unsigned> procs_needing_this;
-        for (const auto &succ : schedule.getInstance().getComputationalDag().children(source))
-            if(schedule.assignedProcessor(succ) != schedule.assignedProcessor(source) &&
-                schedule.assignedSuperstep(succ) > end_superstep)
+        for (const auto &succ : schedule.getInstance().getComputationalDag().children(source)) {
+            if (schedule.assignedProcessor(succ) != schedule.assignedProcessor(source)
+                && schedule.assignedSuperstep(succ) > end_superstep) {
                 procs_needing_this.insert(schedule.assignedProcessor(succ));
+            }
+        }
 
-        for(unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1)
-            for(unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2)
-            {
-                if(proc1 == proc2)
+        for (unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) {
+            for (unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) {
+                if (proc1 == proc2) {
                     continue;
+                }
                 auto itr = schedule.getCommunicationSchedule().find(std::make_tuple(source, proc1, proc2));
-                if (itr != schedule.getCommunicationSchedule().end() && itr->second > end_superstep)
+                if (itr != schedule.getCommunicationSchedule().end() && itr->second > end_superstep) {
                     procs_needing_this.insert(schedule.assignedProcessor(proc1));
+                }
             }
+        }
 
-        for(unsigned proc : procs_needing_this)
-            if(first_at[source][proc] >= start_superstep && first_at[source][proc] <= end_superstep + 1)
+        for (unsigned proc : procs_needing_this) {
+            if (first_at[source][proc] >= start_superstep && first_at[source][proc] <= end_superstep + 1) {
                 source_needed_after_on_proc.emplace_back(source_and_ID.second, proc);
+            }
+        }
     }
-    for(const auto& node_and_ID : node_local_ID)
-    {
+    for (const auto &node_and_ID : node_local_ID) {
         vertex_idx_t<Graph_t> node = node_and_ID.first;
 
         std::set<unsigned> procs_needing_this;
-        for (const auto &succ : schedule.getInstance().getComputationalDag().children(node))
-            if(schedule.assignedSuperstep(succ) > end_superstep)
+        for (const auto &succ : schedule.getInstance().getComputationalDag().children(node)) {
+            if (schedule.assignedSuperstep(succ) > end_superstep) {
                 procs_needing_this.insert(schedule.assignedProcessor(succ));
+            }
+        }
 
-        for(unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1)
-            for(unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2)
-            {
+        for (unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) {
+            for (unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) {
                 auto itr = schedule.getCommunicationSchedule().find(std::make_tuple(node, proc1, proc2));
-                if (itr != schedule.getCommunicationSchedule().end() && proc1 != proc2 && itr->second > end_superstep)
+                if (itr != schedule.getCommunicationSchedule().end() && proc1 != proc2 && itr->second > end_superstep) {
                     procs_needing_this.insert(schedule.assignedProcessor(proc1));
+                }
             }
+        }
 
-        for(unsigned proc : procs_needing_this)
-            if(first_at[node][proc] <= end_superstep + 1)
+        for (unsigned proc : procs_needing_this) {
+            if (first_at[node][proc] <= end_superstep + 1) {
                 node_needed_after_on_proc.emplace_back(node_and_ID.second, proc);
+            }
+        }
     }
 
-
     // comm steps that just happen to be in this interval, but not connected to the nodes within
     has_fixed_comm_in_preceding_step = false;
-    for (const auto &[key, val] : schedule.getCommunicationSchedule())
-    {
+    for (const auto &[key, val] : schedule.getCommunicationSchedule()) {
         vertex_idx_t<Graph_t> source = std::get<0>(key);
-        if(source_local_ID.find(source) == source_local_ID.end() &&
-            schedule.assignedSuperstep(source) < start_superstep &&
-            val >= start_superstep - 1 && val <= end_superstep)
-            {
-                fixed_comm_steps.emplace_back(std::get<0>(key), std::get<1>(key), std::get<2>(key), val);
-                if(val == start_superstep - 1)
-                    has_fixed_comm_in_preceding_step = true;
+        if (source_local_ID.find(source) == source_local_ID.end() && schedule.assignedSuperstep(source) < start_superstep
+            && val >= start_superstep - 1 && val <= end_superstep) {
+            fixed_comm_steps.emplace_back(std::get<0>(key), std::get<1>(key), std::get<2>(key), val);
+            if (val == start_superstep - 1) {
+                has_fixed_comm_in_preceding_step = true;
             }
+        }
     }
-
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp
index c051c8dc..150c6f73 100644
--- a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp
+++ b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp
@@ -28,9 +28,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class TotalCommunicationScheduler : public Scheduler<Graph_t> {
-
   private:
     Envr env;
     Model model;
@@ -45,7 +44,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     bool use_lk_heuristic_callback;
 
     class WriteSolutionCallback : public CallbackBase {
-
       private:
         unsigned counter;
         unsigned max_number_solution;
@@ -54,8 +52,13 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
 
       public:
         WriteSolutionCallback()
-            : counter(0), max_number_solution(100), best_obj(COPT_INFINITY), write_solutions_path_cb(""),
-              solution_file_prefix_cb(""), instance_ptr(0), node_to_processor_superstep_var_ptr() {}
+            : counter(0),
+              max_number_solution(100),
+              best_obj(COPT_INFINITY),
+              write_solutions_path_cb(""),
+              solution_file_prefix_cb(""),
+              instance_ptr(0),
+              node_to_processor_superstep_var_ptr() {}
 
         std::string write_solutions_path_cb;
         std::string solution_file_prefix_cb;
@@ -64,44 +67,32 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         std::vector<std::vector<VarArray>> *node_to_processor_superstep_var_ptr;
 
         void callback() override {
-
-            if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution &&
-                GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
-
+            if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
                 try {
-
                     if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) {
-
                         best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ);
 
                         auto sched = constructBspScheduleFromCallback();
                         DotFileWriter sched_writer;
-                        sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb +
-                                                        "_" + std::to_string(counter) + "_schedule.dot",
+                        sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_"
+                                                        + std::to_string(counter) + "_schedule.dot",
                                                     sched);
                         counter++;
                     }
 
-                } catch (const std::exception &e) {
-                }
+                } catch (const std::exception &e) {}
             }
         }
 
         BspSchedule<Graph_t> constructBspScheduleFromCallback() {
-
             BspSchedule<Graph_t> schedule(*instance_ptr);
 
             for (const auto &node : instance_ptr->vertices()) {
-
                 for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) {
-
-                    for (unsigned step = 0;
-                         step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) {
-
+                    for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size());
+                         step++) {
                         assert(size < std::numeric_limits<int>::max());
-                        if (GetSolution(
-                                (*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >=
-                            .99) {
+                        if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >= .99) {
                             schedule.setAssignedProcessor(node, processor);
                             schedule.setAssignedSuperstep(node, step);
                         }
@@ -114,7 +105,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     };
 
     class LKHeuristicCallback : public CallbackBase {
-
       private:
         kl_total_comm<Graph_t> lk_heuristic;
 
@@ -122,8 +112,14 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
 
       public:
         LKHeuristicCallback()
-            : lk_heuristic(), best_obj(COPT_INFINITY), num_step(0), instance_ptr(0), max_work_superstep_var_ptr(0),
-              superstep_used_var_ptr(0), node_to_processor_superstep_var_ptr(0), edge_vars_ptr(0) {}
+            : lk_heuristic(),
+              best_obj(COPT_INFINITY),
+              num_step(0),
+              instance_ptr(0),
+              max_work_superstep_var_ptr(0),
+              superstep_used_var_ptr(0),
+              node_to_processor_superstep_var_ptr(0),
+              edge_vars_ptr(0) {}
 
         unsigned num_step;
         const BspInstance<Graph_t> *instance_ptr;
@@ -134,16 +130,12 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         std::vector<std::vector<VarArray>> *edge_vars_ptr;
 
         void callback() override {
-
             if (Where() == COPT_CBCONTEXT_MIPSOL && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
-
                 try {
-
                     if (0.0 < GetDblInfo(COPT_CBINFO_BESTBND) && 1.0 < GetDblInfo(COPT_CBINFO_BESTOBJ) &&
                         // GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj &&
-                        0.1 < (GetDblInfo(COPT_CBINFO_BESTOBJ) - GetDblInfo(COPT_CBINFO_BESTBND)) /
-                                  GetDblInfo(COPT_CBINFO_BESTOBJ)) {
-
+                        0.1 < (GetDblInfo(COPT_CBINFO_BESTOBJ) - GetDblInfo(COPT_CBINFO_BESTBND))
+                                  / GetDblInfo(COPT_CBINFO_BESTOBJ)) {
                         // best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ);
 
                         auto sched = constructBspScheduleFromCallback();
@@ -157,25 +149,19 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
                         }
                     }
 
-                } catch (const std::exception &e) {
-                }
+                } catch (const std::exception &e) {}
             }
         }
 
         BspSchedule<Graph_t> constructBspScheduleFromCallback() {
-
             BspSchedule schedule(*instance_ptr);
 
             for (const auto &node : instance_ptr->vertices()) {
-
                 for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) {
-
-                    for (unsigned step = 0;
-                         step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) {
+                    for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size());
+                         step++) {
                         assert(step <= std::numeric_limits<int>::max());
-                        if (GetSolution(
-                                (*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >=
-                            .99) {
+                        if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)]) >= .99) {
                             schedule.setAssignedProcessor(node, processor);
                             schedule.setAssignedSuperstep(node, step);
                         }
@@ -187,9 +173,7 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         };
 
         void feedImprovedSchedule(const BspSchedule<Graph_t> &schedule) {
-
             for (unsigned step = 0; step < num_step; step++) {
-
                 if (step < schedule.numberOfSupersteps()) {
                     assert(step <= std::numeric_limits<int>::max());
                     SetSolution((*superstep_used_var_ptr)[static_cast<int>(step)], 1.0);
@@ -200,20 +184,15 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
             }
 
             for (const auto &node : instance_ptr->vertices()) {
-
                 for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) {
-
-                    for (unsigned step = 0;
-                         step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) {
-
+                    for (unsigned step = 0; step < static_cast<unsigned>((*node_to_processor_superstep_var_ptr)[0][0].Size());
+                         step++) {
                         if (schedule.assignedProcessor(node) == processor && schedule.assignedSuperstep(node) == step) {
                             assert(step <= std::numeric_limits<int>::max());
-                            SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)],
-                                        1.0);
+                            SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)], 1.0);
                         } else {
                             assert(step <= std::numeric_limits<int>::max());
-                            SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)],
-                                        0.0);
+                            SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast<int>(step)], 0.0);
                         }
                     }
                 }
@@ -223,12 +202,11 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
                 num_step, std::vector<v_workw_t<Graph_t>>(instance_ptr->numberOfProcessors(), 0));
 
             for (const auto &node : instance_ptr->vertices()) {
-                work[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] +=
-                    instance_ptr->getComputationalDag().vertex_work_weight(node);
+                work[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)]
+                    += instance_ptr->getComputationalDag().vertex_work_weight(node);
             }
 
             for (unsigned step = 0; step < num_step; step++) {
-
                 v_workw_t<Graph_t> max_work = 0;
                 for (unsigned proc = 0; proc < instance_ptr->numberOfProcessors(); proc++) {
                     if (max_work < work[step][proc]) {
@@ -241,17 +219,12 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
             }
 
             if (instance_ptr->getArchitecture().isNumaArchitecture()) {
-
                 for (unsigned p1 = 0; p1 < instance_ptr->numberOfProcessors(); p1++) {
                     for (unsigned p2 = 0; p2 < instance_ptr->numberOfProcessors(); p2++) {
                         if (p1 != p2) {
-
                             int edge_id = 0;
                             for (const auto &ep : edge_view(instance_ptr->getComputationalDag())) {
-
-                                if (schedule.assignedProcessor(ep.source) == p1 &&
-                                    schedule.assignedProcessor(ep.target) == p2) {
-
+                                if (schedule.assignedProcessor(ep.source) == p1 && schedule.assignedProcessor(ep.target) == p2) {
                                     SetSolution((*edge_vars_ptr)[p1][p2][edge_id], 1.0);
                                 } else {
                                     SetSolution((*edge_vars_ptr)[p1][p2][edge_id], 0.0);
@@ -264,12 +237,9 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
                 }
 
             } else {
-
                 int edge_id = 0;
                 for (const auto &ep : edge_view(instance_ptr->getComputationalDag())) {
-
                     if (schedule.assignedProcessor(ep.source) != schedule.assignedProcessor(ep.target)) {
-
                         SetSolution((*edge_vars_ptr)[0][0][edge_id], 1.0);
                     } else {
                         SetSolution((*edge_vars_ptr)[0][0][edge_id], 0.0);
@@ -297,15 +267,11 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     VarArray max_work_superstep_var;
 
     void constructBspScheduleFromSolution(BspSchedule<Graph_t> &schedule, bool cleanup_ = false) {
-
         const auto &instance = schedule.getInstance();
 
         for (const auto &node : instance.vertices()) {
-
             for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
                     if (node_to_processor_superstep_var[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) {
                         schedule.setAssignedProcessor(node, processor);
                         schedule.setAssignedSuperstep(node, step);
@@ -320,9 +286,7 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     }
 
     void loadInitialSchedule() {
-
         for (unsigned step = 0; step < max_number_supersteps; step++) {
-
             if (step < initial_schedule->numberOfSupersteps()) {
                 assert(step <= std::numeric_limits<int>::max());
                 model.SetMipStart(superstep_used_var[static_cast<int>(step)], 1);
@@ -334,19 +298,13 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         }
 
         for (const auto &node : initial_schedule->getInstance().vertices()) {
-
             for (unsigned proc = 0; proc < initial_schedule->getInstance().numberOfProcessors(); proc++) {
-
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
-                    if (proc == initial_schedule->assignedProcessor(node) &&
-                        step == initial_schedule->assignedSuperstep(node)) {
-
+                    if (proc == initial_schedule->assignedProcessor(node) && step == initial_schedule->assignedSuperstep(node)) {
                         assert(step <= std::numeric_limits<int>::max());
                         model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast<int>(step)], 1);
 
                     } else {
-
                         assert(step <= std::numeric_limits<int>::max());
                         model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast<int>(step)], 0);
                     }
@@ -355,12 +313,11 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         }
 
         std::vector<std::vector<v_workw_t<Graph_t>>> work(
-            max_number_supersteps,
-            std::vector<v_workw_t<Graph_t>>(initial_schedule->getInstance().numberOfProcessors(), 0));
+            max_number_supersteps, std::vector<v_workw_t<Graph_t>>(initial_schedule->getInstance().numberOfProcessors(), 0));
 
         for (const auto &node : initial_schedule->getInstance().vertices()) {
-            work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] +=
-                initial_schedule->getInstance().getComputationalDag().vertex_work_weight(node);
+            work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)]
+                += initial_schedule->getInstance().getComputationalDag().vertex_work_weight(node);
         }
 
         for (unsigned step = 0; step < max_number_supersteps; step++) {
@@ -380,7 +337,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     }
 
     void setupVariablesConstraintsObjective(const BspInstance<Graph_t> &instance) {
-
         /*
         Variables
         */
@@ -393,11 +349,9 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         assert(max_number_supersteps <= std::numeric_limits<int>::max());
         // variables for assigments of nodes to processor and superstep
         for (const auto &node : instance.vertices()) {
-
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
-                node_to_processor_superstep_var[node][processor] =
-                    model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep");
+                node_to_processor_superstep_var[node][processor]
+                    = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep");
             }
         }
 
@@ -409,14 +363,12 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         Constraints
           */
         if (use_memory_constraint) {
-
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
                     Expr expr;
                     for (unsigned int node = 0; node < instance.numberOfVertices(); node++) {
-                        expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)] *
-                                instance.getComputationalDag().vertex_mem_weight(node);
+                        expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)]
+                                * instance.getComputationalDag().vertex_mem_weight(node);
                     }
                     model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor));
                 }
@@ -427,30 +379,25 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         model.AddConstr(superstep_used_var[0] == 1);
 
         for (unsigned int step = 0; step < max_number_supersteps - 1; step++) {
-            model.AddConstr(superstep_used_var[static_cast<int>(step)] >=
-                            superstep_used_var[static_cast<int>(step + 1)]);
+            model.AddConstr(superstep_used_var[static_cast<int>(step)] >= superstep_used_var[static_cast<int>(step + 1)]);
         }
 
         // superstep is used at all
         for (unsigned int step = 0; step < max_number_supersteps; step++) {
-
             Expr expr;
             for (const auto &node : instance.vertices()) {
-
                 for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
                     expr += node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
                 }
             }
-            model.AddConstr(expr <= static_cast<double>(instance.numberOfVertices() * instance.numberOfProcessors()) *
-                                        superstep_used_var.GetVar(static_cast<int>(step)));
+            model.AddConstr(expr <= static_cast<double>(instance.numberOfVertices() * instance.numberOfProcessors())
+                                        * superstep_used_var.GetVar(static_cast<int>(step)));
         }
 
         // nodes are assigend depending on whether recomputation is allowed or not
         for (const auto &node : instance.vertices()) {
-
             Expr expr;
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 assert(max_number_supersteps <= std::numeric_limits<int>::max());
                 for (unsigned int step = 0; step < max_number_supersteps; step++) {
                     expr += node_to_processor_superstep_var[node][processor].GetVar(static_cast<int>(step));
@@ -462,28 +409,21 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         }
 
         for (const auto &node : instance.vertices()) {
-
             for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 assert(max_number_supersteps <= std::numeric_limits<int>::max());
                 for (unsigned step = 0; step < max_number_supersteps; step++) {
-
                     for (const auto &source : instance.getComputationalDag().parents(node)) {
-
                         Expr expr1;
 
                         for (unsigned p2 = 0; p2 < instance.numberOfProcessors(); p2++) {
-
                             for (unsigned step_prime = 0; step_prime < step; step_prime++) {
-
                                 expr1 += node_to_processor_superstep_var[source][p2][static_cast<int>(step_prime)];
                             }
                         }
 
                         expr1 += node_to_processor_superstep_var[source][processor][static_cast<int>(step)];
 
-                        model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast<int>(step)] <=
-                                        expr1);
+                        model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast<int>(step)] <= expr1);
                     }
                 }
             }
@@ -492,21 +432,18 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         Expr total_edges_cut;
 
         if (instance.getArchitecture().isNumaArchitecture()) {
-
             edge_vars = std::vector<std::vector<VarArray>>(instance.numberOfProcessors(),
                                                            std::vector<VarArray>(instance.numberOfProcessors()));
 
             for (unsigned int p1 = 0; p1 < instance.numberOfProcessors(); p1++) {
                 for (unsigned int p2 = 0; p2 < instance.numberOfProcessors(); p2++) {
                     if (p1 != p2) {
-
                         assert(instance.getComputationalDag().num_edges() <= std::numeric_limits<int>::max());
-                        edge_vars[p1][p2] = model.AddVars(static_cast<int>(instance.getComputationalDag().num_edges()),
-                                                          COPT_BINARY, "edge");
+                        edge_vars[p1][p2]
+                            = model.AddVars(static_cast<int>(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge");
 
                         int edge_id = 0;
                         for (const auto &ep : edge_view(instance.getComputationalDag())) {
-
                             Expr expr1, expr2;
                             assert(max_number_supersteps <= std::numeric_limits<int>::max());
                             for (unsigned step = 0; step < max_number_supersteps; step++) {
@@ -515,9 +452,9 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
                             }
                             model.AddConstr(edge_vars[p1][p2][edge_id] >= expr1 + expr2 - 1.001);
 
-                            total_edges_cut += edge_vars[p1][p2][edge_id] *
-                                               instance.getComputationalDag().vertex_comm_weight(ep.source) *
-                                               instance.sendCosts(p1, p2);
+                            total_edges_cut += edge_vars[p1][p2][edge_id]
+                                               * instance.getComputationalDag().vertex_comm_weight(ep.source)
+                                               * instance.sendCosts(p1, p2);
 
                             edge_id++;
                         }
@@ -526,15 +463,12 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
             }
 
         } else {
-
             edge_vars = std::vector<std::vector<VarArray>>(1, std::vector<VarArray>(1));
             assert(instance.getComputationalDag().num_edges() <= std::numeric_limits<int>::max());
-            edge_vars[0][0] =
-                model.AddVars(static_cast<int>(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge");
+            edge_vars[0][0] = model.AddVars(static_cast<int>(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge");
 
             int edge_id = 0;
             for (const auto &ep : edge_view(instance.getComputationalDag())) {
-
                 for (unsigned p1 = 0; p1 < instance.numberOfProcessors(); p1++) {
                     Expr expr1, expr2;
                     for (unsigned step = 0; step < max_number_supersteps; step++) {
@@ -543,9 +477,7 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
 
                     for (unsigned p2 = 0; p2 < instance.numberOfProcessors(); p2++) {
                         if (p1 != p2) {
-
                             for (unsigned step = 0; step < max_number_supersteps; step++) {
-
                                 expr2 += node_to_processor_superstep_var[ep.target][p2][static_cast<int>(step)];
                             }
                         }
@@ -553,8 +485,7 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
                     model.AddConstr(edge_vars[0][0][edge_id] >= expr1 + expr2 - 1.001);
                 }
 
-                total_edges_cut +=
-                    instance.getComputationalDag().vertex_comm_weight(ep.source) * edge_vars[0][0][edge_id];
+                total_edges_cut += instance.getComputationalDag().vertex_comm_weight(ep.source) * edge_vars[0][0][edge_id];
 
                 edge_id++;
             }
@@ -563,7 +494,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         Expr expr;
 
         if (ignore_workload_balance) {
-
             for (unsigned step = 0; step < max_number_supersteps; step++) {
                 assert(step <= std::numeric_limits<int>::max());
                 expr += instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
@@ -571,18 +501,16 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
 
         } else {
             assert(max_number_supersteps <= std::numeric_limits<int>::max());
-            max_work_superstep_var =
-                model.AddVars(static_cast<int>(max_number_supersteps), COPT_CONTINUOUS, "max_work_superstep");
+            max_work_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_CONTINUOUS, "max_work_superstep");
             // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep");
 
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
                 assert(step <= std::numeric_limits<int>::max());
                 for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                     Expr expr_work;
                     for (const auto &node : instance.vertices()) {
-                        expr_work += instance.getComputationalDag().vertex_work_weight(node) *
-                                     node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
+                        expr_work += instance.getComputationalDag().vertex_work_weight(node)
+                                     * node_to_processor_superstep_var[node][processor][static_cast<int>(step)];
                     }
 
                     model.AddConstr(max_work_superstep_var[static_cast<int>(step)] >= expr_work);
@@ -591,8 +519,8 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
 
             for (unsigned step = 0; step < max_number_supersteps; step++) {
                 assert(step <= std::numeric_limits<int>::max());
-                expr += max_work_superstep_var[static_cast<int>(step)] +
-                        instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
+                expr += max_work_superstep_var[static_cast<int>(step)]
+                        + instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
             }
         }
 
@@ -606,11 +534,18 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
 
   public:
     TotalCommunicationScheduler(unsigned steps = 5)
-        : Scheduler<Graph_t>(), env(), model(env.CreateModel("TotalCommScheduler")), use_memory_constraint(false),
-          ignore_workload_balance(false), use_initial_schedule(false), initial_schedule(0),
-          write_solutions_found(false), use_lk_heuristic_callback(true), solution_callback(), heuristic_callback(),
+        : Scheduler<Graph_t>(),
+          env(),
+          model(env.CreateModel("TotalCommScheduler")),
+          use_memory_constraint(false),
+          ignore_workload_balance(false),
+          use_initial_schedule(false),
+          initial_schedule(0),
+          write_solutions_found(false),
+          use_lk_heuristic_callback(true),
+          solution_callback(),
+          heuristic_callback(),
           max_number_supersteps(steps) {
-
         heuristic_callback.max_work_superstep_var_ptr = &max_work_superstep_var;
         heuristic_callback.superstep_used_var_ptr = &superstep_used_var;
         heuristic_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
@@ -620,11 +555,18 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     }
 
     TotalCommunicationScheduler(const BspSchedule<Graph_t> &schedule)
-        : Scheduler<Graph_t>(), env(), model(env.CreateModel("TotalCommScheduler")), use_memory_constraint(false),
-          ignore_workload_balance(false), use_initial_schedule(true), initial_schedule(&schedule),
-          write_solutions_found(false), use_lk_heuristic_callback(true), solution_callback(), heuristic_callback(),
+        : Scheduler<Graph_t>(),
+          env(),
+          model(env.CreateModel("TotalCommScheduler")),
+          use_memory_constraint(false),
+          ignore_workload_balance(false),
+          use_initial_schedule(true),
+          initial_schedule(&schedule),
+          write_solutions_found(false),
+          use_lk_heuristic_callback(true),
+          solution_callback(),
+          heuristic_callback(),
           max_number_supersteps(schedule.numberOfSupersteps()) {
-
         heuristic_callback.max_work_superstep_var_ptr = &max_work_superstep_var;
         heuristic_callback.superstep_used_var_ptr = &superstep_used_var;
         heuristic_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
@@ -651,15 +593,14 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
      *         agree with those of the initial schedule's instance
      */
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         auto &instance = schedule.getInstance();
 
         assert(!ignore_workload_balance || !use_lk_heuristic_callback);
 
-        if (use_initial_schedule &&
-            (max_number_supersteps < initial_schedule->numberOfSupersteps() ||
-             instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
-             instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
+        if (use_initial_schedule
+            && (max_number_supersteps < initial_schedule->numberOfSupersteps()
+                || instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors()
+                || instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
             throw std::invalid_argument("Invalid Argument while computeSchedule(instance): instance parameters do not "
                                         "agree with those of the initial schedule's instance!");
         }
@@ -681,12 +622,10 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2);
 
         if (write_solutions_found) {
-
             solution_callback.instance_ptr = &instance;
             model.SetCallback(&solution_callback, COPT_CBCONTEXT_MIPSOL);
         }
         if (use_lk_heuristic_callback) {
-
             heuristic_callback.instance_ptr = &instance;
             heuristic_callback.num_step = max_number_supersteps;
             model.SetCallback(&heuristic_callback, COPT_CBCONTEXT_MIPSOL);
@@ -695,18 +634,14 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
         model.Solve();
 
         if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
-            return RETURN_STATUS::OSP_SUCCESS; //, constructBspScheduleFromSolution(instance, true)};
+            return RETURN_STATUS::OSP_SUCCESS;    //, constructBspScheduleFromSolution(instance, true)};
 
         } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
             return RETURN_STATUS::ERROR;
 
         } else {
-
             if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
-                return RETURN_STATUS::BEST_FOUND; //, constructBspScheduleFromSolution(instance, true)};
+                return RETURN_STATUS::BEST_FOUND;    //, constructBspScheduleFromSolution(instance, true)};
 
             } else {
                 return RETURN_STATUS::TIMEOUT;
@@ -724,7 +659,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
      * @param schedule The provided schedule.
      */
     inline void setInitialSolutionFromBspSchedule(const BspSchedule<Graph_t> &schedule) {
-
         initial_schedule = &schedule;
 
         max_number_supersteps = schedule.numberOfSupersteps();
@@ -749,7 +683,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
      */
     void setMaxNumberOfSupersteps(unsigned max) {
         if (use_initial_schedule && max < initial_schedule->numberOfSupersteps()) {
-
             throw std::invalid_argument("Invalid Argument while setting "
                                         "max number of supersteps to a value "
                                         "which is less than the number of "
@@ -862,4 +795,4 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
     virtual std::string getScheduleName() const override { return "TotalCommIlp"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/ImprovementScheduler.hpp b/include/osp/bsp/scheduler/ImprovementScheduler.hpp
index f45ab33c..05bfcfb4 100644
--- a/include/osp/bsp/scheduler/ImprovementScheduler.hpp
+++ b/include/osp/bsp/scheduler/ImprovementScheduler.hpp
@@ -28,9 +28,8 @@ namespace osp {
  * The ImprovementScheduler class provides a common interface for improvement scheduling scheduler.
  * Subclasses of this class can implement specific improvement scheduler by overriding the virtual methods.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class ImprovementScheduler {
-
   protected:
     unsigned timeLimitSeconds; /**< The time limit in seconds for the improvement algorithm. */
 
@@ -91,9 +90,8 @@ class ImprovementScheduler {
     virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule<Graph_t> &schedule) = 0;
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 class ComboScheduler : public Scheduler<Graph_t> {
-
   private:
     Scheduler<Graph_t> &base_scheduler;
     ImprovementScheduler<Graph_t> &improvement_scheduler;
@@ -109,7 +107,6 @@ class ComboScheduler : public Scheduler<Graph_t> {
     }
 
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         RETURN_STATUS status = base_scheduler.computeSchedule(schedule);
         if (status != RETURN_STATUS::OSP_SUCCESS and status != RETURN_STATUS::BEST_FOUND) {
             return status;
@@ -119,4 +116,4 @@ class ComboScheduler : public Scheduler<Graph_t> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp
index 1ad7304d..32818027 100644
--- a/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp
+++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp
@@ -23,11 +23,11 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
-std::vector<std::vector<vertex_idx_t<Graph_t>>>
-heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median,
-                      const double min_percent_components_retained, const double bound_component_weight_percent) {
-
+template <typename Graph_t>
+std::vector<std::vector<vertex_idx_t<Graph_t>>> heavy_edge_preprocess(const Graph_t &graph,
+                                                                      const double heavy_is_x_times_median,
+                                                                      const double min_percent_components_retained,
+                                                                      const double bound_component_weight_percent) {
     static_assert(is_computational_dag_edge_desc_v<Graph_t>,
                   "HeavyEdgePreProcess can only be used with computational DAGs with edge weights.");
 
@@ -44,7 +44,6 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median
     std::vector<e_commw_t<Graph_t>> edge_communications;
     edge_communications.reserve(graph.num_edges());
     for (const auto &edge : edges(graph)) {
-
         if constexpr (has_edge_weights_v<Graph_t>) {
             edge_communications.emplace_back(graph.edge_comm_weight(edge));
         } else {
@@ -55,21 +54,17 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median
     // Computing the median and setting it to at least one
     e_commw_t<Graph_t> median_edge_weight = 1;
     if (not edge_communications.empty()) {
-
         auto median_it = edge_communications.begin();
         std::advance(median_it, edge_communications.size() / 2);
         std::nth_element(edge_communications.begin(), median_it, edge_communications.end());
-        median_edge_weight =
-            std::max(edge_communications[edge_communications.size() / 2], static_cast<e_commw_t<Graph_t>>(1));
+        median_edge_weight = std::max(edge_communications[edge_communications.size() / 2], static_cast<e_commw_t<Graph_t>>(1));
     }
 
     // Making edge list
-    e_commw_t<Graph_t> minimal_edge_weight =
-        static_cast<e_commw_t<Graph_t>>(heavy_is_x_times_median * median_edge_weight);
+    e_commw_t<Graph_t> minimal_edge_weight = static_cast<e_commw_t<Graph_t>>(heavy_is_x_times_median * median_edge_weight);
     std::vector<EdgeType> edge_list;
     edge_list.reserve(graph.num_edges());
     for (const auto &edge : edges(graph)) {
-
         if constexpr (has_edge_weights_v<Graph_t>) {
             if (graph.edge_comm_weight(edge) > minimal_edge_weight) {
                 edge_list.emplace_back(edge);
@@ -102,14 +97,16 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median
 
     // Joining heavy edges
     for (const EdgeType &edge : edge_list) {
-        if (static_cast<double>(uf_structure.get_number_of_connected_components()) - 1.0 <
-            min_percent_components_retained * static_cast<double>(graph.num_vertices()))
+        if (static_cast<double>(uf_structure.get_number_of_connected_components()) - 1.0
+            < min_percent_components_retained * static_cast<double>(graph.num_vertices())) {
             break;
+        }
 
         v_workw_t<Graph_t> weight_comp_a = uf_structure.get_weight_of_component_by_name(source(edge, graph));
         v_workw_t<Graph_t> weight_comp_b = uf_structure.get_weight_of_component_by_name(target(edge, graph));
-        if (weight_comp_a + weight_comp_b > max_component_size)
+        if (weight_comp_a + weight_comp_b > max_component_size) {
             continue;
+        }
 
         uf_structure.join_by_name(source(edge, graph), target(edge, graph));
     }
@@ -117,4 +114,4 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median
     return uf_structure.get_connected_components();
 }
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp
index 49c9030e..00ffb584 100644
--- a/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp
+++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp
@@ -18,14 +18,13 @@ limitations under the License.
 
 #pragma once
 
-#include "VariancePartitioner.hpp"
 #include "HeavyEdgePreProcess.hpp"
+#include "VariancePartitioner.hpp"
 
 namespace osp {
 
-template<typename Graph_t, typename Interpolation_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename Interpolation_t, typename MemoryConstraint_t = no_memory_constraint>
 class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpolation_t, MemoryConstraint_t> {
-
   private:
     using VertexType = vertex_idx_t<Graph_t>;
 
@@ -45,14 +44,19 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
     double bound_component_weight_percent;
 
   public:
-    LightEdgeVariancePartitioner(double max_percent_idle_processors_ = 0.2, double variance_power_ = 2,
-                                 double heavy_is_x_times_median_ = 5.0, double min_percent_components_retained_ = 0.8,
+    LightEdgeVariancePartitioner(double max_percent_idle_processors_ = 0.2,
+                                 double variance_power_ = 2,
+                                 double heavy_is_x_times_median_ = 5.0,
+                                 double min_percent_components_retained_ = 0.8,
                                  double bound_component_weight_percent_ = 0.7,
                                  bool increase_parallelism_in_new_superstep_ = true,
-                                 float max_priority_difference_percent_ = 0.34f, float slack_ = 0.0f)
-        : VariancePartitioner<Graph_t, Interpolation_t, MemoryConstraint_t>(
-              max_percent_idle_processors_, variance_power_, increase_parallelism_in_new_superstep_,
-              max_priority_difference_percent_, slack_),
+                                 float max_priority_difference_percent_ = 0.34f,
+                                 float slack_ = 0.0f)
+        : VariancePartitioner<Graph_t, Interpolation_t, MemoryConstraint_t>(max_percent_idle_processors_,
+                                                                            variance_power_,
+                                                                            increase_parallelism_in_new_superstep_,
+                                                                            max_priority_difference_percent_,
+                                                                            slack_),
           heavy_is_x_times_median(heavy_is_x_times_median_),
           min_percent_components_retained(min_percent_components_retained_),
           bound_component_weight_percent(bound_component_weight_percent_) {};
@@ -93,7 +97,6 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
 
         v_workw_t<Graph_t> total_work = 0;
         for (const auto &v : graph.vertices()) {
-
             schedule.setAssignedProcessor(v, n_processors);
 
             total_work += graph.vertex_work_weight(v);
@@ -110,9 +113,8 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
         std::vector<v_workw_t<Graph_t>> total_partition_work(n_processors, 0);
         std::vector<v_workw_t<Graph_t>> superstep_partition_work(n_processors, 0);
 
-        std::vector<std::vector<VertexType>> preprocessed_partition =
-            heavy_edge_preprocess(graph, heavy_is_x_times_median, min_percent_components_retained,
-                                  bound_component_weight_percent / n_processors);
+        std::vector<std::vector<VertexType>> preprocessed_partition = heavy_edge_preprocess(
+            graph, heavy_is_x_times_median, min_percent_components_retained, bound_component_weight_percent / n_processors);
 
         std::vector<size_t> which_preprocess_partition(graph.num_vertices());
         for (size_t i = 0; i < preprocessed_partition.size(); i++) {
@@ -131,8 +133,8 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
         std::vector<v_commw_t<Graph_t>> transient_cost_of_preprocessed_partition(preprocessed_partition.size(), 0);
         for (size_t i = 0; i < preprocessed_partition.size(); i++) {
             for (const auto &vert : preprocessed_partition[i]) {
-                transient_cost_of_preprocessed_partition[i] =
-                    std::max(transient_cost_of_preprocessed_partition[i], graph.vertex_comm_weight(vert));
+                transient_cost_of_preprocessed_partition[i]
+                    = std::max(transient_cost_of_preprocessed_partition[i], graph.vertex_comm_weight(vert));
             }
         }
 
@@ -147,9 +149,7 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                 endsuperstep = true;
                 // std::cout << "\nCall for new superstep - unable to schedule.\n";
             } else {
-
                 if constexpr (base::use_memory_constraint) {
-
                     if (num_unable_to_partition_node_loop >= 2) {
                         return RETURN_STATUS::ERROR;
                     }
@@ -161,13 +161,12 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
             // * n_processors << " ready size " << ready.size() << " small increase " << 1.2 * (n_processors -
             // free_processors.size()) << " large increase " << n_processors - free_processors.size() +  (0.5 *
             // free_processors.size()) << "\n";
-            if (num_unable_to_partition_node_loop == 0 &&
-                static_cast<double>(free_processors.size()) > base::max_percent_idle_processors * n_processors &&
-                ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors ||
-                 static_cast<double>(ready.size()) >=
-                     1.2 * (n_processors - static_cast<double>(free_processors.size())) ||
-                 static_cast<double>(ready.size()) >=
-                     n_processors - static_cast<double>(free_processors.size()) + (0.5 * static_cast<double>(free_processors.size())))) {
+            if (num_unable_to_partition_node_loop == 0
+                && static_cast<double>(free_processors.size()) > base::max_percent_idle_processors * n_processors
+                && ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors
+                    || static_cast<double>(ready.size()) >= 1.2 * (n_processors - static_cast<double>(free_processors.size()))
+                    || static_cast<double>(ready.size()) >= n_processors - static_cast<double>(free_processors.size())
+                                                                + (0.5 * static_cast<double>(free_processors.size())))) {
                 endsuperstep = true;
                 // std::cout << "\nCall for new superstep - parallelism.\n";
             }
@@ -181,9 +180,9 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                 min_priority = std::min(min_priority, prio);
                 max_priority = std::max(max_priority, prio);
             }
-            if (num_unable_to_partition_node_loop == 0 &&
-                (max_priority - min_priority) >
-                    base::max_priority_difference_percent * static_cast<float>(total_work) / static_cast<float>(n_processors)) {
+            if (num_unable_to_partition_node_loop == 0
+                && (max_priority - min_priority) > base::max_priority_difference_percent * static_cast<float>(total_work)
+                                                       / static_cast<float>(n_processors)) {
                 endsuperstep = true;
                 // std::cout << "\nCall for new superstep - difference.\n";
             }
@@ -219,18 +218,18 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                 superstep_partition_work, total_partition_work, total_work, instance, base::slack);
 
             for (unsigned &proc : processors_in_order) {
-                if ((free_processors.find(proc)) != free_processors.cend())
+                if ((free_processors.find(proc)) != free_processors.cend()) {
                     continue;
+                }
 
                 // Check for too many free processors - needed here because free processors may not have been detected
                 // yet
-                if (num_unable_to_partition_node_loop == 0 &&
-                    static_cast<double>(free_processors.size()) > base::max_percent_idle_processors * n_processors &&
-                    ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors ||
-                     static_cast<double>(ready.size()) >=
-                         1.2 * (n_processors - static_cast<double>(free_processors.size())) ||
-                     static_cast<double>(ready.size()) >=
-                         n_processors - static_cast<double>(free_processors.size()) + (0.5 * static_cast<double>(free_processors.size())))) {
+                if (num_unable_to_partition_node_loop == 0
+                    && static_cast<double>(free_processors.size()) > base::max_percent_idle_processors * n_processors
+                    && ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors
+                        || static_cast<double>(ready.size()) >= 1.2 * (n_processors - static_cast<double>(free_processors.size()))
+                        || static_cast<double>(ready.size()) >= n_processors - static_cast<double>(free_processors.size())
+                                                                    + (0.5 * static_cast<double>(free_processors.size())))) {
                     endsuperstep = true;
                     // std::cout << "\nCall for new superstep - parallelism.\n";
                     break;
@@ -240,17 +239,19 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
 
                 // Choosing next node
                 VertexType next_node;
-                for (auto vertex_prior_pair_iter = procReady[proc].begin();
-                     vertex_prior_pair_iter != procReady[proc].end(); vertex_prior_pair_iter++) {
-                    if (assigned_a_node)
+                for (auto vertex_prior_pair_iter = procReady[proc].begin(); vertex_prior_pair_iter != procReady[proc].end();
+                     vertex_prior_pair_iter++) {
+                    if (assigned_a_node) {
                         break;
+                    }
 
                     const VertexType &vert = vertex_prior_pair_iter->first;
                     if constexpr (base::use_memory_constraint) {
-                        if (has_vertex_been_assigned[vert] || base::memory_constraint.can_add(
-                                proc, memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]],
-                            transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) {
-
+                        if (has_vertex_been_assigned[vert]
+                            || base::memory_constraint.can_add(
+                                proc,
+                                memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]],
+                                transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) {
                             next_node = vert;
                             assigned_a_node = true;
                         }
@@ -261,16 +262,19 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                 }
 
                 for (auto vertex_prior_pair_iter = procReadyPrior[proc].begin();
-                     vertex_prior_pair_iter != procReadyPrior[proc].end(); vertex_prior_pair_iter++) {
-                    if (assigned_a_node)
+                     vertex_prior_pair_iter != procReadyPrior[proc].end();
+                     vertex_prior_pair_iter++) {
+                    if (assigned_a_node) {
                         break;
+                    }
 
                     const VertexType &vert = vertex_prior_pair_iter->first;
                     if constexpr (base::use_memory_constraint) {
-                        if (has_vertex_been_assigned[vert] || base::memory_constraint.can_add(
-                                proc, memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]],
-                            transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) {
-
+                        if (has_vertex_been_assigned[vert]
+                            || base::memory_constraint.can_add(
+                                proc,
+                                memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]],
+                                transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) {
                             next_node = vert;
                             assigned_a_node = true;
                         }
@@ -281,15 +285,17 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                 }
                 for (auto vertex_prior_pair_iter = allReady.begin(); vertex_prior_pair_iter != allReady.cend();
                      vertex_prior_pair_iter++) {
-                    if (assigned_a_node)
+                    if (assigned_a_node) {
                         break;
+                    }
 
                     const VertexType &vert = vertex_prior_pair_iter->first;
                     if constexpr (base::use_memory_constraint) {
-                        if (has_vertex_been_assigned[vert] || base::memory_constraint.can_add(
-                                proc, memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]],
-                            transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) {
-
+                        if (has_vertex_been_assigned[vert]
+                            || base::memory_constraint.can_add(
+                                proc,
+                                memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]],
+                                transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) {
                             next_node = vert;
                             assigned_a_node = true;
                         }
@@ -304,7 +310,6 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                 } else {
                     // Assignments
                     if (has_vertex_been_assigned[next_node]) {
-
                         unsigned proc_alloc_prior = schedule.assignedProcessor(next_node);
 
                         // std::cout << "Allocated node " << next_node << " to processor " << proc_alloc_prior << "
@@ -335,8 +340,8 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                                 ready.insert(std::make_pair(chld, variance_priorities[chld]));
                                 bool is_proc_ready = true;
                                 for (const auto &parent : graph.parents(chld)) {
-                                    if ((schedule.assignedProcessor(parent) != proc_alloc_prior) &&
-                                        (schedule.assignedSuperstep(parent) == superstep)) {
+                                    if ((schedule.assignedProcessor(parent) != proc_alloc_prior)
+                                        && (schedule.assignedSuperstep(parent) == superstep)) {
                                         is_proc_ready = false;
                                         break;
                                     }
@@ -349,7 +354,6 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                             }
                         }
                     } else {
-
                         schedule.setAssignedProcessor(next_node, proc);
                         has_vertex_been_assigned[next_node] = true;
                         // std::cout << "Allocated node " << next_node << " to processor " << proc << ".\n";
@@ -386,8 +390,8 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
                                 ready.insert(std::make_pair(chld, variance_priorities[chld]));
                                 bool is_proc_ready = true;
                                 for (const auto &parent : graph.parents(chld)) {
-                                    if ((schedule.assignedProcessor(parent) != proc) &&
-                                        (schedule.assignedSuperstep(parent) == superstep)) {
+                                    if ((schedule.assignedProcessor(parent) != proc)
+                                        && (schedule.assignedSuperstep(parent) == superstep)) {
                                         is_proc_ready = false;
                                         break;
                                     }
@@ -402,9 +406,9 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
 
                         // Allocating all nodes in the same partition
                         for (VertexType node_in_same_partition : preprocessed_partition[which_preprocess_partition[next_node]]) {
-                            
-                            if (node_in_same_partition == next_node)
+                            if (node_in_same_partition == next_node) {
                                 continue;
+                            }
 
                             // Allocation
                             schedule.setAssignedProcessor(node_in_same_partition, proc);
@@ -436,4 +440,4 @@ class LightEdgeVariancePartitioner : public VariancePartitioner<Graph_t, Interpo
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/LoadBalancerBase.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/LoadBalancerBase.hpp
index 26e45fa8..10c77647 100644
--- a/include/osp/bsp/scheduler/LoadBalanceScheduler/LoadBalancerBase.hpp
+++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/LoadBalancerBase.hpp
@@ -18,10 +18,11 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/bsp/scheduler/Scheduler.hpp"
 #include <cmath>
 #include <functional>
 
+#include "osp/bsp/scheduler/Scheduler.hpp"
+
 namespace osp {
 
 struct linear_interpolation {
@@ -42,9 +43,8 @@ struct global_only_interpolation {
     float operator()(float, const float) { return 1.0f; };
 };
 
-template<typename Graph_t, typename Interpolation_t = flat_spline_interpolation>
+template <typename Graph_t, typename Interpolation_t = flat_spline_interpolation>
 class LoadBalancerBase : public Scheduler<Graph_t> {
-
     static_assert(std::is_invocable_r<float, Interpolation_t, float, float>::value,
                   "Interpolation_t must be invocable with two float arguments and return a float.");
 
@@ -56,11 +56,11 @@ class LoadBalancerBase : public Scheduler<Graph_t> {
     /// @param instance bsp instance
     /// @param slack how much to ignore global balance
     /// @return vector with the interpolated priorities
-    std::vector<float>
-    computeProcessorPrioritiesInterpolation(const std::vector<v_workw_t<Graph_t>> &superstep_partition_work,
-                                            const std::vector<v_workw_t<Graph_t>> &total_partition_work,
-                                            const v_workw_t<Graph_t> &total_work, const BspInstance<Graph_t> &instance,
-                                            const float slack = 0.0) {
+    std::vector<float> computeProcessorPrioritiesInterpolation(const std::vector<v_workw_t<Graph_t>> &superstep_partition_work,
+                                                               const std::vector<v_workw_t<Graph_t>> &total_partition_work,
+                                                               const v_workw_t<Graph_t> &total_work,
+                                                               const BspInstance<Graph_t> &instance,
+                                                               const float slack = 0.0) {
         v_workw_t<Graph_t> work_till_now = 0;
         for (const auto &part_work : total_partition_work) {
             work_till_now += part_work;
@@ -72,8 +72,10 @@ class LoadBalancerBase : public Scheduler<Graph_t> {
 
         std::vector<float> proc_prio(instance.numberOfProcessors());
         for (size_t i = 0; i < proc_prio.size(); i++) {
-            assert(static_cast<double>(total_partition_work[i]) < std::numeric_limits<float>::max() && static_cast<double>(superstep_partition_work[i]) < std::numeric_limits<float>::max()); 
-            proc_prio[i] = ((1 - value) * static_cast<float>(superstep_partition_work[i])) + (value * static_cast<float>(total_partition_work[i]));
+            assert(static_cast<double>(total_partition_work[i]) < std::numeric_limits<float>::max()
+                   && static_cast<double>(superstep_partition_work[i]) < std::numeric_limits<float>::max());
+            proc_prio[i] = ((1 - value) * static_cast<float>(superstep_partition_work[i]))
+                           + (value * static_cast<float>(total_partition_work[i]));
         }
 
         return proc_prio;
@@ -89,9 +91,10 @@ class LoadBalancerBase : public Scheduler<Graph_t> {
     std::vector<unsigned> computeProcessorPriority(const std::vector<v_workw_t<Graph_t>> &superstep_partition_work,
                                                    const std::vector<v_workw_t<Graph_t>> &total_partition_work,
                                                    const v_workw_t<Graph_t> &total_work,
-                                                   const BspInstance<Graph_t> &instance, const float slack = 0.0) {
-        return sorting_arrangement<float, unsigned>(computeProcessorPrioritiesInterpolation(
-            superstep_partition_work, total_partition_work, total_work, instance, slack));
+                                                   const BspInstance<Graph_t> &instance,
+                                                   const float slack = 0.0) {
+        return sorting_arrangement<float, unsigned>(
+            computeProcessorPrioritiesInterpolation(superstep_partition_work, total_partition_work, total_work, instance, slack));
     }
 
   public:
@@ -99,4 +102,4 @@ class LoadBalancerBase : public Scheduler<Graph_t> {
     virtual ~LoadBalancerBase() = default;
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp
index 089cf4d4..6b5f904c 100644
--- a/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp
+++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp
@@ -24,12 +24,12 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename Interpolation_t, typename MemoryConstraint_t = no_memory_constraint>
+template <typename Graph_t, typename Interpolation_t, typename MemoryConstraint_t = no_memory_constraint>
 class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "VariancePartitioner can only be used with computational DAGs.");
 
     using VertexType = vertex_idx_t<Graph_t>;
+
     struct VarianceCompare {
         bool operator()(const std::pair<VertexType, double> &lhs, const std::pair<VertexType, double> &rhs) const {
             return ((lhs.second > rhs.second) || ((lhs.second >= rhs.second) && (lhs.first < rhs.first)));
@@ -37,8 +37,8 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
     };
 
   protected:
-    constexpr static bool use_memory_constraint =
-        is_memory_constraint_v<MemoryConstraint_t> or is_memory_constraint_schedule_v<MemoryConstraint_t>;
+    constexpr static bool use_memory_constraint = is_memory_constraint_v<MemoryConstraint_t>
+                                                  or is_memory_constraint_schedule_v<MemoryConstraint_t>;
 
     static_assert(not use_memory_constraint or std::is_same_v<Graph_t, typename MemoryConstraint_t::Graph_impl_t>,
                   "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t.");
@@ -65,7 +65,6 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
     /// @param power the power in the power mean average
     /// @return vector of the logarithm of power mean averaged bottom node distance
     std::vector<double> compute_work_variance(const Graph_t &graph, double power = 2) const {
-
         std::vector<double> work_variance(graph.num_vertices(), 0.0);
 
         const auto top_order = GetTopOrder(graph);
@@ -84,25 +83,27 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
             double node_weight = std::log(graph.vertex_work_weight(*r_iter));
             double larger_val = node_weight > temp ? node_weight : temp;
 
-            work_variance[*r_iter] =
-                std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val;
+            work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val;
         }
 
         return work_variance;
     }
 
   public:
-    VariancePartitioner(double max_percent_idle_processors_ = 0.2, double variance_power_ = 2.0,
+    VariancePartitioner(double max_percent_idle_processors_ = 0.2,
+                        double variance_power_ = 2.0,
                         bool increase_parallelism_in_new_superstep_ = true,
-                        float max_priority_difference_percent_ = 0.34f, float slack_ = 0.0f)
-        : max_percent_idle_processors(max_percent_idle_processors_), variance_power(variance_power_),
+                        float max_priority_difference_percent_ = 0.34f,
+                        float slack_ = 0.0f)
+        : max_percent_idle_processors(max_percent_idle_processors_),
+          variance_power(variance_power_),
           increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_),
-          max_priority_difference_percent(max_priority_difference_percent_), slack(slack_) {};
+          max_priority_difference_percent(max_priority_difference_percent_),
+          slack(slack_) {};
 
     virtual ~VariancePartitioner() = default;
 
     virtual RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         const auto &instance = schedule.getInstance();
         const auto &n_vert = instance.numberOfVertices();
         const unsigned &n_processors = instance.numberOfProcessors();
@@ -132,7 +133,6 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
         std::vector<unsigned> which_proc_ready_prior(n_vert, n_processors);
 
         for (const auto &v : graph.vertices()) {
-
             schedule.setAssignedProcessor(v, n_processors);
 
             total_work += graph.vertex_work_weight(v);
@@ -158,9 +158,7 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
                 endsuperstep = true;
                 // std::cout << "\nCall for new superstep - unable to schedule.\n";
             } else {
-
                 if constexpr (use_memory_constraint) {
-
                     if (num_unable_to_partition_node_loop >= 2) {
                         return RETURN_STATUS::ERROR;
                     }
@@ -172,18 +170,17 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
             // * n_processors << " ready size " << ready.size() << " small increase " << 1.2 * (n_processors -
             // free_processors.size()) << " large increase " << n_processors - free_processors.size() +  (0.5 *
             // free_processors.size()) << "\n";
-            if (num_unable_to_partition_node_loop == 0 &&
-                static_cast<double>(free_processors.size()) > max_percent_idle_processors * n_processors &&
-                ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors ||
-                 static_cast<double>(ready.size()) >=
-                     1.2 * (n_processors - static_cast<double>(free_processors.size())) ||
-                 static_cast<double>(ready.size()) >= n_processors - static_cast<double>(free_processors.size()) +
-                                                          (0.5 * static_cast<double>(free_processors.size())))) {
+            if (num_unable_to_partition_node_loop == 0
+                && static_cast<double>(free_processors.size()) > max_percent_idle_processors * n_processors
+                && ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors
+                    || static_cast<double>(ready.size()) >= 1.2 * (n_processors - static_cast<double>(free_processors.size()))
+                    || static_cast<double>(ready.size()) >= n_processors - static_cast<double>(free_processors.size())
+                                                                + (0.5 * static_cast<double>(free_processors.size())))) {
                 endsuperstep = true;
                 // std::cout << "\nCall for new superstep - parallelism.\n";
             }
-            std::vector<float> processor_priorities =
-                LoadBalancerBase<Graph_t, Interpolation_t>::computeProcessorPrioritiesInterpolation(
+            std::vector<float> processor_priorities
+                = LoadBalancerBase<Graph_t, Interpolation_t>::computeProcessorPrioritiesInterpolation(
                     superstep_partition_work, total_partition_work, total_work, instance);
             float min_priority = processor_priorities[0];
             float max_priority = processor_priorities[0];
@@ -191,9 +188,9 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
                 min_priority = std::min(min_priority, prio);
                 max_priority = std::max(max_priority, prio);
             }
-            if (num_unable_to_partition_node_loop == 0 &&
-                (max_priority - min_priority) >
-                    max_priority_difference_percent * static_cast<float>(total_work) / static_cast<float>(n_processors)) {
+            if (num_unable_to_partition_node_loop == 0
+                && (max_priority - min_priority)
+                       > max_priority_difference_percent * static_cast<float>(total_work) / static_cast<float>(n_processors)) {
                 endsuperstep = true;
                 // std::cout << "\nCall for new superstep - difference.\n";
             }
@@ -225,22 +222,21 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
             bool assigned_a_node = false;
 
             // Choosing next processor
-            std::vector<unsigned> processors_in_order =
-                LoadBalancerBase<Graph_t, Interpolation_t>::computeProcessorPriority(
-                    superstep_partition_work, total_partition_work, total_work, instance, slack);
+            std::vector<unsigned> processors_in_order = LoadBalancerBase<Graph_t, Interpolation_t>::computeProcessorPriority(
+                superstep_partition_work, total_partition_work, total_work, instance, slack);
             for (unsigned &proc : processors_in_order) {
-                if ((free_processors.find(proc)) != free_processors.cend())
+                if ((free_processors.find(proc)) != free_processors.cend()) {
                     continue;
+                }
 
                 // Check for too many free processors - needed here because free processors may not have been detected
                 // yet
-                if (num_unable_to_partition_node_loop == 0 &&
-                    static_cast<double>(free_processors.size()) > max_percent_idle_processors * n_processors &&
-                    ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors ||
-                     static_cast<double>(ready.size()) >=
-                         1.2 * (n_processors - static_cast<double>(free_processors.size())) ||
-                     static_cast<double>(ready.size()) >= n_processors - static_cast<double>(free_processors.size()) +
-                                                              (0.5 * static_cast<double>(free_processors.size())))) {
+                if (num_unable_to_partition_node_loop == 0
+                    && static_cast<double>(free_processors.size()) > max_percent_idle_processors * n_processors
+                    && ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors
+                        || static_cast<double>(ready.size()) >= 1.2 * (n_processors - static_cast<double>(free_processors.size()))
+                        || static_cast<double>(ready.size()) >= n_processors - static_cast<double>(free_processors.size())
+                                                                    + (0.5 * static_cast<double>(free_processors.size())))) {
                     endsuperstep = true;
                     // std::cout << "\nCall for new superstep - parallelism.\n";
                     break;
@@ -250,10 +246,11 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
 
                 // Choosing next node
                 VertexType next_node;
-                for (auto vertex_prior_pair_iter = procReady[proc].begin();
-                     vertex_prior_pair_iter != procReady[proc].cend(); vertex_prior_pair_iter++) {
-                    if (assigned_a_node)
+                for (auto vertex_prior_pair_iter = procReady[proc].begin(); vertex_prior_pair_iter != procReady[proc].cend();
+                     vertex_prior_pair_iter++) {
+                    if (assigned_a_node) {
                         break;
+                    }
 
                     if constexpr (use_memory_constraint) {
                         if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) {
@@ -267,9 +264,11 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
                 }
 
                 for (auto vertex_prior_pair_iter = procReadyPrior[proc].begin();
-                     vertex_prior_pair_iter != procReadyPrior[proc].cend(); vertex_prior_pair_iter++) {
-                    if (assigned_a_node)
+                     vertex_prior_pair_iter != procReadyPrior[proc].cend();
+                     vertex_prior_pair_iter++) {
+                    if (assigned_a_node) {
                         break;
+                    }
 
                     if constexpr (use_memory_constraint) {
                         if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) {
@@ -284,9 +283,9 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
 
                 for (auto vertex_prior_pair_iter = allReady.begin(); vertex_prior_pair_iter != allReady.cend();
                      vertex_prior_pair_iter++) {
-
-                    if (assigned_a_node)
+                    if (assigned_a_node) {
                         break;
+                    }
 
                     if constexpr (use_memory_constraint) {
                         if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) {
@@ -334,8 +333,8 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
                             ready.insert(std::make_pair(chld, variance_priorities[chld]));
                             bool is_proc_ready = true;
                             for (const auto &parent : graph.parents(chld)) {
-                                if ((schedule.assignedProcessor(parent) != proc) &&
-                                    (schedule.assignedSuperstep(parent) == superstep)) {
+                                if ((schedule.assignedProcessor(parent) != proc)
+                                    && (schedule.assignedSuperstep(parent) == superstep)) {
                                     is_proc_ready = false;
                                     break;
                                 }
@@ -362,4 +361,4 @@ class VariancePartitioner : public LoadBalancerBase<Graph_t, Interpolation_t> {
     std::string getScheduleName() const override { return "VariancePartitioner"; };
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp
index 48a983a6..5d2374de 100644
--- a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp
@@ -18,27 +18,28 @@ limitations under the License.
 
 #pragma once
 
+#include <chrono>
+
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/scheduler/ImprovementScheduler.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
 
-#include <chrono>
+namespace osp {
 
-namespace osp{
-
-template<typename Graph_t>
+template <typename Graph_t>
 class HillClimbingScheduler : public ImprovementScheduler<Graph_t> {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the computational_dag concept");
 
     using vertex_idx = vertex_idx_t<Graph_t>;
     using cost_type = v_workw_t<Graph_t>;
 
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "HillClimbing requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "HillClimbing requires work and comm. weights to have the same type.");
 
   public:
     enum Direction { EARLIER = 0, AT, LATER };
+
     static const int NumDirections = 3;
 
     // aux structure for efficiently storing the changes incurred by a potential HC step
@@ -49,9 +50,8 @@ class HillClimbingScheduler : public ImprovementScheduler<Graph_t> {
     };
 
   private:
-
     BspSchedule<Graph_t> *schedule;
-    cost_type cost=0;
+    cost_type cost = 0;
 
     // Main parameters for runnign algorithm
     bool shrink = true;
@@ -60,18 +60,18 @@ class HillClimbingScheduler : public ImprovementScheduler<Graph_t> {
     // aux data structures
     std::vector<std::vector<std::list<vertex_idx>>> supsteplists;
     std::vector<std::vector<std::vector<bool>>> canMove;
-    std::vector<std::list<std::pair<vertex_idx, unsigned> > > moveOptions;
-    std::vector<std::vector<std::vector<typename std::list<std::pair<vertex_idx, unsigned> >::iterator>>> movePointer;
+    std::vector<std::list<std::pair<vertex_idx, unsigned>>> moveOptions;
+    std::vector<std::vector<std::vector<typename std::list<std::pair<vertex_idx, unsigned>>::iterator>>> movePointer;
     std::vector<std::vector<std::map<unsigned, unsigned>>> succSteps;
-    std::vector<std::vector<cost_type> > workCost, sent, received, commCost;
-    std::vector<std::set<std::pair<cost_type, unsigned> > > workCostList, commCostList;
-    std::vector<std::vector<typename std::set<std::pair<cost_type, unsigned> >::iterator> > workCostPointer, commCostPointer;
+    std::vector<std::vector<cost_type>> workCost, sent, received, commCost;
+    std::vector<std::set<std::pair<cost_type, unsigned>>> workCostList, commCostList;
+    std::vector<std::vector<typename std::set<std::pair<cost_type, unsigned>>::iterator>> workCostPointer, commCostPointer;
     std::vector<typename std::list<vertex_idx>::iterator> supStepListPointer;
-    std::pair<int, typename std::list<std::pair<vertex_idx, unsigned> >::iterator> nextMove;
+    std::pair<int, typename std::list<std::pair<vertex_idx, unsigned>>::iterator> nextMove;
     bool HCwithLatency = true;
 
     // for improved candidate selection
-    std::deque<std::tuple<vertex_idx, unsigned, int> > promisingMoves;
+    std::deque<std::tuple<vertex_idx, unsigned, int>> promisingMoves;
     bool findPromisingMoves = true;
 
     // Initialize data structures (based on current schedule)
@@ -117,28 +117,27 @@ class HillClimbingScheduler : public ImprovementScheduler<Graph_t> {
 
     virtual RETURN_STATUS improveSchedule(BspSchedule<Graph_t> &input_schedule) override;
 
-    //call with time/step limits
+    // call with time/step limits
     virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule<Graph_t> &input_schedule) override;
     virtual RETURN_STATUS improveScheduleWithStepLimit(BspSchedule<Graph_t> &input_schedule, const unsigned stepLimit = 10);
 
-    //setting parameters
-    void setSteepestAscend(bool steepestAscent_) {steepestAscent = steepestAscent_;}
-    void setShrink(bool shrink_) {shrink = shrink_;}
+    // setting parameters
+    void setSteepestAscend(bool steepestAscent_) { steepestAscent = steepestAscent_; }
+
+    void setShrink(bool shrink_) { shrink = shrink_; }
 
     virtual std::string getScheduleName() const override { return "HillClimbing"; }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS HillClimbingScheduler<Graph_t>::improveSchedule(BspSchedule<Graph_t> &input_schedule) {
-
     ImprovementScheduler<Graph_t>::setTimeLimitSeconds(600U);
     return improveScheduleWithTimeLimit(input_schedule);
 }
 
 // Main method for hill climbing (with time limit)
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS HillClimbingScheduler<Graph_t>::improveScheduleWithTimeLimit(BspSchedule<Graph_t> &input_schedule) {
-
     schedule = &input_schedule;
 
     CreateSupstepLists();
@@ -146,7 +145,7 @@ RETURN_STATUS HillClimbingScheduler<Graph_t>::improveScheduleWithTimeLimit(BspSc
     const std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();
 
     int counter = 0;
-    while (Improve())
+    while (Improve()) {
         if ((++counter) == 10) {
             counter = 0;
             std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
@@ -156,29 +155,31 @@ RETURN_STATUS HillClimbingScheduler<Graph_t>::improveScheduleWithTimeLimit(BspSc
                 break;
             }
         }
+    }
 
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 // Hill climbing with step limit (designed as an ingredient for multilevel algorithms, no safety checks)
-RETURN_STATUS HillClimbingScheduler<Graph_t>::improveScheduleWithStepLimit(BspSchedule<Graph_t> &input_schedule, const unsigned stepLimit) {
-
+RETURN_STATUS HillClimbingScheduler<Graph_t>::improveScheduleWithStepLimit(BspSchedule<Graph_t> &input_schedule,
+                                                                           const unsigned stepLimit) {
     schedule = &input_schedule;
 
     CreateSupstepLists();
     Init();
-    for (unsigned step = 0; step < stepLimit; ++step)
-        if (!Improve())
+    for (unsigned step = 0; step < stepLimit; ++step) {
+        if (!Improve()) {
             break;
+        }
+    }
 
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::Init() {
-    if(shrink)
-    {
+    if (shrink) {
         schedule->shrinkByMergingSupersteps();
         CreateSupstepLists();
     }
@@ -194,20 +195,23 @@ void HillClimbingScheduler<Graph_t>::Init() {
     moveOptions.clear();
     moveOptions.resize(NumDirections);
     movePointer.clear();
-    movePointer.resize(NumDirections, std::vector<std::vector<typename std::list<std::pair<vertex_idx, unsigned> >::iterator>>(
-                                          N, std::vector<typename std::list<std::pair<vertex_idx, unsigned> >::iterator>(P)));
+    movePointer.resize(NumDirections,
+                       std::vector<std::vector<typename std::list<std::pair<vertex_idx, unsigned>>::iterator>>(
+                           N, std::vector<typename std::list<std::pair<vertex_idx, unsigned>>::iterator>(P)));
 
     // Value use lists
     succSteps.clear();
     succSteps.resize(N, std::vector<std::map<unsigned, unsigned>>(P));
-    for (vertex_idx node = 0; node < N; ++node)
+    for (vertex_idx node = 0; node < N; ++node) {
         for (const vertex_idx &succ : G.children(node)) {
-            if (succSteps[node][schedule->assignedProcessor(succ)].find(schedule->assignedSuperstep(succ)) ==
-                succSteps[node][schedule->assignedProcessor(succ)].end())
+            if (succSteps[node][schedule->assignedProcessor(succ)].find(schedule->assignedSuperstep(succ))
+                == succSteps[node][schedule->assignedProcessor(succ)].end()) {
                 succSteps[node][schedule->assignedProcessor(succ)].insert({schedule->assignedSuperstep(succ), 1U});
-            else
+            } else {
                 succSteps[node][schedule->assignedProcessor(succ)].at(schedule->assignedSuperstep(succ)) += 1;
+            }
         }
+    }
 
     // Cost data
     workCost.clear();
@@ -224,21 +228,25 @@ void HillClimbingScheduler<Graph_t>::Init() {
     commCostList.clear();
     commCostList.resize(M - 1);
     workCostPointer.clear();
-    workCostPointer.resize(M, std::vector<typename std::set<std::pair<cost_type, unsigned> >::iterator>(P));
+    workCostPointer.resize(M, std::vector<typename std::set<std::pair<cost_type, unsigned>>::iterator>(P));
     commCostPointer.clear();
-    commCostPointer.resize(M - 1, std::vector<typename std::set<std::pair<cost_type, unsigned> >::iterator>(P));
+    commCostPointer.resize(M - 1, std::vector<typename std::set<std::pair<cost_type, unsigned>>::iterator>(P));
 
     // Supstep std::list pointers
     supStepListPointer.clear();
     supStepListPointer.resize(N);
-    for (unsigned step = 0; step < M; ++step)
-        for (unsigned proc = 0; proc < P; ++proc)
-            for (auto it = supsteplists[step][proc].begin(); it != supsteplists[step][proc].end(); ++it)
+    for (unsigned step = 0; step < M; ++step) {
+        for (unsigned proc = 0; proc < P; ++proc) {
+            for (auto it = supsteplists[step][proc].begin(); it != supsteplists[step][proc].end(); ++it) {
                 supStepListPointer[*it] = it;
+            }
+        }
+    }
 
     // Compute movement options
-    for (vertex_idx node = 0; node < N; ++node)
+    for (vertex_idx node = 0; node < N; ++node) {
         updateNodeMoves(node);
+    }
 
     nextMove.first = 0;
     nextMove.second = moveOptions[0].begin();
@@ -247,8 +255,9 @@ void HillClimbingScheduler<Graph_t>::Init() {
     std::vector<cost_type> work_cost(M, 0);
     for (unsigned step = 0; step < M; ++step) {
         for (unsigned proc = 0; proc < P; ++proc) {
-            for (const vertex_idx node : supsteplists[step][proc])
+            for (const vertex_idx node : supsteplists[step][proc]) {
                 workCost[step][proc] += schedule->getInstance().getComputationalDag().vertex_work_weight(node);
+            }
 
             std::pair<cost_type, unsigned> entry(workCost[step][proc], proc);
             workCostPointer[step][proc] = workCostList[step].insert(entry).first;
@@ -259,16 +268,24 @@ void HillClimbingScheduler<Graph_t>::Init() {
     cost = work_cost[0];
     std::vector<std::vector<bool>> present(N, std::vector<bool>(P, false));
     for (unsigned step = 0; step < M - schedule->getStaleness(); ++step) {
-        for (unsigned proc = 0; proc < P; ++proc)
-            for (const vertex_idx node : supsteplists[step + schedule->getStaleness()][proc])
-                for (const vertex_idx &pred : G.parents(node))
-                    if (schedule->assignedProcessor(node) != schedule->assignedProcessor(pred) && !present[pred][schedule->assignedProcessor(node)]) {
+        for (unsigned proc = 0; proc < P; ++proc) {
+            for (const vertex_idx node : supsteplists[step + schedule->getStaleness()][proc]) {
+                for (const vertex_idx &pred : G.parents(node)) {
+                    if (schedule->assignedProcessor(node) != schedule->assignedProcessor(pred)
+                        && !present[pred][schedule->assignedProcessor(node)]) {
                         present[pred][schedule->assignedProcessor(node)] = true;
-                        sent[step][schedule->assignedProcessor(pred)] +=
-                            schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), schedule->assignedProcessor(node));
-                        received[step][schedule->assignedProcessor(node)] +=
-                            schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), schedule->assignedProcessor(node));
+                        sent[step][schedule->assignedProcessor(pred)]
+                            += schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                               * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred),
+                                                                                     schedule->assignedProcessor(node));
+                        received[step][schedule->assignedProcessor(node)]
+                            += schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                               * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred),
+                                                                                     schedule->assignedProcessor(node));
                     }
+                }
+            }
+        }
     }
 
     for (unsigned step = 0; step < M - 1; ++step) {
@@ -280,175 +297,212 @@ void HillClimbingScheduler<Graph_t>::Init() {
         cost_type comm_cost = schedule->getInstance().getArchitecture().communicationCosts() * commCostList[step].rbegin()->first;
         cost_type sync_cost = (comm_cost > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0;
 
-        if(schedule->getStaleness() == 1)
-            cost += comm_cost + work_cost[step+1] + sync_cost;
-        else
-            cost += std::max(comm_cost, work_cost[step+1]) + sync_cost;
+        if (schedule->getStaleness() == 1) {
+            cost += comm_cost + work_cost[step + 1] + sync_cost;
+        } else {
+            cost += std::max(comm_cost, work_cost[step + 1]) + sync_cost;
+        }
     }
 
     updatePromisingMoves();
 
     // memory_constraints
-    if(use_memory_constraint)
-    {
+    if (use_memory_constraint) {
         memory_used.clear();
         memory_used.resize(P, std::vector<v_memw_t<Graph_t>>(M, 0));
-        for (vertex_idx node = 0; node < N; ++node)
-            memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] += schedule->getInstance().getComputationalDag().vertex_mem_weight(node);
+        for (vertex_idx node = 0; node < N; ++node) {
+            memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)]
+                += schedule->getInstance().getComputationalDag().vertex_mem_weight(node);
+        }
     }
-
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::updatePromisingMoves()
-{
-    if(!findPromisingMoves)
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::updatePromisingMoves() {
+    if (!findPromisingMoves) {
         return;
+    }
 
     const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors();
     const Graph_t &G = schedule->getInstance().getComputationalDag();
 
     promisingMoves.clear();
-    for(vertex_idx node=0; node < schedule->getInstance().getComputationalDag().num_vertices(); ++node)
-    {
+    for (vertex_idx node = 0; node < schedule->getInstance().getComputationalDag().num_vertices(); ++node) {
         std::vector<unsigned> nrPredOnProc(P, 0);
-        for(const vertex_idx &pred : G.parents(node))
+        for (const vertex_idx &pred : G.parents(node)) {
             ++nrPredOnProc[schedule->assignedProcessor(pred)];
+        }
 
         unsigned otherProcUsed = 0;
-        for(unsigned proc=0; proc<P; ++proc)
-            if(schedule->assignedProcessor(node)!=proc && nrPredOnProc[proc]>0)
+        for (unsigned proc = 0; proc < P; ++proc) {
+            if (schedule->assignedProcessor(node) != proc && nrPredOnProc[proc] > 0) {
                 ++otherProcUsed;
+            }
+        }
 
-        if(otherProcUsed==1)
-            for(unsigned proc=0; proc<P; ++proc)
-                if(schedule->assignedProcessor(node)!=proc && nrPredOnProc[proc]>0 && schedule->getInstance().isCompatible(node,proc))
-                {
+        if (otherProcUsed == 1) {
+            for (unsigned proc = 0; proc < P; ++proc) {
+                if (schedule->assignedProcessor(node) != proc && nrPredOnProc[proc] > 0
+                    && schedule->getInstance().isCompatible(node, proc)) {
                     promisingMoves.push_back(std::make_tuple(node, proc, EARLIER));
                     promisingMoves.push_back(std::make_tuple(node, proc, AT));
                     promisingMoves.push_back(std::make_tuple(node, proc, LATER));
                 }
+            }
+        }
 
         std::vector<unsigned> nrSuccOnProc(P, 0);
-        for(const vertex_idx &succ : G.children(node))
+        for (const vertex_idx &succ : G.children(node)) {
             ++nrSuccOnProc[schedule->assignedProcessor(succ)];
+        }
 
         otherProcUsed = 0;
-        for(unsigned proc=0; proc<P; ++proc)
-            if(schedule->assignedProcessor(node)!=proc && nrSuccOnProc[proc]>0)
+        for (unsigned proc = 0; proc < P; ++proc) {
+            if (schedule->assignedProcessor(node) != proc && nrSuccOnProc[proc] > 0) {
                 ++otherProcUsed;
+            }
+        }
 
-        if(otherProcUsed==1)
-            for(unsigned proc=0; proc<P; ++proc)
-                if(schedule->assignedProcessor(node)!=proc && nrSuccOnProc[proc]>0 && schedule->getInstance().isCompatible(node,proc))
-                {
+        if (otherProcUsed == 1) {
+            for (unsigned proc = 0; proc < P; ++proc) {
+                if (schedule->assignedProcessor(node) != proc && nrSuccOnProc[proc] > 0
+                    && schedule->getInstance().isCompatible(node, proc)) {
                     promisingMoves.push_back(std::make_tuple(node, proc, EARLIER));
                     promisingMoves.push_back(std::make_tuple(node, proc, AT));
                     promisingMoves.push_back(std::make_tuple(node, proc, LATER));
                 }
+            }
         }
+    }
 
-    for(unsigned step=0; step < schedule->numberOfSupersteps(); ++step)
-    {
+    for (unsigned step = 0; step < schedule->numberOfSupersteps(); ++step) {
         std::list<unsigned> minProcs, maxProcs;
-        cost_type minWork=std::numeric_limits<cost_type>::max(), maxWork=std::numeric_limits<cost_type>::min();
-        for(unsigned proc=0; proc<P; ++proc)
-        {
-            if(workCost[step][proc]> maxWork)
-                maxWork=workCost[step][proc];
-            if(workCost[step][proc]< minWork)
-                minWork=workCost[step][proc];
-        }
-        for(unsigned proc=0; proc<P; ++proc)
-        {
-            if(workCost[step][proc]==minWork)
+        cost_type minWork = std::numeric_limits<cost_type>::max(), maxWork = std::numeric_limits<cost_type>::min();
+        for (unsigned proc = 0; proc < P; ++proc) {
+            if (workCost[step][proc] > maxWork) {
+                maxWork = workCost[step][proc];
+            }
+            if (workCost[step][proc] < minWork) {
+                minWork = workCost[step][proc];
+            }
+        }
+        for (unsigned proc = 0; proc < P; ++proc) {
+            if (workCost[step][proc] == minWork) {
                 minProcs.push_back(proc);
-            if(workCost[step][proc]==maxWork)
+            }
+            if (workCost[step][proc] == maxWork) {
                 maxProcs.push_back(proc);
+            }
+        }
+        for (unsigned to : minProcs) {
+            for (unsigned from : maxProcs) {
+                for (vertex_idx node : supsteplists[step][from]) {
+                    if (schedule->getInstance().isCompatible(node, to)) {
+                        promisingMoves.push_back(std::make_tuple(node, to, AT));
+                    }
+                }
+            }
         }
-        for(unsigned to: minProcs)
-            for(unsigned from: maxProcs)
-                for(vertex_idx node : supsteplists[step][from])
-                    if(schedule->getInstance().isCompatible(node, to))
-                        promisingMoves.push_back(std::make_tuple(node,to, AT));
     }
 }
 
 // Functions to compute and update the std::list of possible moves
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::updateNodeMovesEarlier(const vertex_idx node) {
-    if (schedule->assignedSuperstep(node) == 0)
+    if (schedule->assignedSuperstep(node) == 0) {
         return;
+    }
 
     std::set<unsigned> predProc;
     for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) {
-        if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node))
+        if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) {
             return;
-        if (static_cast<int>(schedule->assignedSuperstep(pred)) >= static_cast<int>(schedule->assignedSuperstep(node)) - static_cast<int>(schedule->getStaleness()))
+        }
+        if (static_cast<int>(schedule->assignedSuperstep(pred))
+            >= static_cast<int>(schedule->assignedSuperstep(node)) - static_cast<int>(schedule->getStaleness())) {
             predProc.insert(schedule->assignedProcessor(pred));
+        }
     }
-    if(schedule->getStaleness() == 2)
-    {
-        for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node))
-            if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node))
+    if (schedule->getStaleness() == 2) {
+        for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) {
+            if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) {
                 predProc.insert(schedule->assignedProcessor(succ));
+            }
+        }
     }
 
-    if (predProc.size() > 1)
+    if (predProc.size() > 1) {
         return;
+    }
 
-    if (predProc.size() == 1)
+    if (predProc.size() == 1) {
         addMoveOption(node, *predProc.begin(), EARLIER);
-    else
-        for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc)
+    } else {
+        for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) {
             addMoveOption(node, proc, EARLIER);
+        }
+    }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::updateNodeMovesAt(const vertex_idx node) {
-    for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node))
-        if (static_cast<int>(schedule->assignedSuperstep(pred)) >= static_cast<int>(schedule->assignedSuperstep(node)) - static_cast<int>(schedule->getStaleness()) + 1)
+    for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) {
+        if (static_cast<int>(schedule->assignedSuperstep(pred))
+            >= static_cast<int>(schedule->assignedSuperstep(node)) - static_cast<int>(schedule->getStaleness()) + 1) {
             return;
+        }
+    }
 
-    for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node))
-        if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness() - 1)
+    for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) {
+        if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness() - 1) {
             return;
+        }
+    }
 
-    for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc)
-        if (proc != schedule->assignedProcessor(node))
+    for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) {
+        if (proc != schedule->assignedProcessor(node)) {
             addMoveOption(node, proc, AT);
+        }
+    }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::updateNodeMovesLater(const vertex_idx node) {
-    if (schedule->assignedSuperstep(node) == schedule->numberOfSupersteps() - 1)
+    if (schedule->assignedSuperstep(node) == schedule->numberOfSupersteps() - 1) {
         return;
+    }
 
     std::set<unsigned> succProc;
     for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) {
-        if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node))
+        if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) {
             return;
-        if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness())
+        }
+        if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness()) {
             succProc.insert(schedule->assignedProcessor(succ));
+        }
     }
-    if(schedule->getStaleness() == 2)
-    {
-        for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node))
-            if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node))
+    if (schedule->getStaleness() == 2) {
+        for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) {
+            if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) {
                 succProc.insert(schedule->assignedProcessor(pred));
+            }
+        }
     }
 
-    if (succProc.size() > 1)
+    if (succProc.size() > 1) {
         return;
+    }
 
-    if (succProc.size() == 1)
+    if (succProc.size() == 1) {
         addMoveOption(node, *succProc.begin(), LATER);
-    else
-        for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc)
+    } else {
+        for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) {
             addMoveOption(node, proc, LATER);
+        }
+    }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::updateNodeMoves(const vertex_idx node) {
     eraseMoveOptions(node);
     updateNodeMovesEarlier(node);
@@ -456,70 +510,56 @@ void HillClimbingScheduler<Graph_t>::updateNodeMoves(const vertex_idx node) {
     updateNodeMovesLater(node);
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::updateMoveOptions(vertex_idx node, int where)
-{
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::updateMoveOptions(vertex_idx node, int where) {
     const Graph_t &G = schedule->getInstance().getComputationalDag();
 
     updateNodeMoves(node);
-    if(where==0)
-    {
-        for(const vertex_idx &pred : G.parents(node))
-        {
+    if (where == 0) {
+        for (const vertex_idx &pred : G.parents(node)) {
             eraseMoveOptionsLater(pred);
             updateNodeMovesLater(pred);
         }
-        for(const vertex_idx &succ : G.children(node))
-        {
+        for (const vertex_idx &succ : G.children(node)) {
             eraseMoveOptionsEarlier(succ);
             updateNodeMovesEarlier(succ);
         }
     }
-    if(where==-1)
-    {
-        for(const vertex_idx &pred : G.parents(node))
-        {
+    if (where == -1) {
+        for (const vertex_idx &pred : G.parents(node)) {
             eraseMoveOptionsLater(pred);
             updateNodeMovesLater(pred);
             eraseMoveOptionsAt(pred);
             updateNodeMovesAt(pred);
-            if(schedule->getStaleness() == 2)
-            {
+            if (schedule->getStaleness() == 2) {
                 eraseMoveOptionsEarlier(pred);
                 updateNodeMovesEarlier(pred);
             }
         }
-        for(const vertex_idx &succ : G.children(node))
-        {
+        for (const vertex_idx &succ : G.children(node)) {
             eraseMoveOptionsEarlier(succ);
             updateNodeMovesEarlier(succ);
-            if(schedule->getStaleness() == 2)
-            {
+            if (schedule->getStaleness() == 2) {
                 eraseMoveOptionsAt(succ);
                 updateNodeMovesAt(succ);
             }
         }
     }
-    if(where==1)
-    {
-        for(const vertex_idx &pred : G.parents(node))
-        {
+    if (where == 1) {
+        for (const vertex_idx &pred : G.parents(node)) {
             eraseMoveOptionsLater(pred);
             updateNodeMovesLater(pred);
-            if(schedule->getStaleness() == 2)
-            {
+            if (schedule->getStaleness() == 2) {
                 eraseMoveOptionsAt(pred);
                 updateNodeMovesAt(pred);
             }
         }
-        for(const vertex_idx &succ : G.children(node))
-        {
+        for (const vertex_idx &succ : G.children(node)) {
             eraseMoveOptionsEarlier(succ);
             updateNodeMovesEarlier(succ);
             eraseMoveOptionsAt(succ);
             updateNodeMovesAt(succ);
-            if(schedule->getStaleness() == 2)
-            {
+            if (schedule->getStaleness() == 2) {
                 eraseMoveOptionsLater(succ);
                 updateNodeMovesLater(succ);
             }
@@ -527,7 +567,7 @@ void HillClimbingScheduler<Graph_t>::updateMoveOptions(vertex_idx node, int wher
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::addMoveOption(const vertex_idx node, const unsigned p, const Direction dir) {
     if (!canMove[dir][node][p] && schedule->getInstance().isCompatible(node, p)) {
         canMove[dir][node][p] = true;
@@ -536,49 +576,51 @@ void HillClimbingScheduler<Graph_t>::addMoveOption(const vertex_idx node, const
     }
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::eraseMoveOption(vertex_idx node, unsigned p, Direction dir)
-{
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::eraseMoveOption(vertex_idx node, unsigned p, Direction dir) {
     canMove[dir][node][p] = false;
-    if(nextMove.first == dir && nextMove.second->first == node && nextMove.second->second == p)
+    if (nextMove.first == dir && nextMove.second->first == node && nextMove.second->second == p) {
         ++nextMove.second;
+    }
     moveOptions[dir].erase(movePointer[dir][node][p]);
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::eraseMoveOptionsEarlier(vertex_idx node)
-{
-    for(unsigned proc=0; proc<schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc)
-        if(canMove[EARLIER][node][proc])
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::eraseMoveOptionsEarlier(vertex_idx node) {
+    for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) {
+        if (canMove[EARLIER][node][proc]) {
             eraseMoveOption(node, proc, EARLIER);
+        }
+    }
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::eraseMoveOptionsAt(vertex_idx node)
-{
-    for(unsigned proc=0; proc<schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc)
-        if(canMove[AT][node][proc])
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::eraseMoveOptionsAt(vertex_idx node) {
+    for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) {
+        if (canMove[AT][node][proc]) {
             eraseMoveOption(node, proc, AT);
+        }
+    }
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::eraseMoveOptionsLater(vertex_idx node)
-{
-    for(unsigned proc=0; proc<schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc)
-        if(canMove[LATER][node][proc])
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::eraseMoveOptionsLater(vertex_idx node) {
+    for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) {
+        if (canMove[LATER][node][proc]) {
             eraseMoveOption(node, proc, LATER);
+        }
+    }
 }
 
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::eraseMoveOptions(vertex_idx node)
-{
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::eraseMoveOptions(vertex_idx node) {
     eraseMoveOptionsEarlier(node);
     eraseMoveOptionsAt(node);
     eraseMoveOptionsLater(node);
 }
 
 // Compute the cost change incurred by a potential move
-template<typename Graph_t>
+template <typename Graph_t>
 int HillClimbingScheduler<Graph_t>::moveCostChange(const vertex_idx node, unsigned p, const int where, stepAuxData &changing) {
     const unsigned step = schedule->assignedSuperstep(node);
     const unsigned new_step = static_cast<unsigned>(static_cast<int>(step) + where);
@@ -596,116 +638,174 @@ int HillClimbingScheduler<Graph_t>::moveCostChange(const vertex_idx node, unsign
     if (itBest->second == oldProc) {
         auto itNext = itBest;
         --itNext;
-        maxAfterRemoval = std::max(itBest->first - schedule->getInstance().getComputationalDag().vertex_work_weight(node), itNext->first);
-        if(itBest->first != maxAfterRemoval)
-        {
-            if(step == 0 || schedule->getStaleness() == 1) // incorporate immediately into cost change
+        maxAfterRemoval
+            = std::max(itBest->first - schedule->getInstance().getComputationalDag().vertex_work_weight(node), itNext->first);
+        if (itBest->first != maxAfterRemoval) {
+            if (step == 0 || schedule->getStaleness() == 1) {    // incorporate immediately into cost change
                 change -= static_cast<int>(itBest->first) - static_cast<int>(maxAfterRemoval);
-            else
-            {
+            } else {
                 newWorkCost[step] = maxAfterRemoval;
-                affectedSteps.insert(step-1);
+                affectedSteps.insert(step - 1);
             }
         }
     }
 
     const cost_type maxBeforeAddition = (where == 0) ? maxAfterRemoval : workCostList[new_step].rbegin()->first;
-    if (workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node) > maxBeforeAddition)
-    {
-        if(new_step == 0 || schedule->getStaleness() == 1) // incorporate immediately into cost change
-            change += static_cast<int>(workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node)) - static_cast<int>(maxBeforeAddition);
-        else
-        {
+    if (workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node) > maxBeforeAddition) {
+        if (new_step == 0 || schedule->getStaleness() == 1) {    // incorporate immediately into cost change
+            change
+                += static_cast<int>(workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node))
+                   - static_cast<int>(maxBeforeAddition);
+        } else {
             newWorkCost[new_step] = workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node);
-            affectedSteps.insert(new_step-1);
+            affectedSteps.insert(new_step - 1);
         }
     }
 
     // Comm cost change
-    std::list<std::tuple<unsigned, unsigned, int> > sentInc, recInc;
+    std::list<std::tuple<unsigned, unsigned, int>> sentInc, recInc;
     //  -outputs
     if (p != oldProc) {
         for (unsigned j = 0; j < schedule->getInstance().getArchitecture().numberOfProcessors(); ++j) {
-            if (succSteps[node][j].empty())
+            if (succSteps[node][j].empty()) {
                 continue;
+            }
 
             unsigned affectedStep = succSteps[node][j].begin()->first - schedule->getStaleness();
             if (j == p) {
-                sentInc.emplace_back(affectedStep, oldProc,
-                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
-                recInc.emplace_back(affectedStep, p, -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
+                sentInc.emplace_back(affectedStep,
+                                     oldProc,
+                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                       * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
+                recInc.emplace_back(affectedStep,
+                                    p,
+                                    -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                      * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
             } else if (j == oldProc) {
-                recInc.emplace_back(affectedStep, oldProc, static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j)));
-                sentInc.emplace_back(affectedStep, p, static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j)));
+                recInc.emplace_back(affectedStep,
+                                    oldProc,
+                                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                     * schedule->getInstance().getArchitecture().sendCosts(p, j)));
+                sentInc.emplace_back(affectedStep,
+                                     p,
+                                     static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                      * schedule->getInstance().getArchitecture().sendCosts(p, j)));
             } else {
-                sentInc.emplace_back(affectedStep, oldProc,
-                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
-                recInc.emplace_back(affectedStep, j, -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
-                sentInc.emplace_back(affectedStep, p, static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j)));
-                recInc.emplace_back(affectedStep, j, static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j)));
+                sentInc.emplace_back(affectedStep,
+                                     oldProc,
+                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                       * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
+                recInc.emplace_back(affectedStep,
+                                    j,
+                                    -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                      * schedule->getInstance().getArchitecture().sendCosts(oldProc, j)));
+                sentInc.emplace_back(affectedStep,
+                                     p,
+                                     static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                      * schedule->getInstance().getArchitecture().sendCosts(p, j)));
+                recInc.emplace_back(affectedStep,
+                                    j,
+                                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                                     * schedule->getInstance().getArchitecture().sendCosts(p, j)));
             }
         }
     }
 
     //  -inputs
-    if (p == oldProc)
+    if (p == oldProc) {
         for (const vertex_idx &pred : G.parents(node)) {
-            if (schedule->assignedProcessor(pred) == p)
+            if (schedule->assignedProcessor(pred) == p) {
                 continue;
+            }
 
             const auto firstUse = *succSteps[pred][p].begin();
             const bool skip = firstUse.first < step || (firstUse.first == step && where >= 0 && firstUse.second > 1);
             if (!skip) {
-                sentInc.emplace_back(step - schedule->getStaleness(), schedule->assignedProcessor(pred),
-                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
-                recInc.emplace_back(step - schedule->getStaleness(), p,
-                                    -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
-                sentInc.emplace_back(new_step - schedule->getStaleness(), schedule->assignedProcessor(pred),
-                                     static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
-                recInc.emplace_back(new_step - schedule->getStaleness(), p,
-                                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
+                sentInc.emplace_back(step - schedule->getStaleness(),
+                                     schedule->assignedProcessor(pred),
+                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                       * schedule->getInstance().getArchitecture().sendCosts(
+                                                           schedule->assignedProcessor(pred), p)));
+                recInc.emplace_back(step - schedule->getStaleness(),
+                                    p,
+                                    -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                      * schedule->getInstance().getArchitecture().sendCosts(
+                                                          schedule->assignedProcessor(pred), p)));
+                sentInc.emplace_back(
+                    new_step - schedule->getStaleness(),
+                    schedule->assignedProcessor(pred),
+                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                     * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
+                recInc.emplace_back(
+                    new_step - schedule->getStaleness(),
+                    p,
+                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                     * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
             }
         }
-    else
+    } else {
         for (const vertex_idx &pred : G.parents(node)) {
             // Comm. cost of sending pred to oldProc
             auto firstUse = succSteps[pred][oldProc].begin();
-            bool skip = (schedule->assignedProcessor(pred) == oldProc) || firstUse->first < step ||
-                        (firstUse->first == step && firstUse->second > 1);
+            bool skip = (schedule->assignedProcessor(pred) == oldProc) || firstUse->first < step
+                        || (firstUse->first == step && firstUse->second > 1);
             if (!skip) {
-                sentInc.emplace_back(step - schedule->getStaleness(), schedule->assignedProcessor(pred),
-                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc)));
-                recInc.emplace_back(step - schedule->getStaleness(), oldProc,
-                                    -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc)));
+                sentInc.emplace_back(step - schedule->getStaleness(),
+                                     schedule->assignedProcessor(pred),
+                                     -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                       * schedule->getInstance().getArchitecture().sendCosts(
+                                                           schedule->assignedProcessor(pred), oldProc)));
+                recInc.emplace_back(step - schedule->getStaleness(),
+                                    oldProc,
+                                    -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                      * schedule->getInstance().getArchitecture().sendCosts(
+                                                          schedule->assignedProcessor(pred), oldProc)));
                 ++firstUse;
                 if (firstUse != succSteps[pred][oldProc].end()) {
                     const unsigned nextStep = firstUse->first;
-                    sentInc.emplace_back(nextStep - schedule->getStaleness(), schedule->assignedProcessor(pred),
-                                         static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) *
-                                             schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc)));
-                    recInc.emplace_back(nextStep - schedule->getStaleness(), oldProc,
-                                        static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) *
-                                            schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc)));
+                    sentInc.emplace_back(nextStep - schedule->getStaleness(),
+                                         schedule->assignedProcessor(pred),
+                                         static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                          * schedule->getInstance().getArchitecture().sendCosts(
+                                                              schedule->assignedProcessor(pred), oldProc)));
+                    recInc.emplace_back(nextStep - schedule->getStaleness(),
+                                        oldProc,
+                                        static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                         * schedule->getInstance().getArchitecture().sendCosts(
+                                                             schedule->assignedProcessor(pred), oldProc)));
                 }
             }
 
             // Comm. cost of sending pred to p
             firstUse = succSteps[pred][p].begin();
-            skip = (schedule->assignedProcessor(pred) == p) ||
-                   ((firstUse != succSteps[pred][p].end()) && (firstUse->first <= new_step));
+            skip = (schedule->assignedProcessor(pred) == p)
+                   || ((firstUse != succSteps[pred][p].end()) && (firstUse->first <= new_step));
             if (!skip) {
-                sentInc.emplace_back(new_step - schedule->getStaleness(), schedule->assignedProcessor(pred),
-                                     static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
-                recInc.emplace_back(new_step - schedule->getStaleness(), p,
-                                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
+                sentInc.emplace_back(
+                    new_step - schedule->getStaleness(),
+                    schedule->assignedProcessor(pred),
+                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                     * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
+                recInc.emplace_back(
+                    new_step - schedule->getStaleness(),
+                    p,
+                    static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                     * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
                 if (firstUse != succSteps[pred][p].end()) {
-                    sentInc.emplace_back(firstUse->first - schedule->getStaleness(), schedule->assignedProcessor(pred),
-                                         -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
-                    recInc.emplace_back(firstUse->first - schedule->getStaleness(), p,
-                                        -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p)));
+                    sentInc.emplace_back(firstUse->first - schedule->getStaleness(),
+                                         schedule->assignedProcessor(pred),
+                                         -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                           * schedule->getInstance().getArchitecture().sendCosts(
+                                                               schedule->assignedProcessor(pred), p)));
+                    recInc.emplace_back(firstUse->first - schedule->getStaleness(),
+                                        p,
+                                        -static_cast<int>(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred)
+                                                          * schedule->getInstance().getArchitecture().sendCosts(
+                                                              schedule->assignedProcessor(pred), p)));
                 }
             }
         }
+    }
 
     //  -process changes
     changing.sentChange.clear();
@@ -716,10 +816,11 @@ int HillClimbingScheduler<Graph_t>::moveCostChange(const vertex_idx node, unsign
         const int e_increase = std::get<2>(entry);
         affectedSteps.insert(e_step);
         auto itr = changing.sentChange.find(std::make_pair(e_step, e_proc));
-        if (itr == changing.sentChange.end())
+        if (itr == changing.sentChange.end()) {
             changing.sentChange.insert({std::make_pair(e_step, e_proc), e_increase});
-        else
+        } else {
             itr->second += e_increase;
+        }
     }
     for (auto entry : recInc) {
         const unsigned e_step = std::get<0>(entry);
@@ -727,10 +828,11 @@ int HillClimbingScheduler<Graph_t>::moveCostChange(const vertex_idx node, unsign
         const int e_increase = std::get<2>(entry);
         affectedSteps.insert(e_step);
         auto itr = changing.recChange.find(std::make_pair(e_step, e_proc));
-        if (itr == changing.recChange.end())
+        if (itr == changing.recChange.end()) {
             changing.recChange.insert({std::make_pair(e_step, e_proc), e_increase});
-        else
+        } else {
             itr->second += e_increase;
+        }
     }
 
     auto itrSent = changing.sentChange.begin(), itrRec = changing.recChange.begin();
@@ -744,38 +846,40 @@ int HillClimbingScheduler<Graph_t>::moveCostChange(const vertex_idx node, unsign
             int diff = (itrSent != changing.sentChange.end() && itrSent->first.first == sstep && itrSent->first.second == j)
                            ? (itrSent++)->second
                            : 0;
-            if (static_cast<int>(sent[sstep][j]) + diff > static_cast<int>(newMax))
+            if (static_cast<int>(sent[sstep][j]) + diff > static_cast<int>(newMax)) {
                 newMax = static_cast<cost_type>(static_cast<int>(sent[sstep][j]) + diff);
+            }
             diff = (itrRec != changing.recChange.end() && itrRec->first.first == sstep && itrRec->first.second == j)
                        ? (itrRec++)->second
                        : 0;
-            if (static_cast<int>(received[sstep][j]) + diff > static_cast<int>(newMax))
+            if (static_cast<int>(received[sstep][j]) + diff > static_cast<int>(newMax)) {
                 newMax = static_cast<cost_type>(static_cast<int>(received[sstep][j]) + diff);
+            }
         }
         newMax *= schedule->getInstance().getArchitecture().communicationCosts();
         cost_type newSync = (HCwithLatency && newMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0;
 
-        if(newMax == 0)
-        {
-            if(schedule->getStaleness() == 1)
+        if (newMax == 0) {
+            if (schedule->getStaleness() == 1) {
                 changing.canShrink = true;
-            else
-            {
-                if( (sstep > 0 && affectedSteps.find(sstep-1) == affectedSteps.end() && commCostList[sstep-1].rbegin()->first == 0) ||
-                    (sstep < commCostList.size()-1 && affectedSteps.find(sstep+1) == affectedSteps.end() && commCostList[sstep+1].rbegin()->first == 0) ||
-                    (sstep > 0 && affectedSteps.find(sstep-1) != affectedSteps.end() && last_affected_empty) )
+            } else {
+                if ((sstep > 0 && affectedSteps.find(sstep - 1) == affectedSteps.end()
+                     && commCostList[sstep - 1].rbegin()->first == 0)
+                    || (sstep < commCostList.size() - 1 && affectedSteps.find(sstep + 1) == affectedSteps.end()
+                        && commCostList[sstep + 1].rbegin()->first == 0)
+                    || (sstep > 0 && affectedSteps.find(sstep - 1) != affectedSteps.end() && last_affected_empty)) {
                     changing.canShrink = true;
+                }
             }
             last_affected_empty = true;
-        }
-        else
+        } else {
             last_affected_empty = false;
+        }
 
-        if(schedule->getStaleness() == 2)
-        {
-            auto itrWork = newWorkCost.find(sstep+1);
-            oldMax = std::max(oldMax, workCostList[sstep+1].rbegin()->first);
-            newMax = std::max(newMax, itrWork != newWorkCost.end() ? itrWork->second : workCostList[sstep+1].rbegin()->first);
+        if (schedule->getStaleness() == 2) {
+            auto itrWork = newWorkCost.find(sstep + 1);
+            oldMax = std::max(oldMax, workCostList[sstep + 1].rbegin()->first);
+            newMax = std::max(newMax, itrWork != newWorkCost.end() ? itrWork->second : workCostList[sstep + 1].rbegin()->first);
         }
         change += static_cast<int>(newMax + newSync) - static_cast<int>(oldMax + oldSync);
     }
@@ -785,8 +889,11 @@ int HillClimbingScheduler<Graph_t>::moveCostChange(const vertex_idx node, unsign
 }
 
 // Execute a chosen move, updating the schedule and the data structures
-template<typename Graph_t>
-void HillClimbingScheduler<Graph_t>::executeMove(const vertex_idx node, const unsigned newProc, const int where, const stepAuxData &changing) {
+template <typename Graph_t>
+void HillClimbingScheduler<Graph_t>::executeMove(const vertex_idx node,
+                                                 const unsigned newProc,
+                                                 const int where,
+                                                 const stepAuxData &changing) {
     unsigned oldStep = schedule->assignedSuperstep(node);
     unsigned newStep = static_cast<unsigned>(static_cast<int>(oldStep) + where);
     const unsigned oldProc = schedule->assignedProcessor(node);
@@ -795,55 +902,63 @@ void HillClimbingScheduler<Graph_t>::executeMove(const vertex_idx node, const un
     // Work cost change
     workCostList[oldStep].erase(workCostPointer[oldStep][oldProc]);
     workCost[oldStep][oldProc] -= schedule->getInstance().getComputationalDag().vertex_work_weight(node);
-    workCostPointer[oldStep][oldProc] =
-        workCostList[oldStep].insert(std::make_pair(workCost[oldStep][oldProc], oldProc)).first;
+    workCostPointer[oldStep][oldProc] = workCostList[oldStep].insert(std::make_pair(workCost[oldStep][oldProc], oldProc)).first;
 
     workCostList[newStep].erase(workCostPointer[newStep][newProc]);
     workCost[newStep][newProc] += schedule->getInstance().getComputationalDag().vertex_work_weight(node);
-    workCostPointer[newStep][newProc] =
-        workCostList[newStep].insert(std::make_pair(workCost[newStep][newProc], newProc)).first;
+    workCostPointer[newStep][newProc] = workCostList[newStep].insert(std::make_pair(workCost[newStep][newProc], newProc)).first;
 
     // Comm cost change
-    for (const auto& update : changing.sentChange)
-        sent[update.first.first][update.first.second] = static_cast<cost_type>(static_cast<int>(sent[update.first.first][update.first.second]) + update.second);
-    for (const auto& update : changing.recChange)
-        received[update.first.first][update.first.second] = static_cast<cost_type>(static_cast<int>(received[update.first.first][update.first.second]) + update.second);
-
-    std::set<std::pair<unsigned, unsigned> > toUpdate;
-    for (const auto& update : changing.sentChange)
-        if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) !=
-            commCost[update.first.first][update.first.second])
+    for (const auto &update : changing.sentChange) {
+        sent[update.first.first][update.first.second]
+            = static_cast<cost_type>(static_cast<int>(sent[update.first.first][update.first.second]) + update.second);
+    }
+    for (const auto &update : changing.recChange) {
+        received[update.first.first][update.first.second]
+            = static_cast<cost_type>(static_cast<int>(received[update.first.first][update.first.second]) + update.second);
+    }
+
+    std::set<std::pair<unsigned, unsigned>> toUpdate;
+    for (const auto &update : changing.sentChange) {
+        if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second])
+            != commCost[update.first.first][update.first.second]) {
             toUpdate.insert(std::make_pair(update.first.first, update.first.second));
+        }
+    }
 
-    for (const auto& update : changing.recChange)
-        if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) !=
-            commCost[update.first.first][update.first.second])
+    for (const auto &update : changing.recChange) {
+        if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second])
+            != commCost[update.first.first][update.first.second]) {
             toUpdate.insert(std::make_pair(update.first.first, update.first.second));
+        }
+    }
 
-    for (const auto& update : toUpdate) {
+    for (const auto &update : toUpdate) {
         commCostList[update.first].erase(commCostPointer[update.first][update.second]);
         commCost[update.first][update.second] = std::max(sent[update.first][update.second], received[update.first][update.second]);
-        commCostPointer[update.first][update.second] =
-            commCostList[update.first].insert(std::make_pair(commCost[update.first][update.second], update.second)).first;
+        commCostPointer[update.first][update.second]
+            = commCostList[update.first].insert(std::make_pair(commCost[update.first][update.second], update.second)).first;
     }
 
     // update successor lists
     for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) {
         auto itr = succSteps[pred][oldProc].find(oldStep);
-        if ((--(itr->second)) == 0)
+        if ((--(itr->second)) == 0) {
             succSteps[pred][oldProc].erase(itr);
+        }
 
         itr = succSteps[pred][newProc].find(newStep);
-        if (itr == succSteps[pred][newProc].end())
+        if (itr == succSteps[pred][newProc].end()) {
             succSteps[pred][newProc].insert({newStep, 1U});
-        else
+        } else {
             itr->second += 1;
+        }
     }
 
     // memory constraints, if any
-    if(use_memory_constraint)
-    {
-        memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] -= schedule->getInstance().getComputationalDag().vertex_mem_weight(node);
+    if (use_memory_constraint) {
+        memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)]
+            -= schedule->getInstance().getComputationalDag().vertex_mem_weight(node);
         memory_used[newProc][newStep] += schedule->getInstance().getComputationalDag().vertex_mem_weight(node);
     }
 
@@ -858,7 +973,7 @@ void HillClimbingScheduler<Graph_t>::executeMove(const vertex_idx node, const un
 }
 
 // Single hill climbing step
-template<typename Graph_t>
+template <typename Graph_t>
 bool HillClimbingScheduler<Graph_t>::Improve() {
     cost_type bestCost = cost;
     stepAuxData bestMoveData;
@@ -867,8 +982,7 @@ bool HillClimbingScheduler<Graph_t>::Improve() {
     int startingDir = nextMove.first;
 
     // pre-selected "promising" moves
-    while(!promisingMoves.empty() && !steepestAscent)
-    {
+    while (!promisingMoves.empty() && !steepestAscent) {
         std::tuple<vertex_idx, unsigned, int> next = promisingMoves.front();
         promisingMoves.pop_front();
 
@@ -876,44 +990,43 @@ bool HillClimbingScheduler<Graph_t>::Improve() {
         const unsigned proc = std::get<1>(next);
         const int where = std::get<2>(next);
 
-        if(!canMove[static_cast<Direction>(where)][node][proc])
+        if (!canMove[static_cast<Direction>(where)][node][proc]) {
             continue;
+        }
 
-        if(use_memory_constraint && violatesMemConstraint(node, proc, where-1))
+        if (use_memory_constraint && violatesMemConstraint(node, proc, where - 1)) {
             continue;
+        }
 
         stepAuxData moveData;
-        int costDiff = moveCostChange(node, proc, where-1, moveData);
+        int costDiff = moveCostChange(node, proc, where - 1, moveData);
 
-        if(costDiff<0)
-        {
-            executeMove(node, proc, where-1, moveData);
-            if(shrink && moveData.canShrink)
+        if (costDiff < 0) {
+            executeMove(node, proc, where - 1, moveData);
+            if (shrink && moveData.canShrink) {
                 Init();
+            }
 
             return true;
         }
-
     }
 
     // standard moves
     int dir = startingDir;
-    while(true)
-    {
+    while (true) {
         bool reachedBeginning = false;
-        while(nextMove.second == moveOptions[static_cast<unsigned>(nextMove.first)].end())
-        {
-            dir = (nextMove.first+1)%NumDirections;
-            if(dir == startingDir)
-            {
+        while (nextMove.second == moveOptions[static_cast<unsigned>(nextMove.first)].end()) {
+            dir = (nextMove.first + 1) % NumDirections;
+            if (dir == startingDir) {
                 reachedBeginning = true;
                 break;
             }
             nextMove.first = dir;
             nextMove.second = moveOptions[static_cast<unsigned>(nextMove.first)].begin();
         }
-        if(reachedBeginning)
+        if (reachedBeginning) {
             break;
+        }
 
         std::pair<vertex_idx, unsigned> next = *nextMove.second;
         ++nextMove.second;
@@ -921,55 +1034,54 @@ bool HillClimbingScheduler<Graph_t>::Improve() {
         const vertex_idx node = next.first;
         const unsigned proc = next.second;
 
-        if(use_memory_constraint && violatesMemConstraint(node, proc, dir-1))
+        if (use_memory_constraint && violatesMemConstraint(node, proc, dir - 1)) {
             continue;
+        }
 
         stepAuxData moveData;
-        int costDiff = moveCostChange(node, proc, dir-1, moveData);
+        int costDiff = moveCostChange(node, proc, dir - 1, moveData);
 
-        if(!steepestAscent && costDiff<0)
-        {
-            executeMove(node, proc, dir-1, moveData);
-            if(shrink && moveData.canShrink)
+        if (!steepestAscent && costDiff < 0) {
+            executeMove(node, proc, dir - 1, moveData);
+            if (shrink && moveData.canShrink) {
                 Init();
+            }
 
             return true;
-        }
-        else if(static_cast<cost_type>(static_cast<int>(cost)+costDiff) < bestCost)
-        {
-            bestCost = static_cast<cost_type>(static_cast<int>(cost)+costDiff);
+        } else if (static_cast<cost_type>(static_cast<int>(cost) + costDiff) < bestCost) {
+            bestCost = static_cast<cost_type>(static_cast<int>(cost) + costDiff);
             bestMove = next;
             bestMoveData = moveData;
-            bestDir = dir-1;
+            bestDir = dir - 1;
         }
-
-
     }
 
-    if (bestCost == cost)
+    if (bestCost == cost) {
         return false;
+    }
 
     executeMove(bestMove.first, bestMove.second, bestDir, bestMoveData);
-    if(shrink && bestMoveData.canShrink)
+    if (shrink && bestMoveData.canShrink) {
         Init();
+    }
 
     return true;
 }
 
 // Check if move violates mem constraints
-template<typename Graph_t>
-bool HillClimbingScheduler<Graph_t>::violatesMemConstraint(vertex_idx node, unsigned processor, int where)
-{
-    if(memory_used[processor][static_cast<unsigned>(static_cast<int>(schedule->assignedSuperstep(node))+where)]
-        + schedule->getInstance().getComputationalDag().vertex_mem_weight(node) > schedule->getInstance().memoryBound(processor)) // TODO ANDRAS double check change
+template <typename Graph_t>
+bool HillClimbingScheduler<Graph_t>::violatesMemConstraint(vertex_idx node, unsigned processor, int where) {
+    if (memory_used[processor][static_cast<unsigned>(static_cast<int>(schedule->assignedSuperstep(node)) + where)]
+            + schedule->getInstance().getComputationalDag().vertex_mem_weight(node)
+        > schedule->getInstance().memoryBound(processor)) {    // TODO ANDRAS double check change
         return true;
+    }
 
     return false;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingScheduler<Graph_t>::CreateSupstepLists() {
-
     const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors();
     const Graph_t &G = schedule->getInstance().getComputationalDag();
 
@@ -979,9 +1091,9 @@ void HillClimbingScheduler<Graph_t>::CreateSupstepLists() {
     supsteplists.clear();
     supsteplists.resize(M, std::vector<std::list<vertex_idx>>(P));
 
-    for (vertex_idx node : top_sort_view(G))
+    for (vertex_idx node : top_sort_view(G)) {
         supsteplists[schedule->assignedSuperstep(node)][schedule->assignedProcessor(node)].push_back(node);
-
+    }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp
index ba895b70..07131ff3 100644
--- a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp
@@ -25,9 +25,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class HillClimbingForCommSteps {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the computational_dag concept");
 
@@ -88,27 +87,27 @@ class HillClimbingForCommSteps {
     virtual std::string getScheduleName() const { return "HillClimbingForCommSchedule"; }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS HillClimbingForCommSteps<Graph_t>::improveSchedule(BspScheduleCS<Graph_t> &input_schedule) {
-
     return improveScheduleWithTimeLimit(input_schedule, 180);
 }
 
 // Main method for hill climbing (with time limit)
-template<typename Graph_t>
-RETURN_STATUS HillClimbingForCommSteps<Graph_t>::improveScheduleWithTimeLimit(BspScheduleCS<Graph_t> &input_schedule, const unsigned timeLimit) {
-
+template <typename Graph_t>
+RETURN_STATUS HillClimbingForCommSteps<Graph_t>::improveScheduleWithTimeLimit(BspScheduleCS<Graph_t> &input_schedule,
+                                                                              const unsigned timeLimit) {
     schedule = &input_schedule;
 
-    if (schedule->numberOfSupersteps() <= 2)
+    if (schedule->numberOfSupersteps() <= 2) {
         return RETURN_STATUS::OSP_SUCCESS;
+    }
 
     Init();
     // ConvertCommSchedule();
     const std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();
 
     unsigned counter = 0;
-    while (Improve())
+    while (Improve()) {
         if ((++counter) == 100) {
             counter = 0;
             std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
@@ -118,6 +117,7 @@ RETURN_STATUS HillClimbingForCommSteps<Graph_t>::improveScheduleWithTimeLimit(Bs
                 break;
             }
         }
+    }
 
     ConvertCommSchedule();
 
@@ -125,7 +125,7 @@ RETURN_STATUS HillClimbingForCommSteps<Graph_t>::improveScheduleWithTimeLimit(Bs
 }
 
 // Initialization for comm. schedule hill climbing
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingForCommSteps<Graph_t>::Init() {
     const unsigned N = static_cast<unsigned>(schedule->getInstance().getComputationalDag().num_vertices());
     const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors();
@@ -160,63 +160,72 @@ void HillClimbingForCommSteps<Graph_t>::Init() {
     commSchedRecListPointer.resize(N, std::vector<typename std::list<std::pair<vertex_idx, unsigned>>::iterator>(P));
 
     // initialize to lazy comm schedule first - to make sure it's correct even if e.g. com scehdule has indirect sending
-    for (unsigned step = 1; step < M; ++step)
-        for (unsigned proc = 0; proc < P; ++proc)
-            for (const vertex_idx node : supsteplists[step][proc])
-                for (const vertex_idx &pred : G.parents(node))
-                    if (schedule->assignedProcessor(pred) != schedule->assignedProcessor(node) &&
-                        commSchedule[pred][schedule->assignedProcessor(node)] == UINT_MAX) {
+    for (unsigned step = 1; step < M; ++step) {
+        for (unsigned proc = 0; proc < P; ++proc) {
+            for (const vertex_idx node : supsteplists[step][proc]) {
+                for (const vertex_idx &pred : G.parents(node)) {
+                    if (schedule->assignedProcessor(pred) != schedule->assignedProcessor(node)
+                        && commSchedule[pred][schedule->assignedProcessor(node)] == UINT_MAX) {
                         commSchedule[pred][schedule->assignedProcessor(node)] = step - schedule->getStaleness();
-                        commBounds[pred][schedule->assignedProcessor(node)] = std::make_pair(schedule->assignedSuperstep(pred), step - schedule->getStaleness());
+                        commBounds[pred][schedule->assignedProcessor(node)]
+                            = std::make_pair(schedule->assignedSuperstep(pred), step - schedule->getStaleness());
                     }
+                }
+            }
+        }
+    }
 
     // overwrite with original comm schedule, wherever possible
-    const std::map<std::tuple<vertex_idx, unsigned, unsigned>, unsigned int> originalCommSchedule = schedule->getCommunicationSchedule();
-    for (vertex_idx node = 0; node < N; ++node)
+    const std::map<std::tuple<vertex_idx, unsigned, unsigned>, unsigned int> originalCommSchedule
+        = schedule->getCommunicationSchedule();
+    for (vertex_idx node = 0; node < N; ++node) {
         for (unsigned proc = 0; proc < P; ++proc) {
-            if (commSchedule[node][proc] == UINT_MAX)
+            if (commSchedule[node][proc] == UINT_MAX) {
                 continue;
+            }
 
             const auto comm_schedule_key = std::make_tuple(node, schedule->assignedProcessor(node), proc);
             auto mapIterator = originalCommSchedule.find(comm_schedule_key);
             if (mapIterator != originalCommSchedule.end()) {
                 unsigned originalStep = mapIterator->second;
-                if (originalStep >= commBounds[node][proc].first && originalStep <= commBounds[node][proc].second)
+                if (originalStep >= commBounds[node][proc].first && originalStep <= commBounds[node][proc].second) {
                     commSchedule[node][proc] = originalStep;
+                }
             }
 
             unsigned step = commSchedule[node][proc];
             commSchedSendLists[step][schedule->assignedProcessor(node)].emplace_front(node, proc);
-            commSchedSendListPointer[node][proc] =
-                commSchedSendLists[step][schedule->assignedProcessor(node)].begin();
+            commSchedSendListPointer[node][proc] = commSchedSendLists[step][schedule->assignedProcessor(node)].begin();
             commSchedRecLists[step][proc].emplace_front(node, proc);
-            commSchedRecListPointer[node][proc] =
-                commSchedRecLists[step][proc].begin();
+            commSchedRecListPointer[node][proc] = commSchedRecLists[step][proc].begin();
 
-            sent[step][schedule->assignedProcessor(node)] +=
-                schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc);
-            received[step][proc] +=
-                schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc);
+            sent[step][schedule->assignedProcessor(node)]
+                += schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                   * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc);
+            received[step][proc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                    * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc);
         }
+    }
 
-    for (unsigned step = 0; step < M - 1; ++step)
+    for (unsigned step = 0; step < M - 1; ++step) {
         for (unsigned proc = 0; proc < P; ++proc) {
             commCost[step][proc] = std::max(sent[step][proc], received[step][proc]);
             commCostPointer[step][proc] = commCostList[step].emplace(commCost[step][proc], proc).first;
         }
+    }
 
     // set minimum cost - differs for BSP and MaxBSP
     minimum_cost_per_superstep.clear();
-    if (schedule->getStaleness() == 1)
+    if (schedule->getStaleness() == 1) {
         minimum_cost_per_superstep.resize(M - 1, 0);
-    else {
+    } else {
         minimum_cost_per_superstep = cost_helpers::compute_max_work_per_step(*schedule);
         minimum_cost_per_superstep.erase(minimum_cost_per_superstep.begin());
     }
 }
 
 // compute cost change incurred by a potential move
-template<typename Graph_t>
+template <typename Graph_t>
 int HillClimbingForCommSteps<Graph_t>::moveCostChange(const vertex_idx node, const unsigned p, const unsigned step) {
     const unsigned oldStep = commSchedule[node][p];
     const unsigned sourceProc = schedule->assignedProcessor(node);
@@ -224,20 +233,27 @@ int HillClimbingForCommSteps<Graph_t>::moveCostChange(const vertex_idx node, con
 
     // Change at old place
     auto itr = commCostList[oldStep].rbegin();
-    cost_type oldMax = std::max(itr->first * schedule->getInstance().getArchitecture().communicationCosts(), minimum_cost_per_superstep[oldStep]) + schedule->getInstance().getArchitecture().synchronisationCosts();
-    cost_type maxSource =
-        std::max(sent[oldStep][sourceProc] - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p),
-                 received[oldStep][sourceProc]);
+    cost_type oldMax = std::max(itr->first * schedule->getInstance().getArchitecture().communicationCosts(),
+                                minimum_cost_per_superstep[oldStep])
+                       + schedule->getInstance().getArchitecture().synchronisationCosts();
+    cost_type maxSource = std::max(sent[oldStep][sourceProc]
+                                       - schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                             * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p),
+                                   received[oldStep][sourceProc]);
     cost_type maxTarget = std::max(sent[oldStep][p],
-                                   received[oldStep][p] - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p));
+                                   received[oldStep][p]
+                                       - schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                             * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p));
     cost_type maxOther = 0;
-    for (; itr != commCostList[oldStep].rend(); ++itr)
+    for (; itr != commCostList[oldStep].rend(); ++itr) {
         if (itr->second != sourceProc && itr->second != p) {
             maxOther = itr->first;
             break;
         }
+    }
 
-    cost_type newMax = std::max(std::max(maxSource, maxTarget), maxOther) * schedule->getInstance().getArchitecture().communicationCosts();
+    cost_type newMax
+        = std::max(std::max(maxSource, maxTarget), maxOther) * schedule->getInstance().getArchitecture().communicationCosts();
     cost_type newSync = (newMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0;
     newMax = std::max(newMax, minimum_cost_per_superstep[oldStep]) + newSync;
     change += static_cast<int>(newMax) - static_cast<int>(oldMax);
@@ -246,19 +262,24 @@ int HillClimbingForCommSteps<Graph_t>::moveCostChange(const vertex_idx node, con
     oldMax = commCostList[step].rbegin()->first * schedule->getInstance().getArchitecture().communicationCosts();
     cost_type oldSync = (oldMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0;
     oldMax = std::max(oldMax, minimum_cost_per_superstep[step]);
-    maxSource = schedule->getInstance().getArchitecture().communicationCosts() *
-                    (sent[step][sourceProc] + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p));
-    maxTarget = schedule->getInstance().getArchitecture().communicationCosts() *
-                    (received[step][p] + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p));
+    maxSource = schedule->getInstance().getArchitecture().communicationCosts()
+                * (sent[step][sourceProc]
+                   + schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                         * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p));
+    maxTarget = schedule->getInstance().getArchitecture().communicationCosts()
+                * (received[step][p]
+                   + schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                         * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p));
 
     newMax = std::max(std::max(oldMax, maxSource), maxTarget);
-    change += static_cast<int>(newMax + schedule->getInstance().getArchitecture().synchronisationCosts()) - static_cast<int>(oldMax + oldSync);
+    change += static_cast<int>(newMax + schedule->getInstance().getArchitecture().synchronisationCosts())
+              - static_cast<int>(oldMax + oldSync);
 
     return change;
 }
 
 // execute a move, updating the comm. schedule and the data structures
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingForCommSteps<Graph_t>::executeMove(vertex_idx node, unsigned p, const unsigned step, const int changeCost) {
     const unsigned oldStep = commSchedule[node][p];
     const unsigned sourceProc = schedule->assignedProcessor(node);
@@ -267,31 +288,37 @@ void HillClimbingForCommSteps<Graph_t>::executeMove(vertex_idx node, unsigned p,
     // Old step update
     if (sent[oldStep][sourceProc] > received[oldStep][sourceProc]) {
         commCostList[oldStep].erase(commCostPointer[oldStep][sourceProc]);
-        sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+        sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                     * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
         commCost[oldStep][sourceProc] = std::max(sent[oldStep][sourceProc], received[oldStep][sourceProc]);
-        commCostPointer[oldStep][sourceProc] =
-            commCostList[oldStep].emplace(commCost[oldStep][sourceProc], sourceProc).first;
-    } else
-        sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+        commCostPointer[oldStep][sourceProc] = commCostList[oldStep].emplace(commCost[oldStep][sourceProc], sourceProc).first;
+    } else {
+        sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                     * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+    }
 
     if (received[oldStep][p] > sent[oldStep][p]) {
         commCostList[oldStep].erase(commCostPointer[oldStep][p]);
-        received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+        received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
         commCost[oldStep][p] = std::max(sent[oldStep][p], received[oldStep][p]);
         commCostPointer[oldStep][p] = commCostList[oldStep].emplace(commCost[oldStep][p], p).first;
-    } else
-        received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+    } else {
+        received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                                * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+    }
 
     // New step update
-    sent[step][sourceProc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+    sent[step][sourceProc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                              * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
     if (sent[step][sourceProc] > received[step][sourceProc]) {
         commCostList[step].erase(commCostPointer[step][sourceProc]);
         commCost[step][sourceProc] = sent[step][sourceProc];
-        commCostPointer[step][sourceProc] =
-            commCostList[step].emplace(commCost[step][sourceProc], sourceProc).first;
+        commCostPointer[step][sourceProc] = commCostList[step].emplace(commCost[step][sourceProc], sourceProc).first;
     }
 
-    received[step][p] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
+    received[step][p] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node)
+                         * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p);
     if (received[step][p] > sent[step][p]) {
         commCostList[step].erase(commCostPointer[step][p]);
         commCost[step][p] = received[step][p];
@@ -312,9 +339,8 @@ void HillClimbingForCommSteps<Graph_t>::executeMove(vertex_idx node, unsigned p,
 }
 
 // Single comm. schedule hill climbing step
-template<typename Graph_t>
+template <typename Graph_t>
 bool HillClimbingForCommSteps<Graph_t>::Improve() {
-
     const unsigned M = static_cast<unsigned>(schedule->numberOfSupersteps());
     int bestDiff = 0;
     vertex_idx bestNode = 0;
@@ -325,31 +351,34 @@ bool HillClimbingForCommSteps<Graph_t>::Improve() {
     while (true) {
         auto itr = commCostList[nextSupstep].rbegin();
 
-        if (itr == commCostList[nextSupstep].crend())
+        if (itr == commCostList[nextSupstep].crend()) {
             break;
+        }
 
         // find maximal comm cost that dominates the h-relation
         const cost_type commMax = itr->first;
         if (commMax == 0) {
             nextSupstep = (nextSupstep + 1) % (M - 1);
-            if (nextSupstep == startingSupstep)
+            if (nextSupstep == startingSupstep) {
                 break;
-            else
+            } else {
                 continue;
+            }
         }
 
         // go over all processors that incur this maximal comm cost in superstep nextSupstep
         for (; itr != commCostList[nextSupstep].rend() && itr->first == commMax; ++itr) {
             const unsigned maxProc = itr->second;
 
-            if (sent[nextSupstep][maxProc] == commMax)
+            if (sent[nextSupstep][maxProc] == commMax) {
                 for (const std::pair<vertex_idx, unsigned> &entry : commSchedSendLists[nextSupstep][maxProc]) {
                     const vertex_idx node = entry.first;
                     const unsigned p = entry.second;
                     // iterate over alternative supsteps to place this communication step
                     for (unsigned step = commBounds[node][p].first; step < commBounds[node][p].second; ++step) {
-                        if (step == commSchedule[node][p])
+                        if (step == commSchedule[node][p]) {
                             continue;
+                        }
 
                         const int costDiff = moveCostChange(node, p, step);
 
@@ -364,15 +393,17 @@ bool HillClimbingForCommSteps<Graph_t>::Improve() {
                         }
                     }
                 }
+            }
 
-            if (received[nextSupstep][maxProc] == commMax)
+            if (received[nextSupstep][maxProc] == commMax) {
                 for (const std::pair<vertex_idx, unsigned> &entry : commSchedRecLists[nextSupstep][maxProc]) {
                     const vertex_idx node = entry.first;
                     const unsigned p = entry.second;
                     // iterate over alternative supsteps to place this communication step
                     for (unsigned step = commBounds[node][p].first; step < commBounds[node][p].second; ++step) {
-                        if (step == commSchedule[node][p])
+                        if (step == commSchedule[node][p]) {
                             continue;
+                        }
 
                         const int costDiff = moveCostChange(node, p, step);
 
@@ -388,24 +419,26 @@ bool HillClimbingForCommSteps<Graph_t>::Improve() {
                         }
                     }
                 }
+            }
         }
 
         nextSupstep = (nextSupstep + 1) % (M - 1);
-        if (nextSupstep == startingSupstep)
+        if (nextSupstep == startingSupstep) {
             break;
+        }
     }
 
-    if (bestDiff == 0)
+    if (bestDiff == 0) {
         return false;
+    }
 
     executeMove(bestNode, bestProc, bestStep, bestDiff);
 
     return true;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingForCommSteps<Graph_t>::CreateSupstepLists() {
-
     const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors();
     const Graph_t &G = schedule->getInstance().getComputationalDag();
 
@@ -416,25 +449,28 @@ void HillClimbingForCommSteps<Graph_t>::CreateSupstepLists() {
     supsteplists.resize(M, std::vector<std::list<vertex_idx>>(P));
 
     const std::vector<vertex_idx> topOrder = GetTopOrder(G);
-    for (vertex_idx node : topOrder)
+    for (vertex_idx node : topOrder) {
         supsteplists[schedule->assignedSuperstep(node)][schedule->assignedProcessor(node)].push_back(node);
+    }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void HillClimbingForCommSteps<Graph_t>::ConvertCommSchedule() {
     const vertex_idx N = static_cast<vertex_idx>(schedule->getInstance().getComputationalDag().num_vertices());
     const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors();
 
     std::map<std::tuple<vertex_idx, unsigned, unsigned>, unsigned> newCommSchedule;
 
-    for (vertex_idx node = 0; node < N; ++node)
-        for (unsigned proc = 0; proc < P; ++proc)
+    for (vertex_idx node = 0; node < N; ++node) {
+        for (unsigned proc = 0; proc < P; ++proc) {
             if (commSchedule[node][proc] != UINT_MAX) {
                 const auto comm_schedule_key = std::make_tuple(node, schedule->assignedProcessor(node), proc);
                 newCommSchedule[comm_schedule_key] = commSchedule[node][proc];
             }
+        }
+    }
 
     schedule->setCommunicationSchedule(newCommSchedule);
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp
index 7d378d1b..f9a921e4 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp
@@ -19,6 +19,7 @@ limitations under the License.
 #pragma once
 
 #include <algorithm>
+#include <boost/heap/fibonacci_heap.hpp>
 #include <chrono>
 #include <limits>
 #include <numeric>
@@ -28,25 +29,21 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include <boost/heap/fibonacci_heap.hpp>
-
+#include "kl_current_schedule.hpp"
 #include "osp/auxiliary/misc.hpp"
 #include "osp/bsp/scheduler/ImprovementScheduler.hpp"
-#include "kl_current_schedule.hpp"
-
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 
 // #define KL_PRINT_SCHEDULE
 
 #ifdef KL_PRINT_SCHEDULE
-#include "file_interactions/DotFileWriter.hpp"
+#    include "file_interactions/DotFileWriter.hpp"
 #endif
 
 namespace osp {
 
 struct kl_base_parameter {
-
     double max_div_best_sol_base_percent = 1.05;
     double max_div_best_sol_rate_percent = 0.002;
 
@@ -74,9 +71,8 @@ struct kl_base_parameter {
     unsigned violations_threshold = 0;
 };
 
-template<typename Graph_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename MemoryConstraint_t>
 class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(has_hashable_edge_desc_v<Graph_t>, "Graph_t must satisfy the has_hashable_edge_desc concept");
     static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the computational_dag concept");
@@ -120,9 +116,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     std::vector<unsigned> unlock;
 
     bool unlock_node(VertexType node) {
-
         if (super_locked_nodes.find(node) == super_locked_nodes.end()) {
-
             if (locked_nodes.find(node) == locked_nodes.end()) {
                 return true;
             } else if (locked_nodes.find(node) != locked_nodes.end() && unlock[node] > 0) {
@@ -137,18 +131,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     bool check_node_unlocked(VertexType node) {
-
-        if (super_locked_nodes.find(node) == super_locked_nodes.end() &&
-            locked_nodes.find(node) == locked_nodes.end()) {
+        if (super_locked_nodes.find(node) == super_locked_nodes.end() && locked_nodes.find(node) == locked_nodes.end()) {
             return true;
         }
         return false;
     };
 
     void reset_locked_nodes() {
-
         for (const auto &i : locked_nodes) {
-
             unlock[i] = parameters.max_num_unlocks;
         }
 
@@ -156,23 +146,20 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     bool check_violation_locked() {
-
-        if (current_schedule.current_violations.empty())
+        if (current_schedule.current_violations.empty()) {
             return false;
+        }
 
         for (auto &edge : current_schedule.current_violations) {
-
             const auto &source_v = source(edge, current_schedule.instance->getComputationalDag());
             const auto &target_v = target(edge, current_schedule.instance->getComputationalDag());
 
-            if (locked_nodes.find(source_v) == locked_nodes.end() ||
-                locked_nodes.find(target_v) == locked_nodes.end()) {
+            if (locked_nodes.find(source_v) == locked_nodes.end() || locked_nodes.find(target_v) == locked_nodes.end()) {
                 return false;
             }
 
             bool abort = false;
             if (locked_nodes.find(source_v) != locked_nodes.end()) {
-
                 if (unlock_node(source_v)) {
                     nodes_to_update.insert(source_v);
                     node_selection.insert(source_v);
@@ -182,7 +169,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
 
             if (locked_nodes.find(target_v) != locked_nodes.end()) {
-
                 if (unlock_node(target_v)) {
                     nodes_to_update.insert(target_v);
                     node_selection.insert(target_v);
@@ -199,13 +185,11 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void reset_gain_heap() {
-
         max_gain_heap.clear();
         node_heap_handles.clear();
     }
 
     virtual void initialize_datastructures() {
-
 #ifdef KL_DEBUG
         std::cout << "KLBase initialize datastructures" << std::endl;
 #endif
@@ -225,31 +209,23 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     std::unordered_set<VertexType> nodes_to_update;
 
     void compute_nodes_to_update(kl_move<Graph_t> move) {
-
         nodes_to_update.clear();
 
         for (const auto &target : current_schedule.instance->getComputationalDag().children(move.node)) {
-
-            if (node_selection.find(target) != node_selection.end() &&
-                locked_nodes.find(target) == locked_nodes.end() &&
-                super_locked_nodes.find(target) == super_locked_nodes.end()) {
-
+            if (node_selection.find(target) != node_selection.end() && locked_nodes.find(target) == locked_nodes.end()
+                && super_locked_nodes.find(target) == super_locked_nodes.end()) {
                 nodes_to_update.insert(target);
             }
         }
 
         for (const auto &source : current_schedule.instance->getComputationalDag().parents(move.node)) {
-
-            if (node_selection.find(source) != node_selection.end() &&
-                locked_nodes.find(source) == locked_nodes.end() &&
-                super_locked_nodes.find(source) == super_locked_nodes.end()) {
-
+            if (node_selection.find(source) != node_selection.end() && locked_nodes.find(source) == locked_nodes.end()
+                && super_locked_nodes.find(source) == super_locked_nodes.end()) {
                 nodes_to_update.insert(source);
             }
         }
 
-        const unsigned start_step =
-            std::min(move.from_step, move.to_step) == 0 ? 0 : std::min(move.from_step, move.to_step) - 1;
+        const unsigned start_step = std::min(move.from_step, move.to_step) == 0 ? 0 : std::min(move.from_step, move.to_step) - 1;
         const unsigned end_step = std::min(current_schedule.num_steps(), std::max(move.from_step, move.to_step) + 2);
 
 #ifdef KL_DEBUG
@@ -257,15 +233,10 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #endif
 
         for (unsigned step = start_step; step < end_step; step++) {
-
             for (unsigned proc = 0; proc < num_procs; proc++) {
-
                 for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) {
-
-                    if (node_selection.find(node) != node_selection.end() &&
-                        locked_nodes.find(node) == locked_nodes.end() &&
-                        super_locked_nodes.find(node) == super_locked_nodes.end()) {
-
+                    if (node_selection.find(node) != node_selection.end() && locked_nodes.find(node) == locked_nodes.end()
+                        && super_locked_nodes.find(node) == super_locked_nodes.end()) {
                         nodes_to_update.insert(node);
                     }
                 }
@@ -274,7 +245,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void initialize_gain_heap(const std::unordered_set<VertexType> &nodes) {
-
         reset_gain_heap();
 
         for (const auto &node : nodes) {
@@ -284,14 +254,10 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void initialize_gain_heap_unlocked_nodes(const std::unordered_set<VertexType> &nodes) {
-
         reset_gain_heap();
 
         for (const auto &node : nodes) {
-
-            if (locked_nodes.find(node) == locked_nodes.end() &&
-                super_locked_nodes.find(node) == super_locked_nodes.end()) {
-
+            if (locked_nodes.find(node) == locked_nodes.end() && super_locked_nodes.find(node) == super_locked_nodes.end()) {
                 compute_node_gain(node);
                 compute_max_gain_insert_or_update_heap(node);
             }
@@ -299,14 +265,11 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void compute_node_gain(VertexType node) {
-
         const unsigned &current_proc = current_schedule.vector_schedule.assignedProcessor(node);
         const unsigned &current_step = current_schedule.vector_schedule.assignedSuperstep(node);
 
         for (unsigned new_proc = 0; new_proc < num_procs; new_proc++) {
-
             if (current_schedule.instance->isCompatible(node, new_proc)) {
-
                 node_gains[node][new_proc][0] = 0.0;
                 node_gains[node][new_proc][1] = 0.0;
                 node_gains[node][new_proc][2] = 0.0;
@@ -319,7 +282,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 compute_work_gain(node, current_step, current_proc, new_proc);
 
                 if constexpr (current_schedule.use_memory_constraint) {
-
                     if (not current_schedule.memory_constraint.can_move(
                             node, new_proc, current_schedule.vector_schedule.assignedSuperstep(node))) {
                         node_gains[node][new_proc][1] = std::numeric_limits<double>::lowest();
@@ -340,7 +302,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
 
             } else {
-
                 node_gains[node][new_proc][0] = std::numeric_limits<double>::lowest();
                 node_gains[node][new_proc][1] = std::numeric_limits<double>::lowest();
                 node_gains[node][new_proc][2] = std::numeric_limits<double>::lowest();
@@ -349,7 +310,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     double compute_max_gain_insert_or_update_heap(VertexType node) {
-
         double node_max_gain = std::numeric_limits<double>::lowest();
         double node_change_in_cost = 0;
         unsigned node_best_step = 0;
@@ -360,14 +320,11 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         unsigned best_step = 0;
 
         for (unsigned proc = 0; proc < num_procs; proc++) {
-
             int rand_count = 0;
 
-            if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 &&
-                current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
-
+            if (current_schedule.vector_schedule.assignedSuperstep(node) > 0
+                && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][0] > node_gains[node][proc][1]) {
-
                     if (node_gains[node][proc][0] > node_gains[node][proc][2]) {
                         proc_max = node_gains[node][proc][0];
                         proc_change_in_cost = node_change_in_costs[node][proc][0];
@@ -380,45 +337,36 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     }
 
                 } else {
-
                     if (node_gains[node][proc][1] > node_gains[node][proc][2]) {
-
                         proc_max = node_gains[node][proc][1];
                         proc_change_in_cost = node_change_in_costs[node][proc][1];
                         best_step = 1;
                     } else {
-
                         proc_max = node_gains[node][proc][2];
                         proc_change_in_cost = node_change_in_costs[node][proc][2];
                         best_step = 2;
                     }
                 }
 
-            } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 &&
-                       current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
-
+            } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0
+                       && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][2] > node_gains[node][proc][1]) {
-
                     proc_max = node_gains[node][proc][2];
                     proc_change_in_cost = node_change_in_costs[node][proc][2];
                     best_step = 2;
                 } else {
-
                     proc_max = node_gains[node][proc][1];
                     proc_change_in_cost = node_change_in_costs[node][proc][1];
                     best_step = 1;
                 }
 
-            } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 &&
-                       current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) {
-
+            } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0
+                       && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][1] > node_gains[node][proc][0]) {
-
                     proc_max = node_gains[node][proc][1];
                     proc_change_in_cost = node_change_in_costs[node][proc][1];
                     best_step = 1;
                 } else {
-
                     proc_max = node_gains[node][proc][0];
                     proc_change_in_cost = node_change_in_costs[node][proc][0];
                     best_step = 0;
@@ -430,14 +378,13 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
 
             if (node_max_gain < proc_max) {
-
                 node_max_gain = proc_max;
                 node_change_in_cost = proc_change_in_cost;
                 node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1;
                 node_best_proc = proc;
                 rand_count = 0;
 
-            } else if (node_max_gain <= proc_max) { // only ==
+            } else if (node_max_gain <= proc_max) {    // only ==
 
                 if (rand() % (2 + rand_count) == 0) {
                     node_max_gain = proc_max;
@@ -450,26 +397,27 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         }
 
         if (node_heap_handles.find(node) != node_heap_handles.end()) {
-
             (*node_heap_handles[node]).to_proc = node_best_proc;
             (*node_heap_handles[node]).to_step = node_best_step;
             (*node_heap_handles[node]).change_in_cost = node_change_in_cost;
 
             if ((*node_heap_handles[node]).gain >= node_max_gain) {
-
                 (*node_heap_handles[node]).gain = node_max_gain;
                 max_gain_heap.update(node_heap_handles[node]);
             }
 
         } else {
-
             // if (node_max_gain < parameters.gain_threshold && node_change_in_cost >
             // parameters.change_in_cost_threshold)
             //     return node_max_gain;
 
-            kl_move<Graph_t> move(
-                node, node_max_gain, node_change_in_cost, current_schedule.vector_schedule.assignedProcessor(node),
-                current_schedule.vector_schedule.assignedSuperstep(node), node_best_proc, node_best_step);
+            kl_move<Graph_t> move(node,
+                                  node_max_gain,
+                                  node_change_in_cost,
+                                  current_schedule.vector_schedule.assignedProcessor(node),
+                                  current_schedule.vector_schedule.assignedSuperstep(node),
+                                  node_best_proc,
+                                  node_best_step);
             node_heap_handles[node] = max_gain_heap.push(move);
         }
 
@@ -477,57 +425,46 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void compute_work_gain(VertexType node, unsigned current_step, unsigned current_proc, unsigned new_proc) {
-
         if (current_proc == new_proc) {
-
             node_gains[node][current_proc][1] = std::numeric_limits<double>::lowest();
 
         } else {
-
-            if (current_schedule.step_max_work[current_step] ==
-                    current_schedule.step_processor_work[current_step][current_proc] &&
-                current_schedule.step_processor_work[current_step][current_proc] >
-                    current_schedule.step_second_max_work[current_step]) {
-
+            if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc]
+                && current_schedule.step_processor_work[current_step][current_proc]
+                       > current_schedule.step_second_max_work[current_step]) {
                 // new max
-                const double new_max_work =
-                    std::max(current_schedule.step_processor_work[current_step][current_proc] -
-                                 current_schedule.instance->getComputationalDag().vertex_work_weight(node),
-                             current_schedule.step_second_max_work[current_step]);
-
-                if (current_schedule.step_processor_work[current_step][new_proc] +
-                        current_schedule.instance->getComputationalDag().vertex_work_weight(node) >
-                    new_max_work) {
-
-                    const double gain =
-                        static_cast<double>(current_schedule.step_max_work[current_step]) -
-                        (static_cast<double>(current_schedule.step_processor_work[current_step][new_proc]) +
-                         static_cast<double>(
-                             current_schedule.instance->getComputationalDag().vertex_work_weight(node)));
+                const double new_max_work
+                    = std::max(current_schedule.step_processor_work[current_step][current_proc]
+                                   - current_schedule.instance->getComputationalDag().vertex_work_weight(node),
+                               current_schedule.step_second_max_work[current_step]);
+
+                if (current_schedule.step_processor_work[current_step][new_proc]
+                        + current_schedule.instance->getComputationalDag().vertex_work_weight(node)
+                    > new_max_work) {
+                    const double gain
+                        = static_cast<double>(current_schedule.step_max_work[current_step])
+                          - (static_cast<double>(current_schedule.step_processor_work[current_step][new_proc])
+                             + static_cast<double>(current_schedule.instance->getComputationalDag().vertex_work_weight(node)));
 
                     node_gains[node][new_proc][1] += gain;
                     node_change_in_costs[node][new_proc][1] -= gain;
 
                 } else {
-
-                    const double gain = static_cast<double>(current_schedule.step_max_work[current_step]) -
-                                        static_cast<double>(new_max_work);
+                    const double gain
+                        = static_cast<double>(current_schedule.step_max_work[current_step]) - static_cast<double>(new_max_work);
 
                     node_gains[node][new_proc][1] += gain;
                     node_change_in_costs[node][new_proc][1] -= gain;
                 }
 
             } else {
-
-                if (current_schedule.step_max_work[current_step] <
-                    current_schedule.instance->getComputationalDag().vertex_work_weight(node) +
-                        current_schedule.step_processor_work[current_step][new_proc]) {
-
-                    const double gain =
-                        (static_cast<double>(
-                             current_schedule.instance->getComputationalDag().vertex_work_weight(node)) +
-                         static_cast<double>(current_schedule.step_processor_work[current_step][new_proc]) -
-                         static_cast<double>(current_schedule.step_max_work[current_step]));
+                if (current_schedule.step_max_work[current_step]
+                    < current_schedule.instance->getComputationalDag().vertex_work_weight(node)
+                          + current_schedule.step_processor_work[current_step][new_proc]) {
+                    const double gain
+                        = (static_cast<double>(current_schedule.instance->getComputationalDag().vertex_work_weight(node))
+                           + static_cast<double>(current_schedule.step_processor_work[current_step][new_proc])
+                           - static_cast<double>(current_schedule.step_max_work[current_step]));
 
                     node_gains[node][new_proc][1] -= gain;
                     node_change_in_costs[node][new_proc][1] += gain;
@@ -536,38 +473,31 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         }
 
         if (current_step > 0) {
-
-            if (current_schedule.step_max_work[current_step - 1] <
-                current_schedule.step_processor_work[current_step - 1][new_proc] +
-                    current_schedule.instance->getComputationalDag().vertex_work_weight(node)) {
-
-                const double gain =
-                    static_cast<double>(current_schedule.step_processor_work[current_step - 1][new_proc]) +
-                    static_cast<double>(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) -
-                    static_cast<double>(current_schedule.step_max_work[current_step - 1]);
+            if (current_schedule.step_max_work[current_step - 1]
+                < current_schedule.step_processor_work[current_step - 1][new_proc]
+                      + current_schedule.instance->getComputationalDag().vertex_work_weight(node)) {
+                const double gain = static_cast<double>(current_schedule.step_processor_work[current_step - 1][new_proc])
+                                    + static_cast<double>(current_schedule.instance->getComputationalDag().vertex_work_weight(node))
+                                    - static_cast<double>(current_schedule.step_max_work[current_step - 1]);
 
                 node_gains[node][new_proc][0] -= gain;
 
                 node_change_in_costs[node][new_proc][0] += gain;
             }
 
-            if (current_schedule.step_max_work[current_step] ==
-                    current_schedule.step_processor_work[current_step][current_proc] &&
-                current_schedule.step_processor_work[current_step][current_proc] >
-                    current_schedule.step_second_max_work[current_step]) {
-
-                if (current_schedule.step_max_work[current_step] -
-                        current_schedule.instance->getComputationalDag().vertex_work_weight(node) >
-                    current_schedule.step_second_max_work[current_step]) {
-
+            if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc]
+                && current_schedule.step_processor_work[current_step][current_proc]
+                       > current_schedule.step_second_max_work[current_step]) {
+                if (current_schedule.step_max_work[current_step]
+                        - current_schedule.instance->getComputationalDag().vertex_work_weight(node)
+                    > current_schedule.step_second_max_work[current_step]) {
                     const double gain = current_schedule.instance->getComputationalDag().vertex_work_weight(node);
                     node_gains[node][new_proc][0] += gain;
                     node_change_in_costs[node][new_proc][0] -= gain;
 
                 } else {
-
-                    const double gain = current_schedule.step_max_work[current_step] -
-                                        current_schedule.step_second_max_work[current_step];
+                    const double gain
+                        = current_schedule.step_max_work[current_step] - current_schedule.step_second_max_work[current_step];
 
                     node_gains[node][new_proc][0] += gain;
                     node_change_in_costs[node][new_proc][0] -= gain;
@@ -575,73 +505,59 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
 
         } else {
-
             node_gains[node][new_proc][0] = std::numeric_limits<double>::lowest();
         }
 
         if (current_step < current_schedule.num_steps() - 1) {
-
-            if (current_schedule.step_max_work[current_step + 1] <
-                current_schedule.step_processor_work[current_step + 1][new_proc] +
-                    current_schedule.instance->getComputationalDag().vertex_work_weight(node)) {
-
-                const double gain =
-                    static_cast<double>(current_schedule.step_processor_work[current_step + 1][new_proc]) +
-                    static_cast<double>(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) -
-                    static_cast<double>(current_schedule.step_max_work[current_step + 1]);
+            if (current_schedule.step_max_work[current_step + 1]
+                < current_schedule.step_processor_work[current_step + 1][new_proc]
+                      + current_schedule.instance->getComputationalDag().vertex_work_weight(node)) {
+                const double gain = static_cast<double>(current_schedule.step_processor_work[current_step + 1][new_proc])
+                                    + static_cast<double>(current_schedule.instance->getComputationalDag().vertex_work_weight(node))
+                                    - static_cast<double>(current_schedule.step_max_work[current_step + 1]);
 
                 node_gains[node][new_proc][2] -= gain;
                 node_change_in_costs[node][new_proc][2] += gain;
             }
 
-            if (current_schedule.step_max_work[current_step] ==
-                    current_schedule.step_processor_work[current_step][current_proc] &&
-                current_schedule.step_processor_work[current_step][current_proc] >
-                    current_schedule.step_second_max_work[current_step]) {
-
-                if ((current_schedule.step_max_work[current_step] -
-                     current_schedule.instance->getComputationalDag().vertex_work_weight(node)) >
-                    current_schedule.step_second_max_work[current_step]) {
-
+            if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc]
+                && current_schedule.step_processor_work[current_step][current_proc]
+                       > current_schedule.step_second_max_work[current_step]) {
+                if ((current_schedule.step_max_work[current_step]
+                     - current_schedule.instance->getComputationalDag().vertex_work_weight(node))
+                    > current_schedule.step_second_max_work[current_step]) {
                     const double gain = current_schedule.instance->getComputationalDag().vertex_work_weight(node);
 
                     node_gains[node][new_proc][2] += gain;
                     node_change_in_costs[node][new_proc][2] -= gain;
 
                 } else {
-
-                    const double gain = current_schedule.step_max_work[current_step] -
-                                        current_schedule.step_second_max_work[current_step];
+                    const double gain
+                        = current_schedule.step_max_work[current_step] - current_schedule.step_second_max_work[current_step];
 
                     node_gains[node][new_proc][2] += gain;
                     node_change_in_costs[node][new_proc][2] -= gain;
                 }
             }
         } else {
-
             node_gains[node][new_proc][2] = std::numeric_limits<double>::lowest();
         }
     }
 
-    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node, unsigned current_step, unsigned current_proc,
-                                   unsigned new_proc) = 0;
+    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node, unsigned current_step, unsigned current_proc, unsigned new_proc) = 0;
 
     void update_node_gains(const std::unordered_set<VertexType> &nodes) {
-
         for (const auto &node : nodes) {
-
             compute_node_gain(node);
             compute_max_gain_insert_or_update_heap(node);
         }
     };
 
     kl_move<Graph_t> find_best_move() {
-
         const unsigned local_max = 50;
         std::vector<VertexType> max_nodes(local_max);
         unsigned count = 0;
         for (auto iter = max_gain_heap.ordered_begin(); iter != max_gain_heap.ordered_end(); ++iter) {
-
             if (iter->gain >= max_gain_heap.top().gain && count < local_max) {
                 max_nodes[count] = (iter->node);
                 count++;
@@ -663,7 +579,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     kl_move<Graph_t> compute_best_move(VertexType node) {
-
         double node_max_gain = std::numeric_limits<double>::lowest();
         double node_change_in_cost = 0;
         unsigned node_best_step = 0;
@@ -673,14 +588,11 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         double proc_max = 0;
         unsigned best_step = 0;
         for (unsigned proc = 0; proc < num_procs; proc++) {
-
             unsigned rand_count = 0;
 
-            if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 &&
-                current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
-
+            if (current_schedule.vector_schedule.assignedSuperstep(node) > 0
+                && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][0] > node_gains[node][proc][1]) {
-
                     if (node_gains[node][proc][0] > node_gains[node][proc][2]) {
                         proc_max = node_gains[node][proc][0];
                         proc_change_in_cost = node_change_in_costs[node][proc][0];
@@ -693,45 +605,36 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     }
 
                 } else {
-
                     if (node_gains[node][proc][1] > node_gains[node][proc][2]) {
-
                         proc_max = node_gains[node][proc][1];
                         proc_change_in_cost = node_change_in_costs[node][proc][1];
                         best_step = 1;
                     } else {
-
                         proc_max = node_gains[node][proc][2];
                         proc_change_in_cost = node_change_in_costs[node][proc][2];
                         best_step = 2;
                     }
                 }
 
-            } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 &&
-                       current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
-
+            } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0
+                       && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][2] > node_gains[node][proc][1]) {
-
                     proc_max = node_gains[node][proc][2];
                     proc_change_in_cost = node_change_in_costs[node][proc][2];
                     best_step = 2;
                 } else {
-
                     proc_max = node_gains[node][proc][1];
                     proc_change_in_cost = node_change_in_costs[node][proc][1];
                     best_step = 1;
                 }
 
-            } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 &&
-                       current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) {
-
+            } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0
+                       && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][1] > node_gains[node][proc][0]) {
-
                     proc_max = node_gains[node][proc][1];
                     proc_change_in_cost = node_change_in_costs[node][proc][1];
                     best_step = 1;
                 } else {
-
                     proc_max = node_gains[node][proc][0];
                     proc_change_in_cost = node_change_in_costs[node][proc][0];
                     best_step = 0;
@@ -743,7 +646,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
 
             if (node_max_gain < proc_max) {
-
                 node_max_gain = proc_max;
                 node_change_in_cost = proc_change_in_cost;
                 node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1;
@@ -751,7 +653,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 rand_count = 0;
 
             } else if (node_max_gain <= proc_max) {
-
                 if (rand() % (2 + rand_count) == 0) {
                     node_max_gain = proc_max;
                     node_change_in_cost = proc_change_in_cost;
@@ -762,13 +663,16 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
         }
 
-        return kl_move<Graph_t>(
-            node, node_max_gain, node_change_in_cost, current_schedule.vector_schedule.assignedProcessor(node),
-            current_schedule.vector_schedule.assignedSuperstep(node), node_best_proc, node_best_step);
+        return kl_move<Graph_t>(node,
+                                node_max_gain,
+                                node_change_in_cost,
+                                current_schedule.vector_schedule.assignedProcessor(node),
+                                current_schedule.vector_schedule.assignedSuperstep(node),
+                                node_best_proc,
+                                node_best_step);
     }
 
     kl_move<Graph_t> best_move_change_superstep(VertexType node) {
-
         double node_max_gain = std::numeric_limits<double>::lowest();
         double node_change_in_cost = 0;
         unsigned node_best_step = 0;
@@ -778,10 +682,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         double proc_max = 0;
         unsigned best_step = 0;
         for (unsigned proc = 0; proc < num_procs; proc++) {
-
-            if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 &&
-                current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
-
+            if (current_schedule.vector_schedule.assignedSuperstep(node) > 0
+                && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
                 if (node_gains[node][proc][0] > node_gains[node][proc][2]) {
                     proc_max = node_gains[node][proc][0];
                     proc_change_in_cost = node_change_in_costs[node][proc][0];
@@ -793,16 +695,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     best_step = 2;
                 }
 
-            } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 &&
-                       current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
-
+            } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0
+                       && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) {
                 proc_max = node_gains[node][proc][2];
                 proc_change_in_cost = node_change_in_costs[node][proc][2];
                 best_step = 2;
 
-            } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 &&
-                       current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) {
-
+            } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0
+                       && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) {
                 proc_max = node_gains[node][proc][0];
                 proc_change_in_cost = node_change_in_costs[node][proc][0];
                 best_step = 0;
@@ -812,7 +712,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
 
             if (node_max_gain < proc_max) {
-
                 node_max_gain = proc_max;
                 node_change_in_cost = proc_change_in_cost;
                 node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1;
@@ -820,15 +719,17 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
         }
 
-        return kl_move<Graph_t>(
-            node, node_max_gain, node_change_in_cost, current_schedule.vector_schedule.assignedProcessor(node),
-            current_schedule.vector_schedule.assignedSuperstep(node), node_best_proc, node_best_step);
+        return kl_move<Graph_t>(node,
+                                node_max_gain,
+                                node_change_in_cost,
+                                current_schedule.vector_schedule.assignedProcessor(node),
+                                current_schedule.vector_schedule.assignedSuperstep(node),
+                                node_best_proc,
+                                node_best_step);
     }
 
     void save_best_schedule(const IBspSchedule<Graph_t> &schedule) {
-
         for (const auto &node : current_schedule.instance->vertices()) {
-
             best_schedule->setAssignedProcessor(node, schedule.assignedProcessor(node));
             best_schedule->setAssignedSuperstep(node, schedule.assignedSuperstep(node));
         }
@@ -843,12 +744,11 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     std::unordered_set<VertexType> node_selection;
 
     void select_nodes() {
-
         if (parameters.select_all_nodes) {
-
             for (const auto &node : current_schedule.instance->vertices()) {
-                if (super_locked_nodes.find(node) == super_locked_nodes.end())
+                if (super_locked_nodes.find(node) == super_locked_nodes.end()) {
                     node_selection.insert(node);
+                }
             }
 
         } else {
@@ -857,28 +757,22 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     virtual void select_nodes_comm() {
-
         for (const auto &node : current_schedule.instance->vertices()) {
-
             if (super_locked_nodes.find(node) != super_locked_nodes.end()) {
                 continue;
             }
 
             for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) {
-
-                if (current_schedule.vector_schedule.assignedProcessor(node) !=
-                    current_schedule.vector_schedule.assignedProcessor(source)) {
-
+                if (current_schedule.vector_schedule.assignedProcessor(node)
+                    != current_schedule.vector_schedule.assignedProcessor(source)) {
                     node_selection.insert(node);
                     break;
                 }
             }
 
             for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) {
-
-                if (current_schedule.vector_schedule.assignedProcessor(node) !=
-                    current_schedule.vector_schedule.assignedProcessor(target)) {
-
+                if (current_schedule.vector_schedule.assignedProcessor(node)
+                    != current_schedule.vector_schedule.assignedProcessor(target)) {
                     node_selection.insert(node);
                     break;
                 }
@@ -887,11 +781,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void select_nodes_threshold(std::size_t threshold) {
-
         std::uniform_int_distribution<vertex_idx_t<Graph_t>> dis(0, num_nodes - 1);
 
         while (node_selection.size() < threshold) {
-
             auto node = dis(gen);
 
             if (super_locked_nodes.find(node) == super_locked_nodes.end()) {
@@ -901,28 +793,25 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void select_nodes_permutation_threshold(std::size_t threshold) {
-
         std::vector<VertexType> permutation(num_nodes);
         std::iota(std::begin(permutation), std::end(permutation), 0);
 
         std::shuffle(permutation.begin(), permutation.end(), gen);
 
         for (std::size_t i = 0; i < threshold; i++) {
-
-            if (super_locked_nodes.find(permutation[i]) == super_locked_nodes.end())
+            if (super_locked_nodes.find(permutation[i]) == super_locked_nodes.end()) {
                 node_selection.insert(permutation[i]);
+            }
         }
     }
 
     void select_nodes_violations() {
-
         if (current_schedule.current_violations.empty()) {
             select_nodes();
             return;
         }
 
         for (const auto &edge : current_schedule.current_violations) {
-
             const auto &source_v = source(edge, current_schedule.instance->getComputationalDag());
             const auto &target_v = target(edge, current_schedule.instance->getComputationalDag());
 
@@ -956,9 +845,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void select_nodes_conseque_max_work(bool do_not_select_super_locked_nodes = false) {
-
         if (step_selection_epoch_counter > parameters.max_step_selection_epochs) {
-
 #ifdef KL_DEBUG
             std::cout << "step selection epoch counter exceeded. conseque max work" << std::endl;
 #endif
@@ -973,7 +860,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         unsigned second_max_step = 0;
 
         for (unsigned proc = 0; proc < num_procs; proc++) {
-
             if (current_schedule.step_processor_work[step_selection_counter][proc] > max_work_step) {
                 second_max_work_step = max_work_step;
                 second_max_step = max_step;
@@ -986,35 +872,31 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
         }
 
-        if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].size() <
-            parameters.selection_threshold * .66) {
-
-            node_selection.insert(
-                current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(),
-                current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end());
+        if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].size()
+            < parameters.selection_threshold * .66) {
+            node_selection.insert(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(),
+                                  current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end());
 
         } else {
-
             std::sample(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(),
                         current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end(),
                         std::inserter(node_selection, node_selection.end()),
-                        static_cast<unsigned>(std::round(parameters.selection_threshold * .66)), gen);
+                        static_cast<unsigned>(std::round(parameters.selection_threshold * .66)),
+                        gen);
         }
 
-        if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].size() <
-            parameters.selection_threshold * .33) {
-
+        if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].size()
+            < parameters.selection_threshold * .33) {
             node_selection.insert(
                 current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(),
                 current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end());
 
         } else {
-
-            std::sample(
-                current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(),
-                current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end(),
-                std::inserter(node_selection, node_selection.end()),
-                static_cast<unsigned>(std::round(parameters.selection_threshold * .33)), gen);
+            std::sample(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(),
+                        current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end(),
+                        std::inserter(node_selection, node_selection.end()),
+                        static_cast<unsigned>(std::round(parameters.selection_threshold * .33)),
+                        gen);
         }
 
         if (do_not_select_super_locked_nodes) {
@@ -1037,9 +919,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void select_nodes_check_remove_superstep() {
-
         if (step_selection_epoch_counter > parameters.max_step_selection_epochs) {
-
 #ifdef KL_DEBUG
             std::cout << "step selection epoch counter exceeded, remove supersteps" << std::endl;
 #endif
@@ -1048,24 +928,18 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             return;
         }
 
-        for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps();
-             step_to_remove++) {
-
+        for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); step_to_remove++) {
 #ifdef KL_DEBUG
-            std::cout << "checking step to remove " << step_to_remove << " / " << current_schedule.num_steps()
-                      << std::endl;
+            std::cout << "checking step to remove " << step_to_remove << " / " << current_schedule.num_steps() << std::endl;
 #endif
 
             if (check_remove_superstep(step_to_remove)) {
-
 #ifdef KL_DEBUG
                 std::cout << "trying to remove superstep " << step_to_remove << std::endl;
 #endif
 
                 if (scatter_nodes_remove_superstep(step_to_remove)) {
-
                     for (unsigned proc = 0; proc < num_procs; proc++) {
-
                         if (step_to_remove < current_schedule.num_steps()) {
                             node_selection.insert(
                                 current_schedule.set_schedule.step_processor_vertices[step_to_remove][proc].begin(),
@@ -1114,7 +988,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     bool reset_superstep = false;
 
     virtual bool check_remove_superstep(unsigned step) {
-
         if (current_schedule.num_steps() <= 2) {
             return false;
         }
@@ -1122,7 +995,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         v_workw_t<Graph_t> total_work = 0;
 
         for (unsigned proc = 0; proc < num_procs; proc++) {
-
             total_work += current_schedule.step_processor_work[step][proc];
         }
 
@@ -1133,7 +1005,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     bool scatter_nodes_remove_superstep(unsigned step) {
-
         assert(step < current_schedule.num_steps());
 
         std::vector<kl_move<Graph_t>> moves;
@@ -1142,7 +1013,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
         for (unsigned proc = 0; proc < num_procs; proc++) {
             for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) {
-
                 compute_node_gain(node);
                 moves.push_back(best_move_change_superstep(node));
 
@@ -1152,10 +1022,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
 
                 if constexpr (current_schedule.use_memory_constraint) {
-                    current_schedule.memory_constraint.apply_move(node, proc, step, moves.back().to_proc,
-                                                                  moves.back().to_step);
+                    current_schedule.memory_constraint.apply_move(node, proc, step, moves.back().to_proc, moves.back().to_step);
                 }
-               
             }
 
             if (abort) {
@@ -1168,8 +1036,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
             BspSchedule<Graph_t> tmp_schedule(current_schedule.set_schedule);
-            if (not tmp_schedule.satisfiesMemoryConstraints())
+            if (not tmp_schedule.satisfiesMemoryConstraints()) {
                 std::cout << "Mem const violated" << std::endl;
+            }
 #endif
 
             return false;
@@ -1180,10 +1049,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         }
 
         for (const auto &move : moves) {
-
 #ifdef KL_DEBUG
-            std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step
-                      << std::endl;
+            std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step << std::endl;
 #endif
 
             current_schedule.vector_schedule.setAssignedSuperstep(move.node, move.to_step);
@@ -1195,17 +1062,16 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
         BspSchedule<Graph_t> tmp_schedule(current_schedule.set_schedule);
-        if (not tmp_schedule.satisfiesMemoryConstraints())
+        if (not tmp_schedule.satisfiesMemoryConstraints()) {
             std::cout << "Mem const violated" << std::endl;
+        }
 #endif
 
         return true;
     }
 
     void select_nodes_check_reset_superstep() {
-
         if (step_selection_epoch_counter > parameters.max_step_selection_epochs) {
-
 #ifdef KL_DEBUG
             std::cout << "step selection epoch counter exceeded, reset supersteps" << std::endl;
 #endif
@@ -1214,24 +1080,18 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             return;
         }
 
-        for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps();
-             step_to_remove++) {
-
+        for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); step_to_remove++) {
 #ifdef KL_DEBUG
-            std::cout << "checking step to reset " << step_to_remove << " / " << current_schedule.num_steps()
-                      << std::endl;
+            std::cout << "checking step to reset " << step_to_remove << " / " << current_schedule.num_steps() << std::endl;
 #endif
 
             if (check_reset_superstep(step_to_remove)) {
-
 #ifdef KL_DEBUG
                 std::cout << "trying to reset superstep " << step_to_remove << std::endl;
 #endif
 
                 if (scatter_nodes_reset_superstep(step_to_remove)) {
-
                     for (unsigned proc = 0; proc < num_procs; proc++) {
-
                         if (step_to_remove < current_schedule.num_steps() - 1) {
                             node_selection.insert(
                                 current_schedule.set_schedule.step_processor_vertices[step_to_remove + 1][proc].begin(),
@@ -1273,7 +1133,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     virtual bool check_reset_superstep(unsigned step) {
-
         if (current_schedule.num_steps() <= 2) {
             return false;
         }
@@ -1291,14 +1150,13 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #ifdef KL_DEBUG
 
         std::cout << " avg "
-                  << static_cast<double>(total_work) /
-                         static_cast<double>(current_schedule.instance->numberOfProcessors())
+                  << static_cast<double>(total_work) / static_cast<double>(current_schedule.instance->numberOfProcessors())
                   << " max " << max_total_work << " min " << min_total_work << std::endl;
 #endif
 
-        if (static_cast<double>(total_work) / static_cast<double>(current_schedule.instance->numberOfProcessors()) -
-                static_cast<double>(min_total_work) >
-            0.1 * static_cast<double>(min_total_work)) {
+        if (static_cast<double>(total_work) / static_cast<double>(current_schedule.instance->numberOfProcessors())
+                - static_cast<double>(min_total_work)
+            > 0.1 * static_cast<double>(min_total_work)) {
             return true;
         }
 
@@ -1306,7 +1164,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     bool scatter_nodes_reset_superstep(unsigned step) {
-
         assert(step < current_schedule.num_steps());
 
         std::vector<kl_move<Graph_t>> moves;
@@ -1315,7 +1172,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
         for (unsigned proc = 0; proc < num_procs; proc++) {
             for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) {
-
                 compute_node_gain(node);
                 moves.push_back(best_move_change_superstep(node));
 
@@ -1325,9 +1181,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
 
                 if constexpr (current_schedule.use_memory_constraint) {
-                    current_schedule.memory_constraint.apply_forward_move(node, proc, step, moves.back().to_proc,
-                                                                          moves.back().to_step);
-                }               
+                    current_schedule.memory_constraint.apply_forward_move(
+                        node, proc, step, moves.back().to_proc, moves.back().to_step);
+                }
             }
 
             if (abort) {
@@ -1336,7 +1192,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         }
 
         if (abort) {
-
             current_schedule.recompute_neighboring_supersteps(step);
             return false;
         }
@@ -1346,10 +1201,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         }
 
         for (const auto &move : moves) {
-
 #ifdef KL_DEBUG
-            std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step
-                      << std::endl;
+            std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step << std::endl;
 #endif
 
             current_schedule.vector_schedule.setAssignedSuperstep(move.node, move.to_step);
@@ -1363,20 +1216,15 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void select_unlock_neighbors(VertexType node) {
-
         for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) {
-
             if (check_node_unlocked(target)) {
-
                 node_selection.insert(target);
                 nodes_to_update.insert(target);
             }
         }
 
         for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) {
-
             if (check_node_unlocked(source)) {
-
                 node_selection.insert(source);
                 nodes_to_update.insert(source);
             }
@@ -1384,47 +1232,39 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     void set_parameters() {
-
         if (num_nodes < 250) {
-
             parameters.max_outer_iterations = 300;
 
             parameters.select_all_nodes = true;
             parameters.selection_threshold = num_nodes;
 
         } else if (num_nodes < 1000) {
-
             parameters.max_outer_iterations = static_cast<unsigned>(num_nodes / 2);
 
             parameters.select_all_nodes = true;
             parameters.selection_threshold = num_nodes;
 
         } else if (num_nodes < 5000) {
-
             parameters.max_outer_iterations = 4 * static_cast<unsigned>(std::sqrt(num_nodes));
 
             parameters.selection_threshold = num_nodes / 3;
 
         } else if (num_nodes < 10000) {
-
             parameters.max_outer_iterations = 3 * static_cast<unsigned>(std::sqrt(num_nodes));
 
             parameters.selection_threshold = num_nodes / 3;
 
         } else if (num_nodes < 50000) {
-
             parameters.max_outer_iterations = static_cast<unsigned>(std::sqrt(num_nodes));
 
             parameters.selection_threshold = num_nodes / 5;
 
         } else if (num_nodes < 100000) {
-
             parameters.max_outer_iterations = 2 * static_cast<unsigned>(std::log(num_nodes));
 
             parameters.selection_threshold = num_nodes / 10;
 
         } else {
-
             parameters.max_outer_iterations = static_cast<unsigned>(std::min(10000.0, std::log(num_nodes)));
 
             parameters.selection_threshold = num_nodes / 10;
@@ -1444,15 +1284,15 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
         }
 
 #ifdef KL_DEBUG
-        if (parameters.select_all_nodes)
+        if (parameters.select_all_nodes) {
             std::cout << "KLBase set parameters, select all nodes" << std::endl;
-        else
+        } else {
             std::cout << "KLBase set parameters, selection threshold: " << parameters.selection_threshold << std::endl;
+        }
 #endif
     }
 
     virtual void cleanup_datastructures() {
-
         node_change_in_costs.clear();
         node_gains.clear();
 
@@ -1472,7 +1312,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 
     bool run_local_search_without_violations() {
-
         penalty = std::numeric_limits<double>::max() * .24;
 
         double initial_costs = current_schedule.current_cost;
@@ -1497,42 +1336,37 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             unsigned inner_counter = 0;
 
             while (failed_branches < 3 && inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) {
-
                 inner_counter++;
 
                 const double iter_costs = current_schedule.current_cost;
 
-                kl_move<Graph_t> best_move = find_best_move(); // O(log n)
+                kl_move<Graph_t> best_move = find_best_move();    // O(log n)
 
                 if (best_move.gain < -std::numeric_limits<double>::max() * .25) {
                     continue;
                 }
 
-                current_schedule.apply_move(best_move); // O(p + log n)
+                current_schedule.apply_move(best_move);    // O(p + log n)
 
                 locked_nodes.insert(best_move.node);
 
 #ifdef KL_DEBUG
                 double tmp_costs = current_schedule.current_cost;
                 if (tmp_costs != compute_current_costs()) {
-
-                    std::cout << "current costs: " << current_schedule.current_cost
-                              << " best move gain: " << best_move.gain
-                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs
-                              << std::endl;
+                    std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain
+                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl;
 
                     std::cout << "! costs not equal " << std::endl;
                 }
 #endif
 
                 if (best_move.change_in_cost > 0 && current_schedule.current_feasible) {
-
                     if (best_schedule_costs > iter_costs) {
 #ifdef KL_DEBUG
                         std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl;
 #endif
                         best_schedule_costs = iter_costs;
-                        save_best_schedule(current_schedule.vector_schedule); // O(n)
+                        save_best_schedule(current_schedule.vector_schedule);    // O(n)
                         reverse_move_best_schedule(best_move);
                     }
                 }
@@ -1545,8 +1379,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
                 std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost "
-                          << best_move.change_in_cost << " from step " << best_move.from_step << " to "
-                          << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
+                          << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step
+                          << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
                           << " violations: " << current_schedule.current_violations.size() << " cost "
                           << current_schedule.current_cost << std::endl;
 #endif
@@ -1554,11 +1388,10 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 // if (not current_schedule.current_feasible) {
 
                 if (current_schedule.current_cost > (1.04 + outer_counter * 0.002) * best_schedule_costs) {
-
 #ifdef KL_DEBUG
                     std::cout << "current cost " << current_schedule.current_cost
-                              << " too far away from best schedule costs: " << best_schedule_costs
-                              << " rollback to best schedule" << std::endl;
+                              << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule"
+                              << std::endl;
 #endif
 
                     current_schedule.set_current_schedule(*best_schedule);
@@ -1570,13 +1403,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
                 //}
 
-            } // while
+            }    // while
 
 #ifdef KL_DEBUG
             std::cout << "end inner loop current cost: " << current_schedule.current_cost << " with "
-                      << current_schedule.current_violations.size()
-                      << " violation, best sol cost: " << best_schedule_costs << " with "
-                      << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
+                      << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs
+                      << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
                       << parameters.max_outer_iterations << std::endl;
 #endif
 
@@ -1602,7 +1434,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #endif
 
             if (compute_with_time_limit) {
-
                 auto finish_time = std::chrono::high_resolution_clock::now();
                 auto duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
@@ -1611,18 +1442,18 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
             }
 
-        } // for
+        }    // for
 
         cleanup_datastructures();
 
-        if (initial_costs > current_schedule.current_cost)
+        if (initial_costs > current_schedule.current_cost) {
             return true;
-        else
+        } else {
             return false;
+        }
     }
 
     bool run_local_search_simple() {
-
         set_initial_reward_penalty();
 
         const double initial_costs = current_schedule.current_cost;
@@ -1653,15 +1484,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
             unsigned inner_counter = 0;
 
-            while (failed_branches < parameters.max_num_failed_branches &&
-                   inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) {
-
+            while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations
+                   && max_gain_heap.size() > 0) {
                 inner_counter++;
 
                 const bool iter_feasible = current_schedule.current_feasible;
                 const double iter_costs = current_schedule.current_cost;
 
-                kl_move<Graph_t> best_move = find_best_move(); // O(log n)
+                kl_move<Graph_t> best_move = find_best_move();    // O(log n)
 
                 if (best_move.gain < -std::numeric_limits<double>::max() * .25) {
 #ifdef KL_DEBUG
@@ -1672,13 +1502,13 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
                 std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost "
-                          << best_move.change_in_cost << " from step " << best_move.from_step << " to "
-                          << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
+                          << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step
+                          << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
                           << " violations: " << current_schedule.current_violations.size() << " cost "
                           << current_schedule.current_cost << std::endl;
 #endif
 
-                current_schedule.apply_move(best_move); // O(p + log n)
+                current_schedule.apply_move(best_move);    // O(p + log n)
 
                 update_reward_penalty();
                 locked_nodes.insert(best_move.node);
@@ -1686,18 +1516,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #ifdef KL_DEBUG
                 double tmp_costs = current_schedule.current_cost;
                 if (tmp_costs != compute_current_costs()) {
-
-                    std::cout << "current costs: " << current_schedule.current_cost
-                              << " best move gain: " << best_move.gain
-                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs
-                              << std::endl;
+                    std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain
+                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl;
 
                     std::cout << "! costs not equal " << std::endl;
                 }
 #endif
 
                 if (iter_feasible != current_schedule.current_feasible) {
-
                     if (iter_feasible) {
 #ifdef KL_DEBUG
                         std::cout << "===> current schedule changed from feasible to infeasible" << std::endl;
@@ -1710,7 +1536,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                             std::cout << "save best schedule with costs " << iter_costs << std::endl;
 #endif
                             best_schedule_costs = iter_costs;
-                            save_best_schedule(current_schedule.vector_schedule); // O(n)
+                            save_best_schedule(current_schedule.vector_schedule);    // O(n)
                             reverse_move_best_schedule(best_move);
                         }
 
@@ -1720,13 +1546,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #endif
                     }
                 } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) {
-
                     if (iter_costs < best_schedule_costs) {
 #ifdef KL_DEBUG
                         std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl;
 #endif
                         best_schedule_costs = iter_costs;
-                        save_best_schedule(current_schedule.vector_schedule); // O(n)
+                        save_best_schedule(current_schedule.vector_schedule);    // O(n)
                         reverse_move_best_schedule(best_move);
                     }
                 }
@@ -1736,28 +1561,26 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 select_unlock_neighbors(best_move.node);
 
                 if (check_violation_locked()) {
-
                     if (iter_feasible != current_schedule.current_feasible && iter_feasible) {
                         node_causing_first_violation = best_move.node;
                     }
                     super_locked_nodes.insert(node_causing_first_violation);
 #ifdef KL_DEBUG
-                    std::cout << "abort iteration on locked violation, super locking node "
-                              << node_causing_first_violation << std::endl;
+                    std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation
+                              << std::endl;
 #endif
                     break;
                 }
 
                 update_node_gains(nodes_to_update);
 
-                if (current_schedule.current_cost > (parameters.max_div_best_sol_base_percent +
-                                                     outer_counter * parameters.max_div_best_sol_rate_percent) *
-                                                        best_schedule_costs) {
-
+                if (current_schedule.current_cost
+                    > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent)
+                          * best_schedule_costs) {
 #ifdef KL_DEBUG
                     std::cout << "current cost " << current_schedule.current_cost
-                              << " too far away from best schedule costs: " << best_schedule_costs
-                              << " rollback to best schedule" << std::endl;
+                              << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule"
+                              << std::endl;
 #endif
 
                     current_schedule.set_current_schedule(*best_schedule);
@@ -1768,13 +1591,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     failed_branches++;
                 }
 
-            } // while
+            }    // while
 
 #ifdef KL_DEBUG
             std::cout << "end inner loop current cost: " << current_schedule.current_cost << " with "
-                      << current_schedule.current_violations.size()
-                      << " violation, best sol cost: " << best_schedule_costs << " with "
-                      << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
+                      << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs
+                      << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
                       << parameters.max_outer_iterations << std::endl;
 #endif
 
@@ -1835,24 +1657,23 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 improvement_counter = 0;
             }
 
-        } // for
+        }    // for
 
         cleanup_datastructures();
 
 #ifdef KL_DEBUG
-        std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs"
-                  << std::endl;
+        std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" << std::endl;
         assert(best_schedule->satisfiesPrecedenceConstraints());
 #endif
 
-        if (initial_costs > current_schedule.current_cost)
+        if (initial_costs > current_schedule.current_cost) {
             return true;
-        else
+        } else {
             return false;
+        }
     }
 
     bool run_local_search_remove_supersteps() {
-
         const double initial_costs = current_schedule.current_cost;
 
 #ifdef KL_DEBUG
@@ -1871,8 +1692,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
         for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) {
 #ifdef KL_DEBUG
-            std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost
-                      << std::endl;
+            std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost << std::endl;
             if (max_gain_heap.size() == 0) {
                 std::cout << "max gain heap empty" << std::endl;
             }
@@ -1887,15 +1707,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
             unsigned inner_counter = 0;
 
-            while (failed_branches < parameters.max_num_failed_branches &&
-                   inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) {
-
+            while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations
+                   && max_gain_heap.size() > 0) {
                 inner_counter++;
 
                 const bool iter_feasible = current_schedule.current_feasible;
                 const double iter_costs = current_schedule.current_cost;
 
-                kl_move<Graph_t> best_move = find_best_move(); // O(log n)
+                kl_move<Graph_t> best_move = find_best_move();    // O(log n)
 
                 if (best_move.gain < -std::numeric_limits<double>::max() * .25) {
 #ifdef KL_DEBUG
@@ -1906,14 +1725,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
                 std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost "
-                          << best_move.change_in_cost << " from step " << best_move.from_step << " to "
-                          << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
+                          << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step
+                          << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
                           << " violations: " << current_schedule.current_violations.size() << " old cost "
                           << current_schedule.current_cost << " new cost "
                           << current_schedule.current_cost + best_move.change_in_cost << std::endl;
 #endif
 
-                current_schedule.apply_move(best_move); // O(p + log n)
+                current_schedule.apply_move(best_move);    // O(p + log n)
 
                 update_reward_penalty();
                 locked_nodes.insert(best_move.node);
@@ -1921,18 +1740,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #ifdef KL_DEBUG
                 double tmp_costs = current_schedule.current_cost;
                 if (tmp_costs != compute_current_costs()) {
-
-                    std::cout << "current costs: " << current_schedule.current_cost
-                              << " best move gain: " << best_move.gain
-                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs
-                              << std::endl;
+                    std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain
+                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl;
 
                     std::cout << "! costs not equal " << std::endl;
                 }
 #endif
 
                 if (iter_feasible != current_schedule.current_feasible) {
-
                     if (iter_feasible) {
 #ifdef KL_DEBUG
                         std::cout << "===> current schedule changed from feasible to infeasible" << std::endl;
@@ -1945,7 +1760,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                             std::cout << "save best schedule with costs " << iter_costs << std::endl;
 #endif
                             best_schedule_costs = iter_costs;
-                            save_best_schedule(current_schedule.vector_schedule); // O(n)
+                            save_best_schedule(current_schedule.vector_schedule);    // O(n)
                             reverse_move_best_schedule(best_move);
 #ifdef KL_DEBUG
                             std::cout << "KLBase save best schedule with (source node comm) cost "
@@ -1960,13 +1775,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #endif
                     }
                 } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) {
-
                     if (iter_costs < best_schedule_costs) {
 #ifdef KL_DEBUG
                         std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl;
 #endif
                         best_schedule_costs = iter_costs;
-                        save_best_schedule(current_schedule.vector_schedule); // O(n)
+                        save_best_schedule(current_schedule.vector_schedule);    // O(n)
                         reverse_move_best_schedule(best_move);
 #ifdef KL_DEBUG
                         std::cout << "KLBase save best schedule with (source node comm) cost "
@@ -1981,28 +1795,26 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 select_unlock_neighbors(best_move.node);
 
                 if (check_violation_locked()) {
-
                     if (iter_feasible != current_schedule.current_feasible && iter_feasible) {
                         node_causing_first_violation = best_move.node;
                     }
                     super_locked_nodes.insert(node_causing_first_violation);
 #ifdef KL_DEBUG
-                    std::cout << "abort iteration on locked violation, super locking node "
-                              << node_causing_first_violation << std::endl;
+                    std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation
+                              << std::endl;
 #endif
                     break;
                 }
 
                 update_node_gains(nodes_to_update);
 
-                if (current_schedule.current_cost > (parameters.max_div_best_sol_base_percent +
-                                                     outer_counter * parameters.max_div_best_sol_rate_percent) *
-                                                        best_schedule_costs) {
-
+                if (current_schedule.current_cost
+                    > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent)
+                          * best_schedule_costs) {
 #ifdef KL_DEBUG
                     std::cout << "current cost " << current_schedule.current_cost
-                              << " too far away from best schedule costs: " << best_schedule_costs
-                              << " rollback to best schedule" << std::endl;
+                              << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule"
+                              << std::endl;
 #endif
 
                     current_schedule.set_current_schedule(*best_schedule);
@@ -2017,13 +1829,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     failed_branches++;
                 }
 
-            } // while
+            }    // while
 
 #ifdef KL_DEBUG
-            std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost
-                      << " with " << current_schedule.current_violations.size()
-                      << " violation, best sol cost: " << best_schedule_costs << " with "
-                      << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
+            std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost << " with "
+                      << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs
+                      << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
                       << parameters.max_outer_iterations << std::endl;
 #endif
 
@@ -2072,11 +1883,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             }
 
             if (best_iter_costs <= current_schedule.current_cost) {
-
                 no_improvement_iter_counter++;
 
                 if (no_improvement_iter_counter > parameters.reset_epoch_counter_threshold) {
-
                     step_selection_epoch_counter = 0;
                     parameters.reset_epoch_counter_threshold += current_schedule.num_steps();
 #ifdef KL_DEBUG
@@ -2087,7 +1896,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
 
                 if (no_improvement_iter_counter > 10) {
-
                     parameters.initial_penalty = 0.0;
                     parameters.violations_threshold = 3;
 #ifdef KL_DEBUG
@@ -2097,7 +1905,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
 
                 if (no_improvement_iter_counter == 35) {
-
                     parameters.max_div_best_sol_base_percent *= 1.02;
 #ifdef KL_DEBUG
                     std::cout << "no improvement for " << no_improvement_iter_counter
@@ -2117,28 +1924,28 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 no_improvement_iter_counter = 0;
             }
 
-        } // for
+        }    // for
 
         cleanup_datastructures();
 
 #ifdef KL_DEBUG
-        std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs"
-                  << std::endl;
+        std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" << std::endl;
         assert(best_schedule->satisfiesPrecedenceConstraints());
 #endif
 
-        if (initial_costs > current_schedule.current_cost)
+        if (initial_costs > current_schedule.current_cost) {
             return true;
-        else
+        } else {
             return false;
+        }
     }
 
     bool run_local_search_unlock_delay() {
-
         const double initial_costs = current_schedule.current_cost;
 
 #ifdef KL_DEBUG_1
-        std::cout << "Initial costs " << initial_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps."<< std::endl;
+        std::cout << "Initial costs " << initial_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps."
+                  << std::endl;
 #endif
 
 #ifdef KL_PRINT_SCHEDULE
@@ -2157,8 +1964,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
         for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) {
 #ifdef KL_DEBUG
-            std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost
-                      << std::endl;
+            std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost << std::endl;
             if (max_gain_heap.size() == 0) {
                 std::cout << "max gain heap empty" << std::endl;
             }
@@ -2173,9 +1979,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
             unsigned inner_counter = 0;
 
-            while (failed_branches < parameters.max_num_failed_branches &&
-                   inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) {
-
+            while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations
+                   && max_gain_heap.size() > 0) {
                 inner_counter++;
 
                 const bool iter_feasible = current_schedule.current_feasible;
@@ -2183,7 +1988,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #ifdef KL_DEBUG
                 print_heap();
 #endif
-                kl_move<Graph_t> best_move = find_best_move(); // O(log n)
+                kl_move<Graph_t> best_move = find_best_move();    // O(log n)
 
                 if (best_move.gain < -std::numeric_limits<double>::max() * .25) {
 #ifdef KL_DEBUG
@@ -2194,23 +1999,22 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
                 std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost "
-                          << best_move.change_in_cost << " from step " << best_move.from_step << " to "
-                          << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
+                          << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step
+                          << ", from proc " << best_move.from_proc << " to " << best_move.to_proc
                           << " violations: " << current_schedule.current_violations.size() << " old cost "
                           << current_schedule.current_cost << " new cost "
                           << current_schedule.current_cost + best_move.change_in_cost << std::endl;
 
                 if constexpr (current_schedule.use_memory_constraint) {
                     std::cout << "memory to step/proc "
-                              << current_schedule.memory_constraint
-                                     .step_processor_memory[best_move.to_step][best_move.to_proc]
+                              << current_schedule.memory_constraint.step_processor_memory[best_move.to_step][best_move.to_proc]
                               << std::endl;
                 }
 
                 printSetScheduleWorkMemNodesGrid(std::cout, current_schedule.set_schedule, true);
 #endif
 
-                current_schedule.apply_move(best_move); // O(p + log n)
+                current_schedule.apply_move(best_move);    // O(p + log n)
 
                 //             if (best_move.gain <= 0.000000001) {
                 //                 conseq_no_gain_moves_counter++;
@@ -2234,8 +2038,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
 #ifdef KL_DEBUG
                 BspSchedule<Graph_t> tmp_schedule(current_schedule.set_schedule);
-                if (not tmp_schedule.satisfiesMemoryConstraints())
+                if (not tmp_schedule.satisfiesMemoryConstraints()) {
                     std::cout << "Mem const violated" << std::endl;
+                }
 #endif
 
                 update_reward_penalty();
@@ -2244,18 +2049,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #ifdef KL_DEBUG
                 double tmp_costs = current_schedule.current_cost;
                 if (tmp_costs != compute_current_costs()) {
-
-                    std::cout << "current costs: " << current_schedule.current_cost
-                              << " best move gain: " << best_move.gain
-                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs
-                              << std::endl;
+                    std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain
+                              << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl;
 
                     std::cout << "! costs not equal " << std::endl;
                 }
 #endif
 
                 if (iter_feasible != current_schedule.current_feasible) {
-
                     if (iter_feasible) {
 #ifdef KL_DEBUG
                         std::cout << "===> current schedule changed from feasible to infeasible" << std::endl;
@@ -2268,7 +2069,7 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                             std::cout << "save best schedule with costs " << iter_costs << std::endl;
 #endif
                             best_schedule_costs = iter_costs;
-                            save_best_schedule(current_schedule.vector_schedule); // O(n)
+                            save_best_schedule(current_schedule.vector_schedule);    // O(n)
                             reverse_move_best_schedule(best_move);
 #ifdef KL_DEBUG
                             std::cout << "KLBase save best schedule with (source node comm) cost "
@@ -2283,13 +2084,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #endif
                     }
                 } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) {
-
                     if (iter_costs < best_schedule_costs) {
 #ifdef KL_DEBUG
                         std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl;
 #endif
                         best_schedule_costs = iter_costs;
-                        save_best_schedule(current_schedule.vector_schedule); // O(n)
+                        save_best_schedule(current_schedule.vector_schedule);    // O(n)
                         reverse_move_best_schedule(best_move);
 #ifdef KL_DEBUG
                         std::cout << "KLBase save best schedule with (source node comm) cost "
@@ -2325,14 +2125,13 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 select_unlock_neighbors(best_move.node);
 
                 if (check_violation_locked()) {
-
                     if (iter_feasible != current_schedule.current_feasible && iter_feasible) {
                         node_causing_first_violation = best_move.node;
                     }
                     super_locked_nodes.insert(node_causing_first_violation);
 #ifdef KL_DEBUG
-                    std::cout << "abort iteration on locked violation, super locking node "
-                              << node_causing_first_violation << std::endl;
+                    std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation
+                              << std::endl;
 #endif
                     break;
                 }
@@ -2347,11 +2146,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
                 update_node_gains(nodes_to_update);
 
-                if (not(current_schedule.current_violations.size() > 4) && not iter_feasible &&
-                    not max_gain_heap.empty()) {
+                if (not(current_schedule.current_violations.size() > 4) && not iter_feasible && not max_gain_heap.empty()) {
                     const auto &iter = max_gain_heap.ordered_begin();
                     if (iter->gain < parameters.gain_threshold) {
-
                         node_selection.clear();
                         locked_nodes.clear();
                         super_locked_nodes.clear();
@@ -2367,14 +2164,13 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     }
                 }
 
-                if (current_schedule.current_cost > (parameters.max_div_best_sol_base_percent +
-                                                     outer_counter * parameters.max_div_best_sol_rate_percent) *
-                                                        best_schedule_costs) {
-
+                if (current_schedule.current_cost
+                    > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent)
+                          * best_schedule_costs) {
 #ifdef KL_DEBUG
                     std::cout << "current cost " << current_schedule.current_cost
-                              << " too far away from best schedule costs: " << best_schedule_costs
-                              << " rollback to best schedule" << std::endl;
+                              << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule"
+                              << std::endl;
 #endif
 
                     current_schedule.set_current_schedule(*best_schedule);
@@ -2389,13 +2185,12 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     failed_branches++;
                 }
 
-            } // while
+            }    // while
 
 #ifdef KL_DEBUG
-            std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost
-                      << " with " << current_schedule.current_violations.size()
-                      << " violation, best sol cost: " << best_schedule_costs << " with "
-                      << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
+            std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost << " with "
+                      << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs
+                      << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/"
                       << parameters.max_outer_iterations << std::endl;
 #endif
 
@@ -2404,9 +2199,8 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                     save_best_schedule(current_schedule.vector_schedule);
                     best_schedule_costs = current_schedule.current_cost;
 #ifdef KL_DEBUG
-                    std::cout << "KLBase save best schedule with (source node comm) cost "
-                              << best_schedule->computeTotalCosts() << " and number of supersteps "
-                              << best_schedule->numberOfSupersteps() << std::endl;
+                    std::cout << "KLBase save best schedule with (source node comm) cost " << best_schedule->computeTotalCosts()
+                              << " and number of supersteps " << best_schedule->numberOfSupersteps() << std::endl;
 #endif
                 } else {
                     current_schedule.set_current_schedule(*best_schedule);
@@ -2455,11 +2249,9 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 #endif
 
             if (best_iter_costs <= current_schedule.current_cost) {
-
                 no_improvement_iter_counter++;
 
                 if (no_improvement_iter_counter > parameters.reset_epoch_counter_threshold) {
-
                     step_selection_epoch_counter = 0;
                     parameters.reset_epoch_counter_threshold += current_schedule.num_steps();
 #ifdef KL_DEBUG
@@ -2485,17 +2277,14 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 //             }
 
                 if (no_improvement_iter_counter > 50 && no_improvement_iter_counter % 3 == 0) {
-
                     parameters.initial_penalty = 0.0;
                     parameters.violations_threshold = 5;
 
                 } else if (no_improvement_iter_counter > 30 && no_improvement_iter_counter % 5 == 0) {
-
                     parameters.initial_penalty = 0.0;
                     parameters.violations_threshold = 4;
 
                 } else if (no_improvement_iter_counter > 9 && no_improvement_iter_counter % 10 == 0) {
-
                     parameters.initial_penalty = 0.0;
                     parameters.violations_threshold = 3;
 #ifdef KL_DEBUG
@@ -2505,7 +2294,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
                 }
 
                 if (no_improvement_iter_counter == 35) {
-
                     parameters.max_div_best_sol_base_percent *= 1.02;
 #ifdef KL_DEBUG
                     std::cout << "no improvement for " << no_improvement_iter_counter
@@ -2529,20 +2317,21 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
             std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl;
 #endif
 
-        } // for
+        }    // for
 
         cleanup_datastructures();
 
 #ifdef KL_DEBUG_1
-        std::cout << "kl done, current cost " << best_schedule_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps vs " << initial_costs << " initial costs"
-                  << std::endl;
+        std::cout << "kl done, current cost " << best_schedule_costs << " with " << best_schedule->numberOfSupersteps()
+                  << " supersteps vs " << initial_costs << " initial costs" << std::endl;
         assert(best_schedule->satisfiesPrecedenceConstraints());
 #endif
 
-        if (initial_costs > current_schedule.current_cost)
+        if (initial_costs > current_schedule.current_cost) {
             return true;
-        else
+        } else {
             return false;
+        }
     }
 
     // virtual void checkMergeSupersteps();
@@ -2551,14 +2340,13 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     // virtual void insertSuperstep(unsigned step);
 
     void print_heap() {
-
         std::cout << "heap current size: " << max_gain_heap.size() << std::endl;
         std::cout << "heap top node " << max_gain_heap.top().node << " gain " << max_gain_heap.top().gain << std::endl;
 
         unsigned count = 0;
         for (auto it = max_gain_heap.ordered_begin(); it != max_gain_heap.ordered_end(); ++it) {
-            std::cout << "node " << it->node << " gain " << it->gain << " to proc " << it->to_proc << " to step "
-                      << it->to_step << std::endl;
+            std::cout << "node " << it->node << " gain " << it->gain << " to proc " << it->to_proc << " to step " << it->to_step
+                      << std::endl;
 
             if (count++ > 15 || it->gain <= 0.0) {
                 break;
@@ -2583,7 +2371,6 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     virtual ~kl_base() = default;
 
     virtual RETURN_STATUS improveSchedule(BspSchedule<Graph_t> &schedule) override {
-
         reset_run_datastructures();
 
         best_schedule = &schedule;
@@ -2597,10 +2384,11 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
 
         bool improvement_found = run_local_search_unlock_delay();
 
-        if (improvement_found)
+        if (improvement_found) {
             return RETURN_STATUS::OSP_SUCCESS;
-        else
+        } else {
             return RETURN_STATUS::BEST_FOUND;
+        }
     }
 
     virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule<Graph_t> &schedule) override {
@@ -2622,4 +2410,4 @@ class kl_base : public ImprovementScheduler<Graph_t>, public Ikl_cost_function {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp
index 1c544fd1..f8ded91e 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp
@@ -28,9 +28,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct kl_move {
-
     vertex_idx_t<Graph_t> node;
 
     double gain;
@@ -43,19 +42,28 @@ struct kl_move {
     unsigned to_step;
 
     kl_move() : node(0), gain(0), change_in_cost(0), from_proc(0), from_step(0), to_proc(0), to_step(0) {}
-    kl_move(vertex_idx_t<Graph_t> _node, double _gain, double _change_cost, unsigned _from_proc, unsigned _from_step,
-            unsigned _to_proc, unsigned _to_step)
-        : node(_node), gain(_gain), change_in_cost(_change_cost), from_proc(_from_proc), from_step(_from_step),
-          to_proc(_to_proc), to_step(_to_step) {}
+
+    kl_move(vertex_idx_t<Graph_t> _node,
+            double _gain,
+            double _change_cost,
+            unsigned _from_proc,
+            unsigned _from_step,
+            unsigned _to_proc,
+            unsigned _to_step)
+        : node(_node),
+          gain(_gain),
+          change_in_cost(_change_cost),
+          from_proc(_from_proc),
+          from_step(_from_step),
+          to_proc(_to_proc),
+          to_step(_to_step) {}
 
     bool operator<(kl_move const &rhs) const {
-        return (gain < rhs.gain) or (gain <= rhs.gain and change_in_cost < rhs.change_in_cost) or
-               (gain <= rhs.gain and change_in_cost <= rhs.change_in_cost and node > rhs.node);
+        return (gain < rhs.gain) or (gain <= rhs.gain and change_in_cost < rhs.change_in_cost)
+               or (gain <= rhs.gain and change_in_cost <= rhs.change_in_cost and node > rhs.node);
     }
 
-    kl_move reverse_move() const {
-        return kl_move(node, -gain, -change_in_cost, to_proc, to_step, from_proc, from_step);
-    }
+    kl_move reverse_move() const { return kl_move(node, -gain, -change_in_cost, to_proc, to_step, from_proc, from_step); }
 };
 
 class Ikl_cost_function {
@@ -65,16 +73,14 @@ class Ikl_cost_function {
     virtual ~Ikl_cost_function() = default;
 };
 
-template<typename Graph_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename MemoryConstraint_t>
 class kl_current_schedule {
-
   private:
     using VertexType = vertex_idx_t<Graph_t>;
     using EdgeType = edge_desc_t<Graph_t>;
 
   public:
     kl_current_schedule(Ikl_cost_function *cost_f_) : cost_f(cost_f_) {
-
 #ifdef KL_DEBUG
         if constexpr (use_memory_constraint) {
             std::cout << "KLCurrentSchedule constructor with memory constraint" << std::endl;
@@ -105,13 +111,12 @@ class kl_current_schedule {
     double current_cost = 0;
 
     bool current_feasible = true;
-    std::unordered_set<EdgeType> current_violations; // edges
+    std::unordered_set<EdgeType> current_violations;    // edges
 
     std::unordered_map<VertexType, EdgeType> new_violations;
     std::unordered_set<EdgeType> resolved_violations;
 
     void remove_superstep(unsigned step) {
-
         if (step > 0) {
             vector_schedule.mergeSupersteps(step - 1, step);
             set_schedule.mergeSupersteps(step - 1, step);
@@ -126,12 +131,10 @@ class kl_current_schedule {
         }
 
         for (unsigned i = step + 1; i < num_steps(); i++) {
-
             step_max_work[i] = step_max_work[i + 1];
             step_second_max_work[i] = step_second_max_work[i + 1];
 
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
                 step_processor_work[i][proc] = step_processor_work[i + 1][proc];
 
                 if constexpr (use_memory_constraint) {
@@ -152,7 +155,6 @@ class kl_current_schedule {
     }
 
     void reset_superstep(unsigned step) {
-
         if (step > 0) {
             compute_work_memory_datastructures(step - 1, step - 1);
             if (step < num_steps() - 1) {
@@ -190,9 +192,7 @@ class kl_current_schedule {
     inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); }
 
     virtual void set_current_schedule(const IBspSchedule<Graph_t> &schedule) {
-
         if (num_steps() == schedule.numberOfSupersteps()) {
-
 #ifdef KL_DEBUG
             std::cout << "KLCurrentSchedule set current schedule, same nr supersteps" << std::endl;
 #endif
@@ -204,16 +204,14 @@ class kl_current_schedule {
             }
 
             for (const auto &node : instance->getComputationalDag().vertices()) {
-
                 vector_schedule.setAssignedProcessor(node, schedule.assignedProcessor(node));
                 vector_schedule.setAssignedSuperstep(node, schedule.assignedSuperstep(node));
 
-                set_schedule.step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)]
-                    .insert(node);
+                set_schedule.step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].insert(
+                    node);
             }
 
         } else {
-
 #ifdef KL_DEBUG
             std::cout << "KLCurrentSchedule set current schedule, different nr supersteps" << std::endl;
 #endif
@@ -236,7 +234,6 @@ class kl_current_schedule {
     }
 
     virtual void initialize_superstep_datastructures() {
-
 #ifdef KL_DEBUG
         std::cout << "KLCurrentSchedule initialize datastructures" << std::endl;
 #endif
@@ -244,18 +241,16 @@ class kl_current_schedule {
         const unsigned num_procs = instance->numberOfProcessors();
 
         if constexpr (use_memory_constraint) {
-
             memory_constraint.initialize(set_schedule, vector_schedule);
         }
 
-        step_processor_work =
-            std::vector<std::vector<v_workw_t<Graph_t>>>(num_steps(), std::vector<v_workw_t<Graph_t>>(num_procs, 0));
+        step_processor_work
+            = std::vector<std::vector<v_workw_t<Graph_t>>>(num_steps(), std::vector<v_workw_t<Graph_t>>(num_procs, 0));
         step_max_work = std::vector<v_workw_t<Graph_t>>(num_steps(), 0);
         step_second_max_work = std::vector<v_workw_t<Graph_t>>(num_steps(), 0);
     }
 
     virtual void cleanup_superstep_datastructures() {
-
         step_processor_work.clear();
         step_max_work.clear();
         step_second_max_work.clear();
@@ -266,18 +261,15 @@ class kl_current_schedule {
     }
 
     virtual void compute_work_memory_datastructures(unsigned start_step, unsigned end_step) {
-
         if constexpr (use_memory_constraint) {
             memory_constraint.compute_memory_datastructure(start_step, end_step);
         }
 
         for (unsigned step = start_step; step <= end_step; step++) {
-
             step_max_work[step] = 0;
             step_second_max_work[step] = 0;
 
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
                 step_processor_work[step][proc] = 0;
 
                 for (const auto &node : set_schedule.step_processor_vertices[step][proc]) {
@@ -285,12 +277,10 @@ class kl_current_schedule {
                 }
 
                 if (step_processor_work[step][proc] > step_max_work[step]) {
-
                     step_second_max_work[step] = step_max_work[step];
                     step_max_work[step] = step_processor_work[step][proc];
 
                 } else if (step_processor_work[step][proc] > step_second_max_work[step]) {
-
                     step_second_max_work[step] = step_processor_work[step][proc];
                 }
             }
@@ -298,7 +288,6 @@ class kl_current_schedule {
     }
 
     virtual void recompute_current_violations() {
-
         current_violations.clear();
 
 #ifdef KL_DEBUG
@@ -306,15 +295,12 @@ class kl_current_schedule {
 #endif
 
         for (const auto &edge : edges(instance->getComputationalDag())) {
-
             const auto &source_v = source(edge, instance->getComputationalDag());
             const auto &target_v = target(edge, instance->getComputationalDag());
 
             if (vector_schedule.assignedSuperstep(source_v) >= vector_schedule.assignedSuperstep(target_v)) {
-
-                if (vector_schedule.assignedProcessor(source_v) != vector_schedule.assignedProcessor(target_v) ||
-                    vector_schedule.assignedSuperstep(source_v) > vector_schedule.assignedSuperstep(target_v)) {
-
+                if (vector_schedule.assignedProcessor(source_v) != vector_schedule.assignedProcessor(target_v)
+                    || vector_schedule.assignedSuperstep(source_v) > vector_schedule.assignedSuperstep(target_v)) {
                     current_violations.insert(edge);
 
 #ifdef KL_DEBUG
@@ -336,7 +322,6 @@ class kl_current_schedule {
     };
 
     virtual void apply_move(kl_move<Graph_t> move) {
-
         vector_schedule.setAssignedProcessor(move.node, move.to_proc);
         vector_schedule.setAssignedSuperstep(move.node, move.to_step);
 
@@ -345,22 +330,18 @@ class kl_current_schedule {
 
         current_cost += move.change_in_cost;
 
-        step_processor_work[move.to_step][move.to_proc] +=
-            instance->getComputationalDag().vertex_work_weight(move.node);
-        step_processor_work[move.from_step][move.from_proc] -=
-            instance->getComputationalDag().vertex_work_weight(move.node);
+        step_processor_work[move.to_step][move.to_proc] += instance->getComputationalDag().vertex_work_weight(move.node);
+        step_processor_work[move.from_step][move.from_proc] -= instance->getComputationalDag().vertex_work_weight(move.node);
 
         update_max_work_datastructures(move);
         update_violations(move.node);
 
         if constexpr (use_memory_constraint) {
-
             memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step);
         }
     }
 
     virtual void initialize_current_schedule(const IBspSchedule<Graph_t> &schedule) {
-
 #ifdef KL_DEBUG
         std::cout << "KLCurrentSchedule initialize current schedule" << std::endl;
 #endif
@@ -378,32 +359,24 @@ class kl_current_schedule {
 
   private:
     void update_violations(VertexType node) {
-
         new_violations.clear();
         resolved_violations.clear();
 
         for (const auto &edge : out_edges(node, instance->getComputationalDag())) {
-
             const auto &child = target(edge, instance->getComputationalDag());
 
             if (current_violations.find(edge) == current_violations.end()) {
-
                 if (vector_schedule.assignedSuperstep(node) >= vector_schedule.assignedSuperstep(child)) {
-
-                    if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(child) ||
-                        vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(child)) {
-
+                    if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(child)
+                        || vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(child)) {
                         current_violations.insert(edge);
                         new_violations[child] = edge;
                     }
                 }
             } else {
-
                 if (vector_schedule.assignedSuperstep(node) <= vector_schedule.assignedSuperstep(child)) {
-
-                    if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(child) ||
-                        vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(child)) {
-
+                    if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(child)
+                        || vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(child)) {
                         current_violations.erase(edge);
                         resolved_violations.insert(edge);
                     }
@@ -412,27 +385,20 @@ class kl_current_schedule {
         }
 
         for (const auto &edge : in_edges(node, instance->getComputationalDag())) {
-
             const auto &parent = source(edge, instance->getComputationalDag());
 
             if (current_violations.find(edge) == current_violations.end()) {
-
                 if (vector_schedule.assignedSuperstep(node) <= vector_schedule.assignedSuperstep(parent)) {
-
-                    if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(parent) ||
-                        vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(parent)) {
-
+                    if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(parent)
+                        || vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(parent)) {
                         current_violations.insert(edge);
                         new_violations[parent] = edge;
                     }
                 }
             } else {
-
                 if (vector_schedule.assignedSuperstep(node) >= vector_schedule.assignedSuperstep(parent)) {
-
-                    if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(parent) ||
-                        vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(parent)) {
-
+                    if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(parent)
+                        || vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(parent)) {
                         current_violations.erase(edge);
                         resolved_violations.insert(edge);
                     }
@@ -468,41 +434,33 @@ class kl_current_schedule {
     }
 
     void update_max_work_datastructures(kl_move<Graph_t> move) {
-
         if (move.from_step == move.to_step) {
-
             recompute_superstep_max_work(move.from_step);
 
         } else {
-
             recompute_superstep_max_work(move.from_step);
             recompute_superstep_max_work(move.to_step);
         }
     }
 
     void recompute_superstep_max_work(unsigned step) {
-
         step_max_work[step] = 0;
         step_second_max_work[step] = 0;
 
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
             if (step_processor_work[step][proc] > step_max_work[step]) {
-
                 step_second_max_work[step] = step_max_work[step];
                 step_max_work[step] = step_processor_work[step][proc];
 
             } else if (step_processor_work[step][proc] > step_second_max_work[step]) {
-
                 step_second_max_work[step] = step_processor_work[step][proc];
             }
         }
     }
 };
 
-template<typename Graph_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename MemoryConstraint_t>
 class kl_current_schedule_max_comm : public kl_current_schedule<Graph_t, MemoryConstraint_t> {
-
   public:
     std::vector<std::vector<v_commw_t<Graph_t>>> step_processor_send;
     std::vector<v_commw_t<Graph_t>> step_max_send;
@@ -513,4 +471,4 @@ class kl_current_schedule_max_comm : public kl_current_schedule<Graph_t, MemoryC
     std::vector<v_commw_t<Graph_t>> step_second_max_receive;
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp
index 7f36fefa..81841b0e 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp
@@ -28,18 +28,17 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename MemoryConstraint_t>
 class kl_hyper_total_comm : public kl_total<Graph_t, MemoryConstraint_t> {
-
   protected:
-    
-    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node, unsigned current_step, unsigned current_proc,
+    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node,
+                                   unsigned current_step,
+                                   unsigned current_proc,
                                    unsigned new_proc) override {
         throw std::runtime_error("Not implemented yet");
     }
 
     virtual double compute_current_costs() override {
-
         double work_costs = 0;
         for (unsigned step = 0; step < current_schedule.num_steps(); step++) {
             work_costs += current_schedule.step_max_work[step];
@@ -48,14 +47,13 @@ class kl_hyper_total_comm : public kl_total<Graph_t, MemoryConstraint_t> {
         double comm_costs = 0;
 
         for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) {
-
-            if (is_sink(node, current_schedule.instance->getComputationalDag()))
+            if (is_sink(node, current_schedule.instance->getComputationalDag())) {
                 continue;
+            }
 
             std::unordered_set<unsigned> intersects;
 
             for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) {
-
                 const unsigned &target_proc = current_schedule.vector_schedule.assignedProcessor(target);
 
                 if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc) {
@@ -63,13 +61,11 @@ class kl_hyper_total_comm : public kl_total<Graph_t, MemoryConstraint_t> {
                 }
             }
 
-            comm_costs +=
-                intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node);
+            comm_costs += intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node);
         }
 
-        current_schedule.current_cost =
-            work_costs + comm_costs * current_schedule.comm_multiplier +
-            (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts();
+        current_schedule.current_cost = work_costs + comm_costs * current_schedule.comm_multiplier
+                                        + (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts();
 
         return current_schedule.current_cost;
     }
@@ -82,4 +78,4 @@ class kl_hyper_total_comm : public kl_total<Graph_t, MemoryConstraint_t> {
     virtual std::string getScheduleName() const override { return "KLHyperTotalComm"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp
index 365f9e85..6d4a15fe 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp
@@ -27,17 +27,18 @@ limitations under the License.
 #include "kl_total.hpp"
 
 namespace osp {
-template<typename Graph_t, typename MemoryConstraint_t>
-class kl_hyper_total_cut : public kl_total<Graph_t, MemoryConstraint_t> {
 
+template <typename Graph_t, typename MemoryConstraint_t>
+class kl_hyper_total_cut : public kl_total<Graph_t, MemoryConstraint_t> {
   protected:
-    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node, unsigned current_step, unsigned current_proc,
+    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node,
+                                   unsigned current_step,
+                                   unsigned current_proc,
                                    unsigned new_proc) override {
         throw std::runtime_error("Not implemented yet");
     }
 
     virtual double compute_current_costs() override {
-
         double work_costs = 0;
         for (unsigned step = 0; step < current_schedule.num_steps(); step++) {
             work_costs += current_schedule.step_max_work[step];
@@ -46,19 +47,18 @@ class kl_hyper_total_cut : public kl_total<Graph_t, MemoryConstraint_t> {
         double comm_costs = 0;
 
         for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) {
-
-            if (is_sink(node, current_schedule.instance->getComputationalDag()))
+            if (is_sink(node, current_schedule.instance->getComputationalDag())) {
                 continue;
+            }
 
             std::unordered_set<unsigned> intersects;
 
             for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) {
-
                 const unsigned &target_proc = current_schedule.vector_schedule.assignedProcessor(target);
                 const unsigned &target_step = current_schedule.vector_schedule.assignedSuperstep(target);
 
-                if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc ||
-                    current_schedule.vector_schedule.assignedSuperstep(node) != target_step) {
+                if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc
+                    || current_schedule.vector_schedule.assignedSuperstep(node) != target_step) {
                     intersects.insert(current_schedule.instance->numberOfProcessors() * target_step + target_proc);
                 }
             }
@@ -66,9 +66,8 @@ class kl_hyper_total_cut : public kl_total<Graph_t, MemoryConstraint_t> {
             comm_costs += intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node);
         }
 
-        current_schedule.current_cost =
-            work_costs + comm_costs * current_schedule.comm_multiplier +
-            (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts();
+        current_schedule.current_cost = work_costs + comm_costs * current_schedule.comm_multiplier
+                                        + (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts();
 
         return current_schedule.current_cost;
     }
@@ -81,4 +80,4 @@ class kl_hyper_total_cut : public kl_total<Graph_t, MemoryConstraint_t> {
     virtual std::string getScheduleName() const override { return "KLHyperTotalCut"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp
index 7b7c7dca..0e3f5d65 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp
@@ -28,28 +28,24 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename MemoryConstraint_t, bool use_node_communication_costs_arg>
+template <typename Graph_t, typename MemoryConstraint_t, bool use_node_communication_costs_arg>
 class kl_current_schedule_total : public kl_current_schedule<Graph_t, MemoryConstraint_t> {
- 
   public:
-    kl_current_schedule_total(Ikl_cost_function *cost_f_)
-        : kl_current_schedule<Graph_t, MemoryConstraint_t>(cost_f_) {}
+    kl_current_schedule_total(Ikl_cost_function *cost_f_) : kl_current_schedule<Graph_t, MemoryConstraint_t>(cost_f_) {}
 
-    double comm_multiplier = 1.0;    
+    double comm_multiplier = 1.0;
     constexpr static bool use_node_communication_costs = use_node_communication_costs_arg || not has_edge_weights_v<Graph_t>;
-
 };
 
-template<typename Graph_t, typename MemoryConstraint_t, bool use_node_communication_costs_arg>
+template <typename Graph_t, typename MemoryConstraint_t, bool use_node_communication_costs_arg>
 class kl_total : public kl_base<Graph_t, MemoryConstraint_t> {
-
   protected:
     kl_current_schedule_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg> current_schedule;
 
     v_commw_t<Graph_t> node_comm_selection_threshold = 0;
     double max_edge_weight = 0.0;
-    virtual void initialize_datastructures() override {
 
+    virtual void initialize_datastructures() override {
 #ifdef KL_DEBUG
         std::cout << "KLTotal initialize datastructures" << std::endl;
 #endif
@@ -60,39 +56,36 @@ class kl_total : public kl_base<Graph_t, MemoryConstraint_t> {
         v_workw_t<Graph_t> max_node_weight_ = 0;
 
         for (const auto vertex : current_schedule.instance->getComputationalDag().vertices()) {
-
-            if (is_sink(vertex, current_schedule.instance->getComputationalDag()))
+            if (is_sink(vertex, current_schedule.instance->getComputationalDag())) {
                 continue;
+            }
 
-            max_edge_weight_ =
-                std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().vertex_comm_weight(vertex));
+            max_edge_weight_
+                = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().vertex_comm_weight(vertex));
 
-            max_node_weight_ =
-                std::max(max_node_weight_, current_schedule.instance->getComputationalDag().vertex_work_weight(vertex));
+            max_node_weight_
+                = std::max(max_node_weight_, current_schedule.instance->getComputationalDag().vertex_work_weight(vertex));
         }
-       
 
         if constexpr (not current_schedule.use_node_communication_costs) {
-
             max_edge_weight_ = 0;
 
             for (const auto &edge : edges(current_schedule.instance->getComputationalDag())) {
-                max_edge_weight_ = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().edge_comm_weight(edge));
+                max_edge_weight_
+                    = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().edge_comm_weight(edge));
             }
         }
-        
 
         max_edge_weight = max_edge_weight_ + max_node_weight_;
 
-        kl_base<Graph_t, MemoryConstraint_t>::parameters.initial_penalty =
-            max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts();
+        kl_base<Graph_t, MemoryConstraint_t>::parameters.initial_penalty
+            = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts();
 
-        kl_base<Graph_t, MemoryConstraint_t>::parameters.gain_threshold =
-            max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts();
+        kl_base<Graph_t, MemoryConstraint_t>::parameters.gain_threshold
+            = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts();
     }
 
     virtual void update_reward_penalty() override {
-
         if (current_schedule.current_violations.size() <= kl_base<Graph_t, MemoryConstraint_t>::parameters.violations_threshold) {
             kl_base<Graph_t, MemoryConstraint_t>::penalty = kl_base<Graph_t, MemoryConstraint_t>::parameters.initial_penalty;
             kl_base<Graph_t, MemoryConstraint_t>::reward = 0.0;
@@ -100,52 +93,42 @@ class kl_total : public kl_base<Graph_t, MemoryConstraint_t> {
         } else {
             kl_base<Graph_t, MemoryConstraint_t>::parameters.violations_threshold = 0;
 
-            kl_base<Graph_t, MemoryConstraint_t>::penalty = std::log((current_schedule.current_violations.size())) * max_edge_weight *
-                                        current_schedule.comm_multiplier *
-                                        current_schedule.instance->communicationCosts();
+            kl_base<Graph_t, MemoryConstraint_t>::penalty = std::log((current_schedule.current_violations.size()))
+                                                            * max_edge_weight * current_schedule.comm_multiplier
+                                                            * current_schedule.instance->communicationCosts();
 
-            kl_base<Graph_t, MemoryConstraint_t>::reward = std::sqrt((current_schedule.current_violations.size() + 4)) * max_edge_weight *
-                                       current_schedule.comm_multiplier *
-                                       current_schedule.instance->communicationCosts();
+            kl_base<Graph_t, MemoryConstraint_t>::reward = std::sqrt((current_schedule.current_violations.size() + 4))
+                                                           * max_edge_weight * current_schedule.comm_multiplier
+                                                           * current_schedule.instance->communicationCosts();
         }
     }
 
     virtual void set_initial_reward_penalty() override {
-
         kl_base<Graph_t, MemoryConstraint_t>::penalty = kl_base<Graph_t, MemoryConstraint_t>::parameters.initial_penalty;
-        kl_base<Graph_t, MemoryConstraint_t>::reward =
-            max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts();
+        kl_base<Graph_t, MemoryConstraint_t>::reward
+            = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts();
     }
 
     virtual void select_nodes_comm() override {
-
         if constexpr (current_schedule.use_node_communication_costs) {
-
             for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) {
-
                 for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) {
-
-                    if (current_schedule.vector_schedule.assignedProcessor(node) !=
-                        current_schedule.vector_schedule.assignedProcessor(source)) {
-
-                        if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) >
-                            node_comm_selection_threshold) {
-
-                              kl_base<Graph_t, MemoryConstraint_t>::node_selection.insert(node);
+                    if (current_schedule.vector_schedule.assignedProcessor(node)
+                        != current_schedule.vector_schedule.assignedProcessor(source)) {
+                        if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node)
+                            > node_comm_selection_threshold) {
+                            kl_base<Graph_t, MemoryConstraint_t>::node_selection.insert(node);
                             break;
                         }
                     }
                 }
 
                 for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) {
-
-                    if (current_schedule.vector_schedule.assignedProcessor(node) !=
-                        current_schedule.vector_schedule.assignedProcessor(target)) {
-
-                        if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) >
-                            node_comm_selection_threshold) {
-
-                              kl_base<Graph_t, MemoryConstraint_t>::node_selection.insert(node);
+                    if (current_schedule.vector_schedule.assignedProcessor(node)
+                        != current_schedule.vector_schedule.assignedProcessor(target)) {
+                        if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node)
+                            > node_comm_selection_threshold) {
+                            kl_base<Graph_t, MemoryConstraint_t>::node_selection.insert(node);
                             break;
                         }
                     }
@@ -154,16 +137,12 @@ class kl_total : public kl_base<Graph_t, MemoryConstraint_t> {
 
         } else {
             for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) {
-
                 for (const auto &in_edge : in_edges(node, current_schedule.instance->getComputationalDag())) {
-
                     const auto &source_v = source(in_edge, current_schedule.instance->getComputationalDag());
-                    if (current_schedule.vector_schedule.assignedProcessor(node) !=
-                        current_schedule.vector_schedule.assignedProcessor(source_v)) {
-
-                        if (current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge) >
-                            node_comm_selection_threshold) {
-
+                    if (current_schedule.vector_schedule.assignedProcessor(node)
+                        != current_schedule.vector_schedule.assignedProcessor(source_v)) {
+                        if (current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)
+                            > node_comm_selection_threshold) {
                             kl_base<Graph_t, MemoryConstraint_t>::node_selection.insert(node);
                             break;
                         }
@@ -171,14 +150,11 @@ class kl_total : public kl_base<Graph_t, MemoryConstraint_t> {
                 }
 
                 for (const auto &out_edge : out_edges(node, current_schedule.instance->getComputationalDag())) {
-
                     const auto &target_v = target(out_edge, current_schedule.instance->getComputationalDag());
-                    if (current_schedule.vector_schedule.assignedProcessor(node) !=
-                        current_schedule.vector_schedule.assignedProcessor(target_v)) {
-
-                        if (current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge) >
-                            node_comm_selection_threshold) {
-
+                    if (current_schedule.vector_schedule.assignedProcessor(node)
+                        != current_schedule.vector_schedule.assignedProcessor(target_v)) {
+                        if (current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)
+                            > node_comm_selection_threshold) {
                             kl_base<Graph_t, MemoryConstraint_t>::node_selection.insert(node);
                             break;
                         }
@@ -189,11 +165,9 @@ class kl_total : public kl_base<Graph_t, MemoryConstraint_t> {
     }
 
   public:
-    kl_total()
-        : kl_base<Graph_t, MemoryConstraint_t>(current_schedule), current_schedule(this) {}
+    kl_total() : kl_base<Graph_t, MemoryConstraint_t>(current_schedule), current_schedule(this) {}
 
     virtual ~kl_total() = default;
-
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp
index 912650bb..b5f16bf0 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp
@@ -27,582 +27,829 @@ limitations under the License.
 #include "kl_total.hpp"
 
 namespace osp {
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, bool use_node_communication_costs_arg = true>
-class kl_total_comm : public kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg> {
 
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, bool use_node_communication_costs_arg = true>
+class kl_total_comm : public kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg> {
   protected:
-    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node, unsigned current_step, unsigned current_proc,
+    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node,
+                                   unsigned current_step,
+                                   unsigned current_proc,
                                    unsigned new_proc) override {
-
-        if constexpr (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.use_node_communication_costs) {
-
+        if constexpr (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                          .use_node_communication_costs) {
             if (current_proc == new_proc) {
-
                 for (const auto &target :
-                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().children(node)) {
-
-                    if ((current_step + 1 ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                         current_proc !=
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target)) ||
-                        (current_step ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                         current_proc ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target))) {
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                                current_proc !=
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target)) ||
-                               (current_step - 1 ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                                current_proc ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .vertex_comm_weight(node)) +
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .children(node)) {
+                    if ((current_step + 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(target)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(target))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(target))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(target)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(target))
+                               || (current_step - 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(target)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(target))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .vertex_comm_weight(node))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
 
                 for (const auto &source :
-                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().parents(node)) {
-
-                    if ((current_step - 1 ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                         current_proc !=
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source)) ||
-                        (current_step ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                         current_proc ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source))) {
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                                current_proc !=
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source)) ||
-                               (current_step + 1 ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                                current_proc ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .vertex_comm_weight(source)) +
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .parents(node)) {
+                    if ((current_step - 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(source)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(source))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(source))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(source)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(source))
+                               || (current_step + 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(source)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(source))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .vertex_comm_weight(source))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
             } else {
-
                 // current_proc != new_proc
 
                 for (const auto &target :
-                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().children(node)) {
-
-                    const unsigned &target_proc =
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target);
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .children(node)) {
+                    const unsigned &target_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(target);
                     if (target_proc == current_proc) {
-
-                        const double loss =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .vertex_comm_weight(node)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) ==
-                                   current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        const double loss
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .vertex_comm_weight(node))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(new_proc, target_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
 
                     } else if (target_proc == new_proc) {
-
-                        const double gain = static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                                .vertex_comm_weight(node)) *
-                                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(
-                                                current_proc, target_proc) *
-                                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) <
-                                   current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .vertex_comm_weight(node))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, target_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   < current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(target_proc != current_proc && target_proc != new_proc);
 
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc,
-                                                                                                      target_proc) -
-                                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc,
-                                                                                                      target_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(
-                                node) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) ==
-                            current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) ==
-                                   current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, target_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, target_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .vertex_comm_weight(node)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
 
                 for (const auto &source :
-                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().parents(node)) {
-
-                    const unsigned &source_proc =
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source);
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .parents(node)) {
+                    const unsigned &source_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(source);
                     if (source_proc == current_proc) {
-
-                        const double loss =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .vertex_comm_weight(source)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) ==
-                                   current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        const double loss
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .vertex_comm_weight(source))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, new_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
 
                     } else if (source_proc == new_proc) {
-
                         assert(source_proc != current_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .vertex_comm_weight(source)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) ==
-                                   current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .vertex_comm_weight(source))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, new_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(source_proc != current_proc && source_proc != new_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc,
-                                                                                                      source_proc) -
-                                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc,
-                                                                                                      source_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(
-                                source) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) ==
-                            current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) ==
-                                   current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, source_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, source_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .vertex_comm_weight(source)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
             }
         } else {
-
             if (current_proc == new_proc) {
-
                 for (const auto &out_edge :
-                     out_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-                    const auto &target_v =
-                        target(out_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
+                     out_edges(node,
+                               kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                   ->getComputationalDag())) {
+                    const auto &target_v
+                        = target(out_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
                     // for (const auto &target :
-                    // kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().children(node)) {
-
-                    if ((current_step + 1 ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                         current_proc !=
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v)) ||
-                        (current_step ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                         current_proc ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                                current_proc !=
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v)) ||
-                               (current_step - 1 ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                                current_proc ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .edge_comm_weight(out_edge)) +
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                    // kl_total<Graph_t,
+                    // MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().children(node)) {
+
+                    if ((current_step + 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(target_v)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(target_v))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(target_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(target_v)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(target_v))
+                               || (current_step - 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(target_v)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(target_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .edge_comm_weight(out_edge))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
 
                 for (const auto &in_edge :
-                     in_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-
-                    const auto &source_v =
-                        source(in_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
+                     in_edges(node,
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag())) {
+                    const auto &source_v
+                        = source(in_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
                     // for (const auto &source :
-                    // kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().parents(node)) {
-
-                    if ((current_step - 1 ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                         current_proc !=
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v)) ||
-                        (current_step ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                         current_proc ==
-                             kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                                current_proc !=
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v)) ||
-                               (current_step + 1 ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                                current_proc ==
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .edge_comm_weight(in_edge)) +
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                    // kl_total<Graph_t,
+                    // MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().parents(node)) {
+
+                    if ((current_step - 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(source_v)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(source_v))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(source_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(source_v)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(source_v))
+                               || (current_step + 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(source_v)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(source_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .edge_comm_weight(in_edge))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
             } else {
-
                 // current_proc != new_proc
 
                 for (const auto &out_edge :
-                     out_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-
-                    const auto &target_v =
-                        target(out_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-                    const unsigned &target_proc =
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v);
+                     out_edges(node,
+                               kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                   ->getComputationalDag())) {
+                    const auto &target_v
+                        = target(out_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
+                    const unsigned &target_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(target_v);
 
                     if (target_proc == current_proc) {
-
-                        const double loss =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .edge_comm_weight(out_edge)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) ==
-                                   current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        const double loss
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .edge_comm_weight(out_edge))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(new_proc, target_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
 
                     } else if (target_proc == new_proc) {
-
-                        const double gain = static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                                .edge_comm_weight(out_edge)) *
-                                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(
-                                                current_proc, target_proc) *
-                                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) ==
-                                   current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .edge_comm_weight(out_edge))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, target_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(target_proc != current_proc && target_proc != new_proc);
 
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc,
-                                                                                                      target_proc) -
-                                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc,
-                                                                                                      target_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(
-                                out_edge) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) ==
-                            current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) ==
-                                   current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, target_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, target_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .edge_comm_weight(out_edge)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
 
-                for (const auto &in_edge : in_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-                    const auto &source_v =
-                        source(in_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-
-                    const unsigned &source_proc =
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v);
+                for (const auto &in_edge :
+                     in_edges(node,
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag())) {
+                    const auto &source_v
+                        = source(in_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
+
+                    const unsigned &source_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(source_v);
                     if (source_proc == current_proc) {
-
-                        const double loss =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .edge_comm_weight(in_edge)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) ==
-                                   current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        const double loss
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .edge_comm_weight(in_edge))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, new_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
                     } else if (source_proc == new_proc) {
-
                         assert(source_proc != current_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                .edge_comm_weight(in_edge)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) ==
-                            current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) ==
-                                   current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .edge_comm_weight(in_edge))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, new_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(source_proc != current_proc && source_proc != new_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc,
-                                                                                                      source_proc) -
-                                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc,
-                                                                                                      source_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(
-                                in_edge) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) ==
-                            current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) ==
-                                   current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()
-                                    .edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, source_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, source_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .edge_comm_weight(in_edge)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
@@ -611,40 +858,60 @@ class kl_total_comm : public kl_total<Graph_t, MemoryConstraint_t, use_node_comm
     }
 
     virtual double compute_current_costs() override {
-
         double work_costs = 0;
-        for (unsigned step = 0; step < kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.num_steps(); step++) {
-            work_costs += kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.step_max_work[step];
+        for (unsigned step = 0;
+             step < kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.num_steps();
+             step++) {
+            work_costs
+                += kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.step_max_work[step];
         }
 
         double comm_costs = 0;
-        for (const auto &edge : edges(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-
-            const auto &source_v = source(edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-            const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v);
-            const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(
-                target(edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag()));
+        for (const auto &edge : edges(kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .instance->getComputationalDag())) {
+            const auto &source_v = source(edge,
+                                          kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .instance->getComputationalDag());
+            const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(source_v);
+            const unsigned &target_proc
+                = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.vector_schedule
+                      .assignedProcessor(
+                          target(edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag()));
 
             if (source_proc != target_proc) {
-
-                if constexpr (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.use_node_communication_costs) {
-                    comm_costs +=
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source_v) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(source_proc, target_proc);
+                if constexpr (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                  .use_node_communication_costs) {
+                    comm_costs
+                        += kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                               ->getComputationalDag()
+                               .vertex_comm_weight(source_v)
+                           * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                 ->communicationCosts(source_proc, target_proc);
                 } else {
-                    comm_costs +=
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(edge) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(source_proc, target_proc);
+                    comm_costs
+                        += kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                               ->getComputationalDag()
+                               .edge_comm_weight(edge)
+                           * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                 ->communicationCosts(source_proc, target_proc);
                 }
             }
         }
 
-        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.current_cost =
-            work_costs + comm_costs * kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier +
-            (static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.num_steps()) - 1) *
-                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->synchronisationCosts();
-
-        return kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.current_cost;
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.current_cost
+            = work_costs
+              + comm_costs
+                    * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.comm_multiplier
+              + (static_cast<double>(
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.num_steps())
+                 - 1)
+                    * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                          ->synchronisationCosts();
+
+        return kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.current_cost;
     }
 
   public:
@@ -655,9 +922,8 @@ class kl_total_comm : public kl_total<Graph_t, MemoryConstraint_t, use_node_comm
     virtual std::string getScheduleName() const override { return "KLTotalComm"; }
 };
 
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, bool use_node_communication_costs_arg = true>
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, bool use_node_communication_costs_arg = true>
 class kl_total_comm_test : public kl_total_comm<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg> {
-
   public:
     kl_total_comm_test() : kl_total_comm<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>() {}
 
@@ -670,10 +936,15 @@ class kl_total_comm_test : public kl_total_comm<Graph_t, MemoryConstraint_t, use
     }
 
     auto &get_node_gains() { return kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains; }
-    auto &get_node_change_in_costs() { return kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs; }
+
+    auto &get_node_change_in_costs() {
+        return kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs;
+    }
+
     auto &get_max_gain_heap() { return kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::max_gain_heap; }
 
-    void initialize_gain_heap_test(const std::unordered_set<vertex_idx_t<Graph_t>> &nodes, double reward_ = 0.0,
+    void initialize_gain_heap_test(const std::unordered_set<vertex_idx_t<Graph_t>> &nodes,
+                                   double reward_ = 0.0,
                                    double penalty_ = 0.0) {
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward = reward_;
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty = penalty_;
@@ -682,59 +953,64 @@ class kl_total_comm_test : public kl_total_comm<Graph_t, MemoryConstraint_t, use
     }
 
     void test_setup_schedule(BspSchedule<Graph_t> &schedule) {
-
-        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance = &schedule.getInstance();
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+            = &schedule.getInstance();
 
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::best_schedule = &schedule;
 
-        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_nodes = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance->numberOfVertices();
-        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_procs = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance->numberOfProcessors();
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_nodes
+            = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance->numberOfVertices();
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_procs
+            = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                  ->numberOfProcessors();
 
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::set_parameters();
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::initialize_datastructures();
     }
 
     RETURN_STATUS improve_schedule_test_1(BspSchedule<Graph_t> &schedule) {
-
-        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance = &schedule.getInstance();
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+            = &schedule.getInstance();
 
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::best_schedule = &schedule;
-        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_nodes = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance->numberOfVertices();
-        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_procs = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance->numberOfProcessors();
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_nodes
+            = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance->numberOfVertices();
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::num_procs
+            = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                  ->numberOfProcessors();
 
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::set_parameters();
         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::initialize_datastructures();
 
         bool improvement_found = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::run_local_search_simple();
 
-        
-
-        if (improvement_found)
+        if (improvement_found) {
             return RETURN_STATUS::OSP_SUCCESS;
-        else
+        } else {
             return RETURN_STATUS::BEST_FOUND;
+        }
     }
 
     RETURN_STATUS improve_schedule_test_2(BspSchedule<Graph_t> &schedule) {
-
         kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance = &schedule.getInstance();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::best_schedule = &schedule;
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::set_parameters();
         kl_total<Graph_t, MemoryConstraint_t, true>::initialize_datastructures();
 
         bool improvement_found = kl_total<Graph_t, MemoryConstraint_t, true>::run_local_search_unlock_delay();
 
-       
-
-        if (improvement_found)
+        if (improvement_found) {
             return RETURN_STATUS::OSP_SUCCESS;
-        else
+        } else {
             return RETURN_STATUS::BEST_FOUND;
+        }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp
index eea392dc..d10c6109 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp
@@ -27,589 +27,1061 @@ limitations under the License.
 #include "kl_total.hpp"
 
 namespace osp {
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, bool use_node_communication_costs_arg = true>
-class kl_total_cut : public kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg> {
 
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, bool use_node_communication_costs_arg = true>
+class kl_total_cut : public kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg> {
   protected:
     double max_edge_weight = 0.0;
 
-    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node, unsigned current_step, unsigned current_proc,
+    virtual void compute_comm_gain(vertex_idx_t<Graph_t> node,
+                                   unsigned current_step,
+                                   unsigned current_proc,
                                    unsigned new_proc) override {
-
-        if constexpr (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.use_node_communication_costs) {
-
+        if constexpr (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                          .use_node_communication_costs) {
             if (current_proc == new_proc) {
-
-                for (const auto &target : kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().children(node)) {
-
-                    const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target);
-                    const double loss =
-                        static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                    if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step - 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= loss;
+                for (const auto &target :
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .children(node)) {
+                    const unsigned &target_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(target);
+                    const double loss
+                        = static_cast<double>(
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag()
+                                  .vertex_comm_weight(node))
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                ->communicationCosts(new_proc, target_proc)
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.comm_multiplier;
+
+                    if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.vector_schedule
+                            .assignedSuperstep(target)
+                        == current_step) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(target)
+                               == current_step + 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(target)
+                               == current_step - 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= loss;
                     }
 
-                    if ((current_step + 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                         current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target)) ||
-                        (current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                         current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target))) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                                current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target)) ||
-                               (current_step - 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) &&
-                                current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target))) {
-
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) + kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                    if ((current_step + 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(target)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(target))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(target))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(target)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(target))
+                               || (current_step - 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(target)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(target))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .vertex_comm_weight(node))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
 
-                for (const auto &source : kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().parents(node)) {
-
-                    const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source);
-                    const double loss =
-                        static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, source_proc) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                    if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step + 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= loss;
+                for (const auto &source :
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .parents(node)) {
+                    const unsigned &source_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(source);
+                    const double loss
+                        = static_cast<double>(
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag()
+                                  .vertex_comm_weight(source))
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                ->communicationCosts(new_proc, source_proc)
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.comm_multiplier;
+
+                    if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.vector_schedule
+                            .assignedSuperstep(source)
+                        == current_step) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(source)
+                               == current_step + 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(source)
+                               == current_step - 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= loss;
                     }
 
-                    if ((current_step - 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                         current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source)) ||
-                        (current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                         current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source))) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                                current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source)) ||
-                               (current_step + 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) &&
-                                current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source))) {
-
-                                    kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) +
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                    if ((current_step - 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(source)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(source))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(source))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(source)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(source))
+                               || (current_step + 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(source)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(source))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .vertex_comm_weight(source))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
             } else {
-
                 // current_proc != new_proc
 
-                for (const auto &target : kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().children(node)) {
-
-                    const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target);
+                for (const auto &target :
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .children(node)) {
+                    const unsigned &target_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(target);
                     if (target_proc == current_proc) {
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) {
-
-                            const double loss =
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step) {
+                            const double loss
+                                = static_cast<double>(
+                                      kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .instance->getComputationalDag()
+                                          .vertex_comm_weight(node))
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(new_proc, target_proc)
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .comm_multiplier;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= loss;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                += loss;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
 
                     } else if (target_proc == new_proc) {
-
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, target_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step - 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .vertex_comm_weight(node))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, target_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                -= gain;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                -= gain;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                -= gain;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) < current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   < current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(target_proc != current_proc && target_proc != new_proc);
 
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) -
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, target_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, target_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, target_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .vertex_comm_weight(node)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target)
+                            == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(node))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
 
-                for (const auto &source : kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().parents(node)) {
-
-                    const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source);
+                for (const auto &source :
+                     kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                         ->getComputationalDag()
+                         .parents(node)) {
+                    const unsigned &source_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(source);
                     if (source_proc == current_proc) {
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) {
-                            const double loss =
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step) {
+                            const double loss
+                                = static_cast<double>(
+                                      kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .instance->getComputationalDag()
+                                          .vertex_comm_weight(source))
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, new_proc)
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .comm_multiplier;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= loss;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                += loss;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
 
                     } else if (source_proc == new_proc) {
-
                         assert(source_proc != current_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step + 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .vertex_comm_weight(source))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, new_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                -= gain;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                -= gain;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                -= gain;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(source_proc != current_proc && source_proc != new_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, source_proc) -
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, source_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, source_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, source_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .vertex_comm_weight(source)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source)
+                            == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .vertex_comm_weight(source))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
             }
         } else {
-
             if (current_proc == new_proc) {
-
-                for (const auto &out_edge : out_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-                    const auto &target_v = target(out_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-                    const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v);
-
-                    const double loss =
-                        static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) * 
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                    if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step - 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= loss;
+                for (const auto &out_edge :
+                     out_edges(node,
+                               kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                   ->getComputationalDag())) {
+                    const auto &target_v
+                        = target(out_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
+                    const unsigned &target_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(target_v);
+
+                    const double loss
+                        = static_cast<double>(
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag()
+                                  .edge_comm_weight(out_edge))
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                ->communicationCosts(new_proc, target_proc)
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.comm_multiplier;
+
+                    if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.vector_schedule
+                            .assignedSuperstep(target_v)
+                        == current_step) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(target_v)
+                               == current_step + 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(target_v)
+                               == current_step - 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= loss;
                     }
 
-                    if ((current_step + 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                         current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v)) ||
-                        (current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                         current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                                current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v)) ||
-                               (current_step - 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) &&
-                                current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) +
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                    if ((current_step + 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(target_v)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(target_v))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(target_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(target_v)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(target_v))
+                               || (current_step - 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(target_v)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(target_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .edge_comm_weight(out_edge))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
 
-                for (const auto &in_edge : in_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-                    const auto &source_v = source(in_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-                    const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v);
-
-                    const double loss =
-                        static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) * 
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, source_proc) *
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                    if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step + 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= loss;
-
-                    } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += loss;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= loss;
+                for (const auto &in_edge :
+                     in_edges(node,
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag())) {
+                    const auto &source_v
+                        = source(in_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
+                    const unsigned &source_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(source_v);
+
+                    const double loss
+                        = static_cast<double>(
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag()
+                                  .edge_comm_weight(in_edge))
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                ->communicationCosts(new_proc, source_proc)
+                          * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.comm_multiplier;
+
+                    if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.vector_schedule
+                            .assignedSuperstep(source_v)
+                        == current_step) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            -= loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            += loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(source_v)
+                               == current_step + 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= loss;
+
+                    } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                   .vector_schedule.assignedSuperstep(source_v)
+                               == current_step - 1) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += loss;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= loss;
                     }
 
-                    if ((current_step - 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                         current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v)) ||
-                        (current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                         current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                    } else if ((current_step == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                                current_proc != kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v)) ||
-                               (current_step + 1 == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) &&
-                                current_proc == kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v))) {
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][current_proc][2] +=
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) + kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                    if ((current_step - 1
+                             == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .vector_schedule.assignedSuperstep(source_v)
+                         && current_proc
+                                != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedProcessor(source_v))
+                        || (current_step
+                                == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                            && current_proc
+                                   == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .vector_schedule.assignedProcessor(source_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][0]
+                            -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                    } else if ((current_step
+                                    == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .vector_schedule.assignedSuperstep(source_v)
+                                && current_proc
+                                       != kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(source_v))
+                               || (current_step + 1
+                                       == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(source_v)
+                                   && current_proc
+                                          == kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                                 .vector_schedule.assignedProcessor(source_v))) {
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][current_proc][2]
+                            += static_cast<double>(
+                                   kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .instance->getComputationalDag()
+                                       .edge_comm_weight(in_edge))
+                               + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                     }
                 }
             } else {
-
                 // current_proc != new_proc
 
-                for (const auto &out_edge : out_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-
-                    const auto &target_v = target(out_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-                    const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v);
+                for (const auto &out_edge :
+                     out_edges(node,
+                               kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                   ->getComputationalDag())) {
+                    const auto &target_v
+                        = target(out_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
+                    const unsigned &target_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(target_v);
 
                     if (target_proc == current_proc) {
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) {
-
-                            const double loss =
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step) {
+                            const double loss
+                                = static_cast<double>(
+                                      kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .instance->getComputationalDag()
+                                          .edge_comm_weight(out_edge))
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(new_proc, target_proc)
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .comm_multiplier;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= loss;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                += loss;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
 
                     } else if (target_proc == new_proc) {
-
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, target_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step - 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .edge_comm_weight(out_edge))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, target_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                -= gain;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                -= gain;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                -= gain;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(target_proc != current_proc && target_proc != new_proc);
 
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, target_proc) -
-                                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, target_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, target_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, target_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .edge_comm_weight(out_edge)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(target_v)
+                            == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(target_v)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(out_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
 
-                for (const auto &in_edge : in_edges(node, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-                    const auto &source_v = source(in_edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-
-                    const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v);
+                for (const auto &in_edge :
+                     in_edges(node,
+                              kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                  ->getComputationalDag())) {
+                    const auto &source_v
+                        = source(in_edge,
+                                 kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                     .instance->getComputationalDag());
+
+                    const unsigned &source_proc
+                        = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                              .vector_schedule.assignedProcessor(source_v);
                     if (source_proc == current_proc) {
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) {
-
-                            const double loss =
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] -= loss;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] += loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] += loss;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] += loss;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step) {
+                            const double loss
+                                = static_cast<double>(
+                                      kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .instance->getComputationalDag()
+                                          .edge_comm_weight(in_edge))
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, new_proc)
+                                  * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .comm_multiplier;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                -= loss;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                += loss;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                += loss;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
                         }
                     } else if (source_proc == new_proc) {
-
                         assert(source_proc != current_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, new_proc) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step + 1) {
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->getComputationalDag()
+                                      .edge_comm_weight(in_edge))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->communicationCosts(current_proc, new_proc)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][1]
+                                -= gain;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][0]
+                                -= gain;
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += gain;
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                         [new_proc][2]
+                                -= gain;
                         }
 
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step + 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step + 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
 
                     } else {
-
                         assert(source_proc != current_proc && source_proc != new_proc);
-                        const double gain =
-                            static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(new_proc, source_proc) -
-                                     kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(current_proc, source_proc)) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge) *
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][1] += gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] += gain;
-
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][0] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][1] -= gain;
-                        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_change_in_costs[node][new_proc][2] -= gain;
-
-                        if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][0] -= kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::penalty;
-
-                        } else if (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) {
-
-                            kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::node_gains[node][new_proc][2] +=
-                                static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) +
-                                kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::reward;
+                        const double gain
+                            = static_cast<double>(
+                                  kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                      .instance->communicationCosts(new_proc, source_proc)
+                                  - kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                        .instance->communicationCosts(current_proc, source_proc))
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .instance->getComputationalDag()
+                                    .edge_comm_weight(in_edge)
+                              * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                    .comm_multiplier;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][1]
+                            += gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                            += gain;
+
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][0]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][1]
+                            -= gain;
+                        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_change_in_costs[node]
+                                                                                                                     [new_proc][2]
+                            -= gain;
+
+                        if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                .vector_schedule.assignedSuperstep(source_v)
+                            == current_step - 1) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][0]
+                                -= kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::penalty;
+
+                        } else if (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                       .vector_schedule.assignedSuperstep(source_v)
+                                   == current_step) {
+                            kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::node_gains[node][new_proc][2]
+                                += static_cast<double>(
+                                       kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                           .instance->getComputationalDag()
+                                           .edge_comm_weight(in_edge))
+                                   + kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::reward;
                         }
                     }
                 }
@@ -618,52 +1090,75 @@ class kl_total_cut : public kl_total<Graph_t, MemoryConstraint_t, use_node_commu
     }
 
     virtual double compute_current_costs() override {
-
         double work_costs = 0;
-        for (unsigned step = 0; step < kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.num_steps(); step++) {
-            work_costs += kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.step_max_work[step];
+        for (unsigned step = 0;
+             step < kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.num_steps();
+             step++) {
+            work_costs
+                += kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.step_max_work[step];
         }
 
         double comm_costs = 0;
-        for (const auto &edge : edges(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag())) {
-
-            const vertex_idx_t<Graph_t> &source_v = source(edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-            const vertex_idx_t<Graph_t> &target_v = target(edge, kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag());
-            const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(source_v);
-            const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedProcessor(target_v);
-            const unsigned &source_step = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(source_v);
-            const unsigned &target_step = kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.vector_schedule.assignedSuperstep(target_v);
+        for (const auto &edge : edges(kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                          .instance->getComputationalDag())) {
+            const vertex_idx_t<Graph_t> &source_v
+                = source(edge,
+                         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                             ->getComputationalDag());
+            const vertex_idx_t<Graph_t> &target_v
+                = target(edge,
+                         kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                             ->getComputationalDag());
+            const unsigned &source_proc = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(source_v);
+            const unsigned &target_proc = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedProcessor(target_v);
+            const unsigned &source_step = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(source_v);
+            const unsigned &target_step = kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .vector_schedule.assignedSuperstep(target_v);
 
             if (source_proc != target_proc || source_step != target_step) {
-
-                if constexpr (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.use_node_communication_costs) {
-                    comm_costs += kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().vertex_comm_weight(source_v) *
-                                  kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(source_proc, target_proc);
+                if constexpr (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                  .use_node_communication_costs) {
+                    comm_costs
+                        += kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                               ->getComputationalDag()
+                               .vertex_comm_weight(source_v)
+                           * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                 ->communicationCosts(source_proc, target_proc);
                 } else {
-                    comm_costs += kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->getComputationalDag().edge_comm_weight(edge) *
-                                  kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->communicationCosts(source_proc, target_proc);
+                    comm_costs
+                        += kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                               ->getComputationalDag()
+                               .edge_comm_weight(edge)
+                           * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.instance
+                                 ->communicationCosts(source_proc, target_proc);
                 }
             }
         }
 
-        kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.current_cost =
-            work_costs + comm_costs * kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.comm_multiplier +
-            (kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.num_steps() - 1) * static_cast<double>(kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.instance->synchronisationCosts());
+        kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.current_cost
+            = work_costs
+              + comm_costs
+                    * kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.comm_multiplier
+              + (kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.num_steps() - 1)
+                    * static_cast<double>(kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule
+                                              .instance->synchronisationCosts());
 
-        return kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>::current_schedule.current_cost;
+        return kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>::current_schedule.current_cost;
     }
 
   public:
-    kl_total_cut() : kl_total<Graph_t, MemoryConstraint_t,use_node_communication_costs_arg>() {}
+    kl_total_cut() : kl_total<Graph_t, MemoryConstraint_t, use_node_communication_costs_arg>() {}
 
     virtual ~kl_total_cut() = default;
 
     virtual std::string getScheduleName() const override { return "KLTotalCut"; }
 };
 
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint>
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint>
 class kl_total_cut_test : public kl_total_cut<Graph_t, MemoryConstraint_t, true> {
-
   public:
     kl_total_cut_test() : kl_total_cut<Graph_t, MemoryConstraint_t, true>() {}
 
@@ -671,13 +1166,18 @@ class kl_total_cut_test : public kl_total_cut<Graph_t, MemoryConstraint_t, true>
 
     virtual std::string getScheduleName() const override { return "KLTotalCutTest"; }
 
-    kl_current_schedule_total<Graph_t, MemoryConstraint_t, true> &get_current_schedule() { return kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule; }
+    kl_current_schedule_total<Graph_t, MemoryConstraint_t, true> &get_current_schedule() {
+        return kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule;
+    }
 
     auto &get_node_gains() { return kl_total<Graph_t, MemoryConstraint_t, true>::node_gains; }
+
     auto &get_node_change_in_costs() { return kl_total<Graph_t, MemoryConstraint_t, true>::node_change_in_costs; }
+
     auto &get_max_gain_heap() { return kl_total<Graph_t, MemoryConstraint_t, true>::max_gain_heap; }
 
-    void initialize_gain_heap_test(const std::unordered_set<vertex_idx_t<Graph_t>> &nodes, double reward_ = 0.0,
+    void initialize_gain_heap_test(const std::unordered_set<vertex_idx_t<Graph_t>> &nodes,
+                                   double reward_ = 0.0,
                                    double penalty_ = 0.0) {
         kl_total<Graph_t, MemoryConstraint_t, true>::reward = reward_;
         kl_total<Graph_t, MemoryConstraint_t, true>::penalty = penalty_;
@@ -686,55 +1186,60 @@ class kl_total_cut_test : public kl_total_cut<Graph_t, MemoryConstraint_t, true>
     }
 
     void test_setup_schedule(BspSchedule<Graph_t> &schedule) {
-
         kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance = &schedule.getInstance();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::best_schedule = &schedule;
 
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::set_parameters();
         kl_total<Graph_t, MemoryConstraint_t, true>::initialize_datastructures();
     }
 
     RETURN_STATUS improve_schedule_test_1(BspSchedule<Graph_t> &schedule) {
-
         kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance = &schedule.getInstance();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::best_schedule = &schedule;
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::set_parameters();
         kl_total<Graph_t, MemoryConstraint_t, true>::initialize_datastructures();
 
         bool improvement_found = kl_total<Graph_t, MemoryConstraint_t, true>::run_local_search_simple();
 
-        if (improvement_found)
+        if (improvement_found) {
             return RETURN_STATUS::OSP_SUCCESS;
-        else
+        } else {
             return RETURN_STATUS::BEST_FOUND;
+        }
     }
 
     RETURN_STATUS improve_schedule_test_2(BspSchedule<Graph_t> &schedule) {
-
         kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance = &schedule.getInstance();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::best_schedule = &schedule;
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
-        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_nodes
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfVertices();
+        kl_total<Graph_t, MemoryConstraint_t, true>::num_procs
+            = kl_total<Graph_t, MemoryConstraint_t, true>::current_schedule.instance->numberOfProcessors();
 
         kl_total<Graph_t, MemoryConstraint_t, true>::set_parameters();
         kl_total<Graph_t, MemoryConstraint_t, true>::initialize_datastructures();
 
         bool improvement_found = kl_total<Graph_t, MemoryConstraint_t, true>::run_local_search_unlock_delay();
 
-        if (improvement_found)
+        if (improvement_found) {
             return RETURN_STATUS::OSP_SUCCESS;
-        else
+        } else {
             return RETURN_STATUS::BEST_FOUND;
+        }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp
index 8fb1ceff..07537551 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp
@@ -27,17 +27,25 @@ namespace osp {
 struct EagerCommCostPolicy {
     using ValueType = unsigned;
 
-    template<typename DS, typename comm_weight_t>
-    static inline void attribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step,
-                                               const unsigned u_proc, const unsigned v_proc, const unsigned v_step,
+    template <typename DS, typename comm_weight_t>
+    static inline void attribute_communication(DS &ds,
+                                               const comm_weight_t &cost,
+                                               const unsigned u_step,
+                                               const unsigned u_proc,
+                                               const unsigned v_proc,
+                                               const unsigned v_step,
                                                const ValueType &val) {
         ds.step_proc_receive(u_step, v_proc) += cost;
         ds.step_proc_send(u_step, u_proc) += cost;
     }
 
-    template<typename DS, typename comm_weight_t>
-    static inline void unattribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step,
-                                                 const unsigned u_proc, const unsigned v_proc, const unsigned v_step,
+    template <typename DS, typename comm_weight_t>
+    static inline void unattribute_communication(DS &ds,
+                                                 const comm_weight_t &cost,
+                                                 const unsigned u_step,
+                                                 const unsigned u_proc,
+                                                 const unsigned v_proc,
+                                                 const unsigned v_step,
                                                  const ValueType &val) {
         ds.step_proc_receive(u_step, v_proc) -= cost;
         ds.step_proc_send(u_step, u_proc) -= cost;
@@ -59,9 +67,13 @@ struct EagerCommCostPolicy {
 
     static inline bool is_single_entry(const ValueType &val) { return val == 1; }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_remove(const ValueType &val, unsigned child_step, unsigned parent_step,
-                                              unsigned parent_proc, unsigned child_proc, comm_weight_t cost,
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_remove(const ValueType &val,
+                                              unsigned child_step,
+                                              unsigned parent_step,
+                                              unsigned parent_proc,
+                                              unsigned child_proc,
+                                              comm_weight_t cost,
                                               DeltaTracker &dt) {
         if (val == 1) {
             dt.add(true, parent_step, child_proc, -cost);
@@ -69,9 +81,13 @@ struct EagerCommCostPolicy {
         }
     }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_add(const ValueType &val, unsigned child_step, unsigned parent_step,
-                                           unsigned parent_proc, unsigned child_proc, comm_weight_t cost,
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_add(const ValueType &val,
+                                           unsigned child_step,
+                                           unsigned parent_step,
+                                           unsigned parent_proc,
+                                           unsigned child_proc,
+                                           comm_weight_t cost,
                                            DeltaTracker &dt) {
         if (val == 0) {
             dt.add(true, parent_step, child_proc, cost);
@@ -79,9 +95,9 @@ struct EagerCommCostPolicy {
         }
     }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_outgoing(const ValueType &val, unsigned node_step, unsigned node_proc,
-                                                unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) {
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_outgoing(
+        const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) {
         if (val > 0) {
             comm_weight_t total_cost = cost * val;
             dt.add(true, node_step, child_proc, total_cost);
@@ -93,15 +109,20 @@ struct EagerCommCostPolicy {
 struct LazyCommCostPolicy {
     using ValueType = std::vector<unsigned>;
 
-    template<typename DS, typename comm_weight_t>
-    static inline void attribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step,
-                                               const unsigned u_proc, const unsigned v_proc, const unsigned v_step,
+    template <typename DS, typename comm_weight_t>
+    static inline void attribute_communication(DS &ds,
+                                               const comm_weight_t &cost,
+                                               const unsigned u_step,
+                                               const unsigned u_proc,
+                                               const unsigned v_proc,
+                                               const unsigned v_step,
                                                const ValueType &val) {
         // val contains v_step (already added).
         // Check if v_step is the new minimum.
         unsigned min_step = std::numeric_limits<unsigned>::max();
-        for (unsigned s : val)
+        for (unsigned s : val) {
             min_step = std::min(min_step, s);
+        }
 
         if (min_step == v_step) {
             // Check if it was strictly smaller than previous min.
@@ -123,9 +144,13 @@ struct LazyCommCostPolicy {
         }
     }
 
-    template<typename DS, typename comm_weight_t>
-    static inline void unattribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step,
-                                                 const unsigned u_proc, const unsigned v_proc, const unsigned v_step,
+    template <typename DS, typename comm_weight_t>
+    static inline void unattribute_communication(DS &ds,
+                                                 const comm_weight_t &cost,
+                                                 const unsigned u_step,
+                                                 const unsigned u_proc,
+                                                 const unsigned v_proc,
+                                                 const unsigned v_step,
                                                  const ValueType &val) {
         // val is state AFTER removal.
 
@@ -138,8 +163,9 @@ struct LazyCommCostPolicy {
         } else {
             // Check if v_step was the unique minimum.
             unsigned new_min = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 new_min = std::min(new_min, s);
+            }
 
             if (v_step < new_min) {
                 // v_step was the unique minimum.
@@ -157,11 +183,13 @@ struct LazyCommCostPolicy {
 
     static inline bool add_child(ValueType &val, unsigned step) {
         val.push_back(step);
-        if (val.size() == 1)
+        if (val.size() == 1) {
             return true;
+        }
         unsigned min_s = val[0];
-        for (unsigned s : val)
+        for (unsigned s : val) {
             min_s = std::min(min_s, s);
+        }
         return step == min_s;
     }
 
@@ -173,8 +201,9 @@ struct LazyCommCostPolicy {
                 return true;
             }
             unsigned new_min = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 new_min = std::min(new_min, s);
+            }
             bool res = step < new_min;
             return res;
         }
@@ -187,21 +216,29 @@ struct LazyCommCostPolicy {
 
     static inline bool is_single_entry(const ValueType &val) { return val.size() == 1; }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_remove(const ValueType &val, unsigned child_step, unsigned parent_step,
-                                              unsigned parent_proc, unsigned child_proc, comm_weight_t cost,
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_remove(const ValueType &val,
+                                              unsigned child_step,
+                                              unsigned parent_step,
+                                              unsigned parent_proc,
+                                              unsigned child_proc,
+                                              comm_weight_t cost,
                                               DeltaTracker &dt) {
-        if (val.empty())
+        if (val.empty()) {
             return;
+        }
         unsigned min_s = val[0];
-        for (unsigned s : val)
+        for (unsigned s : val) {
             min_s = std::min(min_s, s);
+        }
 
         if (child_step == min_s) {
             int count = 0;
-            for (unsigned s : val)
-                if (s == min_s)
+            for (unsigned s : val) {
+                if (s == min_s) {
                     count++;
+                }
+            }
 
             if (count == 1) {
                 if (min_s > 0) {
@@ -211,8 +248,9 @@ struct LazyCommCostPolicy {
                 if (val.size() > 1) {
                     unsigned next_min = std::numeric_limits<unsigned>::max();
                     for (unsigned s : val) {
-                        if (s != min_s)
+                        if (s != min_s) {
                             next_min = std::min(next_min, s);
+                        }
                     }
                     if (next_min != std::numeric_limits<unsigned>::max() && next_min > 0) {
                         dt.add(true, next_min - 1, child_proc, cost);
@@ -223,9 +261,13 @@ struct LazyCommCostPolicy {
         }
     }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_add(const ValueType &val, unsigned child_step, unsigned parent_step,
-                                           unsigned parent_proc, unsigned child_proc, comm_weight_t cost,
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_add(const ValueType &val,
+                                           unsigned child_step,
+                                           unsigned parent_step,
+                                           unsigned parent_proc,
+                                           unsigned child_proc,
+                                           comm_weight_t cost,
                                            DeltaTracker &dt) {
         if (val.empty()) {
             if (child_step > 0) {
@@ -234,8 +276,9 @@ struct LazyCommCostPolicy {
             }
         } else {
             unsigned min_s = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 min_s = std::min(min_s, s);
+            }
 
             if (child_step < min_s) {
                 if (min_s > 0) {
@@ -250,9 +293,9 @@ struct LazyCommCostPolicy {
         }
     }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_outgoing(const ValueType &val, unsigned node_step, unsigned node_proc,
-                                                unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) {
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_outgoing(
+        const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) {
         for (unsigned s : val) {
             if (s > 0) {
                 dt.add(true, s - 1, child_proc, cost);
@@ -265,20 +308,26 @@ struct LazyCommCostPolicy {
 struct BufferedCommCostPolicy {
     using ValueType = std::vector<unsigned>;
 
-    template<typename DS, typename comm_weight_t>
-    static inline void attribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step,
-                                               const unsigned u_proc, const unsigned v_proc, const unsigned v_step,
+    template <typename DS, typename comm_weight_t>
+    static inline void attribute_communication(DS &ds,
+                                               const comm_weight_t &cost,
+                                               const unsigned u_step,
+                                               const unsigned u_proc,
+                                               const unsigned v_proc,
+                                               const unsigned v_step,
                                                const ValueType &val) {
         // Buffered: Send at u_step, Receive at v_step - 1.
 
         unsigned min_step = std::numeric_limits<unsigned>::max();
-        for (unsigned s : val)
+        for (unsigned s : val) {
             min_step = std::min(min_step, s);
+        }
 
         if (min_step == v_step) {
             unsigned prev_min = std::numeric_limits<unsigned>::max();
-            for (size_t i = 0; i < val.size() - 1; ++i)
+            for (size_t i = 0; i < val.size() - 1; ++i) {
                 prev_min = std::min(prev_min, val[i]);
+            }
 
             if (v_step < prev_min) {
                 if (prev_min != std::numeric_limits<unsigned>::max() && prev_min > 0) {
@@ -297,23 +346,28 @@ struct BufferedCommCostPolicy {
         }
     }
 
-    template<typename DS, typename comm_weight_t>
-    static inline void unattribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step,
-                                                 const unsigned u_proc, const unsigned v_proc, const unsigned v_step,
+    template <typename DS, typename comm_weight_t>
+    static inline void unattribute_communication(DS &ds,
+                                                 const comm_weight_t &cost,
+                                                 const unsigned u_step,
+                                                 const unsigned u_proc,
+                                                 const unsigned v_proc,
+                                                 const unsigned v_step,
                                                  const ValueType &val) {
         // val is state AFTER removal.
 
         if (val.empty()) {
             // Removed last child.
-            ds.step_proc_send(u_step, u_proc) -= cost; // Send side
+            ds.step_proc_send(u_step, u_proc) -= cost;    // Send side
             if (v_step > 0) {
-                ds.step_proc_receive(v_step - 1, v_proc) -= cost; // Recv side
+                ds.step_proc_receive(v_step - 1, v_proc) -= cost;    // Recv side
             }
         } else {
             // Check if v_step was unique minimum for Recv side.
             unsigned new_min = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 new_min = std::min(new_min, s);
+            }
 
             if (v_step < new_min) {
                 if (v_step > 0) {
@@ -329,76 +383,96 @@ struct BufferedCommCostPolicy {
 
     static inline bool add_child(ValueType &val, unsigned step) {
         val.push_back(step);
-        if (val.size() == 1)
-            return true; // Need update for send side
+        if (val.size() == 1) {
+            return true;    // Need update for send side
+        }
         unsigned min_s = val[0];
-        for (unsigned s : val)
+        for (unsigned s : val) {
             min_s = std::min(min_s, s);
-        return step == min_s; // Need update for recv side
+        }
+        return step == min_s;    // Need update for recv side
     }
 
     static inline bool remove_child(ValueType &val, unsigned step) {
         auto it = std::find(val.begin(), val.end(), step);
         if (it != val.end()) {
             val.erase(it);
-            if (val.empty())
-                return true; // Need update for send side
+            if (val.empty()) {
+                return true;    // Need update for send side
+            }
             unsigned new_min = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 new_min = std::min(new_min, s);
-            return step < new_min; // Need update for recv side
+            }
+            return step < new_min;    // Need update for recv side
         }
         return false;
     }
 
     static inline void reset(ValueType &val) { val.clear(); }
+
     static inline bool has_entry(const ValueType &val) { return !val.empty(); }
+
     static inline bool is_single_entry(const ValueType &val) { return val.size() == 1; }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_remove(const ValueType &val, unsigned child_step, unsigned parent_step,
-                                              unsigned parent_proc, unsigned child_proc, comm_weight_t cost,
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_remove(const ValueType &val,
+                                              unsigned child_step,
+                                              unsigned parent_step,
+                                              unsigned parent_proc,
+                                              unsigned child_proc,
+                                              comm_weight_t cost,
                                               DeltaTracker &dt) {
         // Lazy: Send and Recv are both at min(child_steps) - 1.
 
-        if (val.empty())
+        if (val.empty()) {
             return;
+        }
 
         unsigned min_s = val[0];
-        for (unsigned s : val)
+        for (unsigned s : val) {
             min_s = std::min(min_s, s);
+        }
 
         if (child_step == min_s) {
             int count = 0;
-            for (unsigned s : val)
-                if (s == min_s)
+            for (unsigned s : val) {
+                if (s == min_s) {
                     count++;
+                }
+            }
 
             if (count == 1) {
                 // Unique min being removed.
                 if (min_s > 0) {
-                    dt.add(true, min_s - 1, child_proc, -cost);   // Remove Recv
-                    dt.add(false, min_s - 1, parent_proc, -cost); // Remove Send
+                    dt.add(true, min_s - 1, child_proc, -cost);      // Remove Recv
+                    dt.add(false, min_s - 1, parent_proc, -cost);    // Remove Send
                 }
 
                 if (val.size() > 1) {
                     unsigned next_min = std::numeric_limits<unsigned>::max();
-                    for (unsigned s : val)
-                        if (s != min_s)
+                    for (unsigned s : val) {
+                        if (s != min_s) {
                             next_min = std::min(next_min, s);
+                        }
+                    }
 
                     if (next_min != std::numeric_limits<unsigned>::max() && next_min > 0) {
-                        dt.add(true, next_min - 1, child_proc, cost);   // Add Recv at new min
-                        dt.add(false, next_min - 1, parent_proc, cost); // Add Send at new min
+                        dt.add(true, next_min - 1, child_proc, cost);      // Add Recv at new min
+                        dt.add(false, next_min - 1, parent_proc, cost);    // Add Send at new min
                     }
                 }
             }
         }
     }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_add(const ValueType &val, unsigned child_step, unsigned parent_step,
-                                           unsigned parent_proc, unsigned child_proc, comm_weight_t cost,
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_add(const ValueType &val,
+                                           unsigned child_step,
+                                           unsigned parent_step,
+                                           unsigned parent_proc,
+                                           unsigned child_proc,
+                                           comm_weight_t cost,
                                            DeltaTracker &dt) {
         // Lazy: Send and Recv are both at min(child_steps) - 1.
 
@@ -410,26 +484,27 @@ struct BufferedCommCostPolicy {
             }
         } else {
             unsigned min_s = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 min_s = std::min(min_s, s);
+            }
 
             if (child_step < min_s) {
                 // New global minimum.
                 if (min_s > 0) {
-                    dt.add(true, min_s - 1, child_proc, -cost);   // Remove old Recv
-                    dt.add(false, min_s - 1, parent_proc, -cost); // Remove old Send
+                    dt.add(true, min_s - 1, child_proc, -cost);      // Remove old Recv
+                    dt.add(false, min_s - 1, parent_proc, -cost);    // Remove old Send
                 }
                 if (child_step > 0) {
-                    dt.add(true, child_step - 1, child_proc, cost);   // Add new Recv
-                    dt.add(false, child_step - 1, parent_proc, cost); // Add new Send
+                    dt.add(true, child_step - 1, child_proc, cost);      // Add new Recv
+                    dt.add(false, child_step - 1, parent_proc, cost);    // Add new Send
                 }
             }
         }
     }
 
-    template<typename DeltaTracker, typename comm_weight_t>
-    static inline void calculate_delta_outgoing(const ValueType &val, unsigned node_step, unsigned node_proc,
-                                                unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) {
+    template <typename DeltaTracker, typename comm_weight_t>
+    static inline void calculate_delta_outgoing(
+        const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) {
         // Buffered Outgoing (Node -> Children)
         // Node is parent (sender). Pays at node_step.
         // Children are receivers. Pay at child_step - 1.
@@ -444,8 +519,9 @@ struct BufferedCommCostPolicy {
         // But we only pay at min(val) - 1.
         if (!val.empty()) {
             unsigned min_s = val[0];
-            for (unsigned s : val)
+            for (unsigned s : val) {
                 min_s = std::min(min_s, s);
+            }
 
             if (min_s > 0) {
                 dt.add(true, min_s - 1, child_proc, cost);
@@ -454,4 +530,4 @@ struct BufferedCommCostPolicy {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp
index 623d51d8..e86baada 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp
@@ -24,12 +24,12 @@ limitations under the License.
 
 namespace osp {
 
-template<typename T>
+template <typename T>
 struct DefaultHasEntry {
     static inline bool has_entry(const T &val) { return val != 0; }
 };
 
-template<typename T>
+template <typename T>
 struct DefaultHasEntry<std::vector<T>> {
     static inline bool has_entry(const std::vector<T> &val) { return !val.empty(); }
 };
@@ -40,9 +40,8 @@ struct DefaultHasEntry<std::vector<T>> {
  * This structure tracks information about children assigned to each processor.
  * It uses a 2D vector for dense data.
  */
-template<typename vertex_idx_t, typename ValueType = unsigned, typename HasEntry = DefaultHasEntry<ValueType>>
+template <typename vertex_idx_t, typename ValueType = unsigned, typename HasEntry = DefaultHasEntry<ValueType>>
 struct generic_lambda_vector_container {
-
     /**
      * @brief Range adapter for iterating over non-zero/non-empty processor entries.
      */
@@ -82,12 +81,14 @@ struct generic_lambda_vector_container {
             value_type operator*() const { return std::make_pair(index_, vec_[index_]); }
 
             bool operator==(const lambda_vector_iterator &other) const { return index_ == other.index_; }
+
             bool operator!=(const lambda_vector_iterator &other) const { return !(*this == other); }
         };
 
         lambda_vector_range(const std::vector<ValueType> &vec) : vec_(vec) {}
 
         lambda_vector_iterator begin() { return lambda_vector_iterator(vec_); }
+
         lambda_vector_iterator end() { return lambda_vector_iterator(vec_, static_cast<unsigned>(vec_.size())); }
     };
 
@@ -110,15 +111,11 @@ struct generic_lambda_vector_container {
         return HasEntry::has_entry(node_lambda_vec[node][proc]);
     }
 
-    inline ValueType &get_proc_entry(const vertex_idx_t node, const unsigned proc) {
-        return node_lambda_vec[node][proc];
-    }
+    inline ValueType &get_proc_entry(const vertex_idx_t node, const unsigned proc) { return node_lambda_vec[node][proc]; }
 
-    inline ValueType get_proc_entry(const vertex_idx_t node, const unsigned proc) const {
-        return node_lambda_vec[node][proc];
-    }
+    inline ValueType get_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc]; }
 
     inline auto iterate_proc_entries(const vertex_idx_t node) { return lambda_vector_range(node_lambda_vec[node]); }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp
index 2cf0c631..7fd3693f 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp
@@ -18,20 +18,21 @@ limitations under the License.
 
 #pragma once
 
+#include <array>
+
 #include "../kl_active_schedule.hpp"
 #include "../kl_improver.hpp"
 #include "max_comm_datastructure.hpp"
-#include <array>
 
 namespace osp {
 
 // A lightweight helper to track deltas without hash maps or repeated allocations.
 // Uses a dense vector for O(1) lookups and a sparse list for fast iteration/clearing.
-template<typename comm_weight_t>
+template <typename comm_weight_t>
 struct FastDeltaTracker {
-    std::vector<comm_weight_t> dense_vals;  // Size: num_procs
-    std::vector<unsigned> dirty_procs;      // List of modified indices
-    std::vector<unsigned> proc_dirty_index; // Map proc -> index in dirty_procs (num_procs if not dirty)
+    std::vector<comm_weight_t> dense_vals;     // Size: num_procs
+    std::vector<unsigned> dirty_procs;         // List of modified indices
+    std::vector<unsigned> proc_dirty_index;    // Map proc -> index in dirty_procs (num_procs if not dirty)
     unsigned num_procs = 0;
 
     void initialize(unsigned n_procs) {
@@ -44,8 +45,9 @@ struct FastDeltaTracker {
     }
 
     inline void add(unsigned proc, comm_weight_t val) {
-        if (val == 0)
+        if (val == 0) {
             return;
+        }
 
         // If currently 0, it is becoming dirty
         if (dense_vals[proc] == 0) {
@@ -71,8 +73,9 @@ struct FastDeltaTracker {
     }
 
     inline comm_weight_t get(unsigned proc) const {
-        if (proc < dense_vals.size())
+        if (proc < dense_vals.size()) {
             return dense_vals[proc];
+        }
         return 0;
     }
 
@@ -85,9 +88,8 @@ struct FastDeltaTracker {
     }
 };
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t, unsigned window_size = 1>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t, unsigned window_size = 1>
 struct kl_bsp_comm_cost_function {
-
     using VertexType = vertex_idx_t<Graph_t>;
     using kl_move = kl_move_struct<cost_t, VertexType>;
     using kl_gain_update_info = kl_update_info<VertexType>;
@@ -104,22 +106,24 @@ struct kl_bsp_comm_cost_function {
     max_comm_datastructure<Graph_t, cost_t, kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>> comm_ds;
 
     inline cost_t get_comm_multiplier() { return 1; }
+
     inline cost_t get_max_comm_weight() { return comm_ds.max_comm_weight; }
+
     inline cost_t get_max_comm_weight_multiplied() { return comm_ds.max_comm_weight; }
+
     inline const std::string name() const { return "bsp_comm"; }
-    inline bool is_compatible(VertexType node, unsigned proc) {
-        return active_schedule->getInstance().isCompatible(node, proc);
-    }
+
+    inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); }
+
     inline unsigned start_idx(const unsigned node_step, const unsigned start_step) {
         return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0;
     }
+
     inline unsigned end_idx(const unsigned node_step, const unsigned end_step) {
-        return (node_step + window_size <= end_step) ? window_range
-                                                     : window_range - (node_step + window_size - end_step);
+        return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step);
     }
 
-    void initialize(kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t> &sched,
-                    CompatibleProcessorRange<Graph_t> &p_range) {
+    void initialize(kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t> &sched, CompatibleProcessorRange<Graph_t> &p_range) {
         active_schedule = &sched;
         proc_range = &p_range;
         instance = &sched.getInstance();
@@ -135,14 +139,13 @@ struct kl_bsp_comm_cost_function {
         return comm_ds.get_pre_move_comm_data(move);
     }
 
-    void compute_send_receive_datastructures() {
-        comm_ds.compute_comm_datastructures(0, active_schedule->num_steps() - 1);
-    }
+    void compute_send_receive_datastructures() { comm_ds.compute_comm_datastructures(0, active_schedule->num_steps() - 1); }
 
-    template<bool compute_datastructures = true>
+    template <bool compute_datastructures = true>
     cost_t compute_schedule_cost() {
-        if constexpr (compute_datastructures)
+        if constexpr (compute_datastructures) {
             compute_send_receive_datastructures();
+        }
 
         cost_t total_cost = 0;
         for (unsigned step = 0; step < active_schedule->num_steps(); step++) {
@@ -165,11 +168,11 @@ struct kl_bsp_comm_cost_function {
 
     // Structure to hold thread-local scratchpads to avoid re-allocation.
     struct ScratchData {
-        std::vector<FastDeltaTracker<comm_weight_t>> send_deltas; // Size: num_steps
-        std::vector<FastDeltaTracker<comm_weight_t>> recv_deltas; // Size: num_steps
+        std::vector<FastDeltaTracker<comm_weight_t>> send_deltas;    // Size: num_steps
+        std::vector<FastDeltaTracker<comm_weight_t>> recv_deltas;    // Size: num_steps
 
-        std::vector<unsigned> active_steps; // List of steps touched in current operation
-        std::vector<bool> step_is_active;   // Fast lookup for active steps
+        std::vector<unsigned> active_steps;    // List of steps touched in current operation
+        std::vector<bool> step_is_active;      // Fast lookup for active steps
 
         std::vector<std::pair<unsigned, comm_weight_t>> child_cost_buffer;
 
@@ -181,10 +184,12 @@ struct kl_bsp_comm_cost_function {
                 active_steps.reserve(n_steps);
             }
 
-            for (auto &tracker : send_deltas)
+            for (auto &tracker : send_deltas) {
                 tracker.initialize(n_procs);
-            for (auto &tracker : recv_deltas)
+            }
+            for (auto &tracker : recv_deltas) {
                 tracker.initialize(n_procs);
+            }
 
             child_cost_buffer.reserve(n_procs);
         }
@@ -207,10 +212,13 @@ struct kl_bsp_comm_cost_function {
         }
     };
 
-    template<typename affinity_table_t>
-    void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty,
-                               const cost_t &reward, const unsigned start_step, const unsigned end_step) {
-
+    template <typename affinity_table_t>
+    void compute_comm_affinity(VertexType node,
+                               affinity_table_t &affinity_table_node,
+                               const cost_t &penalty,
+                               const cost_t &reward,
+                               const unsigned start_step,
+                               const unsigned end_step) {
         // Use static thread_local scratchpad to avoid allocation in hot loop
         static thread_local ScratchData scratch;
         scratch.init(active_schedule->num_steps(), instance->numberOfProcessors());
@@ -286,14 +294,16 @@ struct kl_bsp_comm_cost_function {
         const auto &current_vec_schedule = active_schedule->getVectorSchedule();
 
         auto add_delta = [&](bool is_recv, unsigned step, unsigned proc, comm_weight_t val) {
-            if (val == 0)
+            if (val == 0) {
                 return;
+            }
             if (step < active_schedule->num_steps()) {
                 scratch.mark_active(step);
-                if (is_recv)
+                if (is_recv) {
                     scratch.recv_deltas[step].add(proc, val);
-                else
+                } else {
                     scratch.send_deltas[step].add(proc, val);
+                }
             }
         };
 
@@ -337,7 +347,6 @@ struct kl_bsp_comm_cost_function {
         // 2. Add Node to Target (Iterate candidates)
 
         for (const unsigned p_to : proc_range->compatible_processors_vertex(node)) {
-
             // --- Part A: Incoming Edges (Parents -> p_to) ---
             // These updates are specific to p_to but independent of s_to.
             // We apply them, run the s_to loop, then revert them.
@@ -352,8 +361,9 @@ struct kl_bsp_comm_cost_function {
                     unsigned count_on_p_to = comm_ds.node_lambda_map.get_proc_entry(u, p_to);
 
                     if (p_to == node_proc) {
-                        if (count_on_p_to > 0)
+                        if (count_on_p_to > 0) {
                             count_on_p_to--;
+                        }
                     }
 
                     if (count_on_p_to > 0) {
@@ -404,11 +414,8 @@ struct kl_bsp_comm_cost_function {
                 for (unsigned step : scratch.active_steps) {
                     // Check if dirty_procs is empty implies no change for this step
                     // FastDeltaTracker ensures dirty_procs is empty if all deltas summed to 0
-                    if (!scratch.send_deltas[step].dirty_procs.empty() ||
-                        !scratch.recv_deltas[step].dirty_procs.empty()) {
-
-                        total_change +=
-                            calculate_step_cost_change(step, scratch.send_deltas[step], scratch.recv_deltas[step]);
+                    if (!scratch.send_deltas[step].dirty_procs.empty() || !scratch.recv_deltas[step].dirty_procs.empty()) {
+                        total_change += calculate_step_cost_change(step, scratch.send_deltas[step], scratch.recv_deltas[step]);
                     }
                 }
 
@@ -433,11 +440,13 @@ struct kl_bsp_comm_cost_function {
                     bool already_sending_to_p_to = false;
                     unsigned count_on_p_to = comm_ds.node_lambda_map.get_proc_entry(u, p_to);
                     if (p_to == node_proc) {
-                        if (count_on_p_to > 0)
+                        if (count_on_p_to > 0) {
                             count_on_p_to--;
+                        }
                     }
-                    if (count_on_p_to > 0)
+                    if (count_on_p_to > 0) {
                         already_sending_to_p_to = true;
+                    }
 
                     if (!already_sending_to_p_to) {
                         const comm_weight_t cost = comm_w_u * instance->sendCosts(u_proc, p_to);
@@ -451,9 +460,9 @@ struct kl_bsp_comm_cost_function {
         }
     }
 
-    comm_weight_t calculate_step_cost_change(unsigned step, const FastDeltaTracker<comm_weight_t> &delta_send,
+    comm_weight_t calculate_step_cost_change(unsigned step,
+                                             const FastDeltaTracker<comm_weight_t> &delta_send,
                                              const FastDeltaTracker<comm_weight_t> &delta_recv) {
-
         comm_weight_t old_max = comm_ds.step_max_comm(step);
         comm_weight_t second_max = comm_ds.step_second_max_comm(step);
         unsigned old_max_count = comm_ds.step_max_comm_count(step);
@@ -469,10 +478,12 @@ struct kl_bsp_comm_cost_function {
             comm_weight_t current_val = comm_ds.step_proc_send(step, proc);
             comm_weight_t new_val = current_val + delta;
 
-            if (new_val > new_global_max)
+            if (new_val > new_global_max) {
                 new_global_max = new_val;
-            if (delta < 0 && current_val == old_max)
+            }
+            if (delta < 0 && current_val == old_max) {
                 reduced_max_instances++;
+            }
         }
 
         // 2. Check modified receives (Iterate sparse dirty list)
@@ -482,10 +493,12 @@ struct kl_bsp_comm_cost_function {
             comm_weight_t current_val = comm_ds.step_proc_receive(step, proc);
             comm_weight_t new_val = current_val + delta;
 
-            if (new_val > new_global_max)
+            if (new_val > new_global_max) {
                 new_global_max = new_val;
-            if (delta < 0 && current_val == old_max)
+            }
+            if (delta < 0 && current_val == old_max) {
                 reduced_max_instances++;
+            }
         }
 
         // 3. Determine result
@@ -498,21 +511,25 @@ struct kl_bsp_comm_cost_function {
         return std::max(new_global_max, second_max) - old_max;
     }
 
-    template<typename thread_data_t>
-    void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty,
-                                   const cost_t &reward, std::map<VertexType, kl_gain_update_info> &,
+    template <typename thread_data_t>
+    void update_node_comm_affinity(const kl_move &move,
+                                   thread_data_t &thread_data,
+                                   const cost_t &penalty,
+                                   const cost_t &reward,
+                                   std::map<VertexType, kl_gain_update_info> &,
                                    std::vector<VertexType> &new_nodes) {
-
         const unsigned start_step = thread_data.start_step;
         const unsigned end_step = thread_data.end_step;
 
         for (const auto &target : instance->getComputationalDag().children(move.node)) {
             const unsigned target_step = active_schedule->assigned_superstep(target);
-            if (target_step < start_step || target_step > end_step)
+            if (target_step < start_step || target_step > end_step) {
                 continue;
+            }
 
-            if (thread_data.lock_manager.is_locked(target))
+            if (thread_data.lock_manager.is_locked(target)) {
                 continue;
+            }
 
             if (not thread_data.affinity_table.is_selected(target)) {
                 new_nodes.push_back(target);
@@ -590,11 +607,13 @@ struct kl_bsp_comm_cost_function {
 
         for (const auto &source : instance->getComputationalDag().parents(move.node)) {
             const unsigned source_step = active_schedule->assigned_superstep(source);
-            if (source_step < start_step || source_step > end_step)
+            if (source_step < start_step || source_step > end_step) {
                 continue;
+            }
 
-            if (thread_data.lock_manager.is_locked(source))
+            if (thread_data.lock_manager.is_locked(source)) {
                 continue;
+            }
 
             if (not thread_data.affinity_table.is_selected(source)) {
                 new_nodes.push_back(source);
@@ -666,4 +685,4 @@ struct kl_bsp_comm_cost_function {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp
index caaad9ca..898f2df4 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp
@@ -24,9 +24,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t, unsigned window_size = 1>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t, unsigned window_size = 1>
 struct kl_hyper_total_comm_cost_function {
-
     using VertexType = vertex_idx_t<Graph_t>;
     using kl_move = kl_move_struct<cost_t, VertexType>;
     using kl_gain_update_info = kl_update_info<VertexType>;
@@ -47,9 +46,13 @@ struct kl_hyper_total_comm_cost_function {
     lambda_vector_container<VertexType> node_lambda_map;
 
     inline cost_t get_comm_multiplier() { return comm_multiplier; }
+
     inline cost_t get_max_comm_weight() { return max_comm_weight; }
+
     inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; }
+
     const std::string name() const { return "toal_comm_cost"; }
+
     inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); }
 
     void initialize(kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t> &sched, CompatibleProcessorRange<Graph_t> &p_range) {
@@ -85,12 +88,15 @@ struct kl_hyper_total_comm_cost_function {
                 const unsigned target_proc = active_schedule->assigned_processor(target);
 
                 if (node_lambda_map.increase_proc_count(vertex, target_proc)) {
-                    comm_costs += v_comm_cost * instance->communicationCosts(vertex_proc, target_proc); // is 0 if target_proc == vertex_proc
+                    comm_costs
+                        += v_comm_cost
+                           * instance->communicationCosts(vertex_proc, target_proc);    // is 0 if target_proc == vertex_proc
                 }
             }
         }
 
-        return work_costs + comm_costs * comm_multiplier + static_cast<v_commw_t<Graph_t>>(active_schedule->num_steps() - 1) * instance->synchronisationCosts();
+        return work_costs + comm_costs * comm_multiplier
+               + static_cast<v_commw_t<Graph_t>>(active_schedule->num_steps() - 1) * instance->synchronisationCosts();
     }
 
     cost_t compute_schedule_cost_test() {
@@ -109,15 +115,17 @@ struct kl_hyper_total_comm_cost_function {
             }
         }
 
-        return work_costs + comm_costs * comm_multiplier + static_cast<v_commw_t<Graph_t>>(active_schedule->num_steps() - 1) * instance->synchronisationCosts();
+        return work_costs + comm_costs * comm_multiplier
+               + static_cast<v_commw_t<Graph_t>>(active_schedule->num_steps() - 1) * instance->synchronisationCosts();
     }
 
     inline void update_datastructure_after_move(const kl_move &move, const unsigned start_step, const unsigned end_step) {
         if (move.to_proc != move.from_proc) {
             for (const auto &source : instance->getComputationalDag().parents(move.node)) {
                 const unsigned source_step = active_schedule->assigned_superstep(source);
-                if (source_step < start_step || source_step > end_step)
+                if (source_step < start_step || source_step > end_step) {
                     continue;
+                }
                 update_source_after_move(move, source);
             }
         }
@@ -128,19 +136,25 @@ struct kl_hyper_total_comm_cost_function {
         node_lambda_map.increase_proc_count(source, move.to_proc);
     }
 
-    template<typename thread_data_t>
-    void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, const cost_t &reward, std::map<VertexType, kl_gain_update_info> &max_gain_recompute, std::vector<VertexType> &new_nodes) {
-
+    template <typename thread_data_t>
+    void update_node_comm_affinity(const kl_move &move,
+                                   thread_data_t &thread_data,
+                                   const cost_t &penalty,
+                                   const cost_t &reward,
+                                   std::map<VertexType, kl_gain_update_info> &max_gain_recompute,
+                                   std::vector<VertexType> &new_nodes) {
         const unsigned start_step = thread_data.start_step;
         const unsigned end_step = thread_data.end_step;
 
         for (const auto &target : instance->getComputationalDag().children(move.node)) {
             const unsigned target_step = active_schedule->assigned_superstep(target);
-            if (target_step < start_step || target_step > end_step)
+            if (target_step < start_step || target_step > end_step) {
                 continue;
+            }
 
-            if (thread_data.lock_manager.is_locked(target))
+            if (thread_data.lock_manager.is_locked(target)) {
                 continue;
+            }
 
             if (not thread_data.affinity_table.is_selected(target)) {
                 new_nodes.push_back(target);
@@ -226,8 +240,9 @@ struct kl_hyper_total_comm_cost_function {
 
                 const unsigned window_bound = end_idx(target_step, end_step);
                 for (const unsigned p : proc_range->compatible_processors_vertex(target)) {
-                    if (p == target_proc)
+                    if (p == target_proc) {
                         continue;
+                    }
                     if (node_lambda_map.get_proc_entry(move.node, target_proc) == 1) {
                         for (unsigned idx = target_start_idx; idx < window_bound; idx++) {
                             const cost_t x = instance->communicationCosts(move.from_proc, target_proc) * comm_gain;
@@ -248,7 +263,6 @@ struct kl_hyper_total_comm_cost_function {
         }
 
         for (const auto &source : instance->getComputationalDag().parents(move.node)) {
-
             if (move.to_proc != move.from_proc) {
                 const unsigned source_proc = active_schedule->assigned_processor(source);
                 if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) {
@@ -256,11 +270,13 @@ struct kl_hyper_total_comm_cost_function {
 
                     for (const auto &target : instance->getComputationalDag().children(source)) {
                         const unsigned target_step = active_schedule->assigned_superstep(target);
-                        if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target))
+                        if ((target_step < start_step || target_step > end_step) || (target == move.node)
+                            || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) {
                             continue;
+                        }
 
                         if (source_proc != move.from_proc && is_compatible(target, move.from_proc)) {
-                            if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update
+                            if (max_gain_recompute.find(target) != max_gain_recompute.end()) {    // todo more specialized update
                                 max_gain_recompute[target].full_update = true;
                             } else {
                                 max_gain_recompute[target] = kl_gain_update_info(target, true);
@@ -279,12 +295,14 @@ struct kl_hyper_total_comm_cost_function {
 
                     for (const auto &target : instance->getComputationalDag().children(source)) {
                         const unsigned target_step = active_schedule->assigned_superstep(target);
-                        if ((target_step < start_step || target_step > end_step) || (target == move.node) || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target)))
+                        if ((target_step < start_step || target_step > end_step) || (target == move.node)
+                            || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target))) {
                             continue;
+                        }
 
                         const unsigned target_proc = active_schedule->assigned_processor(target);
                         if (target_proc == move.from_proc) {
-                            if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update
+                            if (max_gain_recompute.find(target) != max_gain_recompute.end()) {    // todo more specialized update
                                 max_gain_recompute[target].full_update = true;
                             } else {
                                 max_gain_recompute[target] = kl_gain_update_info(target, true);
@@ -295,14 +313,15 @@ struct kl_hyper_total_comm_cost_function {
                             auto &affinity_table_target = thread_data.affinity_table.at(target);
                             const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * comm_gain;
                             for (const unsigned p : proc_range->compatible_processors_vertex(target)) {
-                                if (p == target_proc)
+                                if (p == target_proc) {
                                     continue;
+                                }
 
                                 for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) {
                                     affinity_table_target[p][idx] -= comm_aff;
                                 }
                             }
-                            break; // since node_lambda_map[source][move.from_proc] == 1
+                            break;    // since node_lambda_map[source][move.from_proc] == 1
                         }
                     }
                 }
@@ -312,8 +331,10 @@ struct kl_hyper_total_comm_cost_function {
 
                     for (const auto &target : instance->getComputationalDag().children(source)) {
                         const unsigned target_step = active_schedule->assigned_superstep(target);
-                        if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target))
+                        if ((target_step < start_step || target_step > end_step) || (target == move.node)
+                            || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) {
                             continue;
+                        }
 
                         if (source_proc != move.to_proc && is_compatible(target, move.to_proc)) {
                             if (max_gain_recompute.find(target) != max_gain_recompute.end()) {
@@ -333,8 +354,10 @@ struct kl_hyper_total_comm_cost_function {
                 } else if (node_lambda_map.get_proc_entry(source, move.to_proc) == 2) {
                     for (const auto &target : instance->getComputationalDag().children(source)) {
                         const unsigned target_step = active_schedule->assigned_superstep(target);
-                        if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target))
+                        if ((target_step < start_step || target_step > end_step) || (target == move.node)
+                            || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) {
                             continue;
+                        }
 
                         const unsigned target_proc = active_schedule->assigned_processor(target);
                         if (target_proc == move.to_proc) {
@@ -348,10 +371,12 @@ struct kl_hyper_total_comm_cost_function {
                                 const unsigned target_start_idx = start_idx(target_step, start_step);
                                 const unsigned target_window_bound = end_idx(target_step, end_step);
                                 auto &affinity_table_target = thread_data.affinity_table.at(target);
-                                const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * graph->vertex_comm_weight(source) * comm_multiplier;
+                                const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc)
+                                                        * graph->vertex_comm_weight(source) * comm_multiplier;
                                 for (const unsigned p : proc_range->compatible_processors_vertex(target)) {
-                                    if (p == target_proc)
+                                    if (p == target_proc) {
                                         continue;
+                                    }
 
                                     for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) {
                                         affinity_table_target[p][idx] += comm_aff;
@@ -365,11 +390,13 @@ struct kl_hyper_total_comm_cost_function {
             }
 
             const unsigned source_step = active_schedule->assigned_superstep(source);
-            if (source_step < start_step || source_step > end_step)
+            if (source_step < start_step || source_step > end_step) {
                 continue;
+            }
 
-            if (thread_data.lock_manager.is_locked(source))
+            if (thread_data.lock_manager.is_locked(source)) {
                 continue;
+            }
 
             if (not thread_data.affinity_table.is_selected(source)) {
                 new_nodes.push_back(source);
@@ -449,10 +476,13 @@ struct kl_hyper_total_comm_cost_function {
                     const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier;
 
                     for (const unsigned p : proc_range->compatible_processors_vertex(source)) {
-                        if (p == source_proc)
+                        if (p == source_proc) {
                             continue;
+                        }
 
-                        const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.from_proc), instance->communicationCosts(source_proc, move.from_proc), comm_gain);
+                        const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.from_proc),
+                                                                  instance->communicationCosts(source_proc, move.from_proc),
+                                                                  comm_gain);
                         for (unsigned idx = source_start_idx; idx < window_bound; idx++) {
                             affinity_table_source[p][idx] -= comm_cost;
                         }
@@ -463,10 +493,13 @@ struct kl_hyper_total_comm_cost_function {
                     const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier;
 
                     for (const unsigned p : proc_range->compatible_processors_vertex(source)) {
-                        if (p == source_proc)
+                        if (p == source_proc) {
                             continue;
+                        }
 
-                        const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.to_proc), instance->communicationCosts(source_proc, move.to_proc), comm_gain);
+                        const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.to_proc),
+                                                                  instance->communicationCosts(source_proc, move.to_proc),
+                                                                  comm_gain);
                         for (unsigned idx = source_start_idx; idx < window_bound; idx++) {
                             affinity_table_source[p][idx] += comm_cost;
                         }
@@ -476,12 +509,28 @@ struct kl_hyper_total_comm_cost_function {
         }
     }
 
-    inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return node_step < window_size + start_step ? window_size - (node_step - start_step) : 0; }
-    inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); }
-    inline cost_t change_comm_cost(const v_commw_t<Graph_t> &p_target_comm_cost, const v_commw_t<Graph_t> &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; }
+    inline unsigned start_idx(const unsigned node_step, const unsigned start_step) {
+        return node_step < window_size + start_step ? window_size - (node_step - start_step) : 0;
+    }
+
+    inline unsigned end_idx(const unsigned node_step, const unsigned end_step) {
+        return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step);
+    }
 
-    template<typename affinity_table_t>
-    void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, const cost_t &reward, const unsigned start_step, const unsigned end_step) {
+    inline cost_t change_comm_cost(const v_commw_t<Graph_t> &p_target_comm_cost,
+                                   const v_commw_t<Graph_t> &node_target_comm_cost,
+                                   const cost_t &comm_gain) {
+        return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain
+                                                          : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0;
+    }
+
+    template <typename affinity_table_t>
+    void compute_comm_affinity(VertexType node,
+                               affinity_table_t &affinity_table_node,
+                               const cost_t &penalty,
+                               const cost_t &reward,
+                               const unsigned start_step,
+                               const unsigned end_step) {
         const unsigned node_step = active_schedule->assigned_superstep(node);
         const unsigned node_proc = active_schedule->assigned_processor(node);
         const unsigned window_bound = end_idx(node_step, end_step);
@@ -520,17 +569,19 @@ struct kl_hyper_total_comm_cost_function {
                     }
                 }
             }
-        } // traget
+        }    // target
 
         const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier;
 
         for (const unsigned p : proc_range->compatible_processors_vertex(node)) {
-            if (p == node_proc)
+            if (p == node_proc) {
                 continue;
+            }
 
             for (const auto lambda_pair : node_lambda_map.iterate_proc_entries(node)) {
                 const auto &lambda_proc = lambda_pair.first;
-                const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, lambda_proc), instance->communicationCosts(node_proc, lambda_proc), comm_gain);
+                const cost_t comm_cost = change_comm_cost(
+                    instance->communicationCosts(p, lambda_proc), instance->communicationCosts(node_proc, lambda_proc), comm_gain);
                 for (unsigned idx = node_start_idx; idx < window_bound; idx++) {
                     affinity_table_node[p][idx] += comm_cost;
                 }
@@ -575,8 +626,9 @@ struct kl_hyper_total_comm_cost_function {
 
             const cost_t source_comm_gain = graph->vertex_comm_weight(source) * comm_multiplier;
             for (const unsigned p : proc_range->compatible_processors_vertex(node)) {
-                if (p == node_proc)
+                if (p == node_proc) {
                     continue;
+                }
 
                 if (source_proc != node_proc && node_lambda_map.get_proc_entry(source, node_proc) == 1) {
                     for (unsigned idx = node_start_idx; idx < window_bound; idx++) {
@@ -590,8 +642,8 @@ struct kl_hyper_total_comm_cost_function {
                     }
                 }
             }
-        } // source
+        }    // source
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp
index 5f471077..a28e4640 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp
@@ -22,9 +22,9 @@ limitations under the License.
 #include "../kl_improver.hpp"
 
 namespace osp {
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t, unsigned window_size = 1, bool use_node_communication_costs_arg = true>
-struct kl_total_comm_cost_function {
 
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t, unsigned window_size = 1, bool use_node_communication_costs_arg = true>
+struct kl_total_comm_cost_function {
     using VertexType = vertex_idx_t<Graph_t>;
     using kl_move = kl_move_struct<cost_t, VertexType>;
     using kl_gain_update_info = kl_update_info<VertexType>;
@@ -45,7 +45,9 @@ struct kl_total_comm_cost_function {
     cost_t max_comm_weight = 0;
 
     inline cost_t get_comm_multiplier() { return comm_multiplier; }
+
     inline cost_t get_max_comm_weight() { return max_comm_weight; }
+
     inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; }
 
     const std::string name() const { return "toal_comm_cost"; }
@@ -61,17 +63,16 @@ struct kl_total_comm_cost_function {
     }
 
     struct empty_struct {};
+
     using pre_move_comm_data_t = empty_struct;
+
     inline empty_struct get_pre_move_comm_data(const kl_move &) { return empty_struct(); }
 
-    cost_t compute_schedule_cost_test() {
-        return compute_schedule_cost();
-    }
+    cost_t compute_schedule_cost_test() { return compute_schedule_cost(); }
 
     void update_datastructure_after_move(const kl_move &, const unsigned, const unsigned) {}
 
     cost_t compute_schedule_cost() {
-
         cost_t work_costs = 0;
         for (unsigned step = 0; step < active_schedule->num_steps(); step++) {
             work_costs += active_schedule->get_step_max_work(step);
@@ -79,7 +80,6 @@ struct kl_total_comm_cost_function {
 
         cost_t comm_costs = 0;
         for (const auto &edge : edges(*graph)) {
-
             const auto &source_v = source(edge, *graph);
             const auto &target_v = target(edge, *graph);
 
@@ -87,7 +87,6 @@ struct kl_total_comm_cost_function {
             const unsigned &target_proc = active_schedule->assigned_processor(target_v);
 
             if (source_proc != target_proc) {
-
                 if constexpr (use_node_communication_costs) {
                     const cost_t source_comm_cost = graph->vertex_comm_weight(source_v);
                     max_comm_weight = std::max(max_comm_weight, source_comm_cost);
@@ -100,23 +99,29 @@ struct kl_total_comm_cost_function {
             }
         }
 
-        return work_costs + comm_costs * comm_multiplier + static_cast<v_commw_t<Graph_t>>(active_schedule->num_steps() - 1) * instance->synchronisationCosts();
+        return work_costs + comm_costs * comm_multiplier
+               + static_cast<v_commw_t<Graph_t>>(active_schedule->num_steps() - 1) * instance->synchronisationCosts();
     }
 
-    template<typename thread_data_t>
-    void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, const cost_t &reward, std::map<VertexType, kl_gain_update_info> &max_gain_recompute, std::vector<VertexType> &new_nodes) {
-
+    template <typename thread_data_t>
+    void update_node_comm_affinity(const kl_move &move,
+                                   thread_data_t &thread_data,
+                                   const cost_t &penalty,
+                                   const cost_t &reward,
+                                   std::map<VertexType, kl_gain_update_info> &max_gain_recompute,
+                                   std::vector<VertexType> &new_nodes) {
         const unsigned &start_step = thread_data.start_step;
         const unsigned &end_step = thread_data.end_step;
 
         for (const auto &target : instance->getComputationalDag().children(move.node)) {
-
             const unsigned target_step = active_schedule->assigned_superstep(target);
-            if (target_step < start_step || target_step > end_step)
+            if (target_step < start_step || target_step > end_step) {
                 continue;
+            }
 
-            if (thread_data.lock_manager.is_locked(target))
+            if (thread_data.lock_manager.is_locked(target)) {
                 continue;
+            }
 
             if (not thread_data.affinity_table.is_selected(target)) {
                 new_nodes.push_back(target);
@@ -134,7 +139,6 @@ struct kl_total_comm_cost_function {
             auto &affinity_table_target = thread_data.affinity_table.at(target);
 
             if (move.from_step < target_step + (move.from_proc == target_proc)) {
-
                 const unsigned diff = target_step - move.from_step;
                 const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0;
                 unsigned idx = target_start_idx;
@@ -149,7 +153,6 @@ struct kl_total_comm_cost_function {
                 }
 
             } else {
-
                 const unsigned diff = move.from_step - target_step;
                 const unsigned window_bound = end_idx(target_step, end_step);
                 unsigned idx = std::min(window_size + diff, window_bound);
@@ -209,8 +212,10 @@ struct kl_total_comm_cost_function {
                 const unsigned window_bound = end_idx(target_step, end_step);
                 for (; idx < window_bound; idx++) {
                     for (const unsigned p : proc_range->compatible_processors_vertex(target)) {
-                        const auto x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain);
-                        const auto y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain);
+                        const auto x = change_comm_cost(
+                            instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain);
+                        const auto y = change_comm_cost(
+                            instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain);
                         affinity_table_target[p][idx] += x - y;
                     }
                 }
@@ -218,13 +223,14 @@ struct kl_total_comm_cost_function {
         }
 
         for (const auto &source : instance->getComputationalDag().parents(move.node)) {
-
             const unsigned source_step = active_schedule->assigned_superstep(source);
-            if (source_step < start_step || source_step > end_step)
+            if (source_step < start_step || source_step > end_step) {
                 continue;
+            }
 
-            if (thread_data.lock_manager.is_locked(source))
+            if (thread_data.lock_manager.is_locked(source)) {
                 continue;
+            }
 
             if (not thread_data.affinity_table.is_selected(source)) {
                 new_nodes.push_back(source);
@@ -242,7 +248,6 @@ struct kl_total_comm_cost_function {
             auto &affinity_table_source = thread_data.affinity_table.at(source);
 
             if (move.from_step < source_step + (move.from_proc != source_proc)) {
-
                 const unsigned diff = source_step - move.from_step;
                 const unsigned bound = window_size > diff ? window_size - diff : 0;
                 unsigned idx = start_idx(source_step, start_step);
@@ -257,7 +262,6 @@ struct kl_total_comm_cost_function {
                 }
 
             } else {
-
                 const unsigned diff = move.from_step - source_step;
                 unsigned idx = window_size + diff;
 
@@ -309,8 +313,10 @@ struct kl_total_comm_cost_function {
                 unsigned idx = start_idx(source_step, start_step);
                 for (; idx < window_bound; idx++) {
                     for (const unsigned p : proc_range->compatible_processors_vertex(source)) {
-                        const cost_t x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain);
-                        const cost_t y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, comm_gain);
+                        const cost_t x = change_comm_cost(
+                            instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain);
+                        const cost_t y = change_comm_cost(
+                            instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, comm_gain);
                         affinity_table_source[p][idx] += x - y;
                     }
                 }
@@ -318,13 +324,28 @@ struct kl_total_comm_cost_function {
         }
     }
 
-    inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; }
-    inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); }
+    inline unsigned start_idx(const unsigned node_step, const unsigned start_step) {
+        return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0;
+    }
+
+    inline unsigned end_idx(const unsigned node_step, const unsigned end_step) {
+        return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step);
+    }
 
-    inline cost_t change_comm_cost(const v_commw_t<Graph_t> &p_target_comm_cost, const v_commw_t<Graph_t> &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; }
+    inline cost_t change_comm_cost(const v_commw_t<Graph_t> &p_target_comm_cost,
+                                   const v_commw_t<Graph_t> &node_target_comm_cost,
+                                   const cost_t &comm_gain) {
+        return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain
+                                                          : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0;
+    }
 
-    template<typename affinity_table_t>
-    void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, const cost_t &reward, const unsigned start_step, const unsigned end_step) {
+    template <typename affinity_table_t>
+    void compute_comm_affinity(VertexType node,
+                               affinity_table_t &affinity_table_node,
+                               const cost_t &penalty,
+                               const cost_t &reward,
+                               const unsigned start_step,
+                               const unsigned end_step) {
         const unsigned node_step = active_schedule->assigned_superstep(node);
         const unsigned node_proc = active_schedule->assigned_processor(node);
         const unsigned window_bound = end_idx(node_step, end_step);
@@ -368,13 +389,14 @@ struct kl_total_comm_cost_function {
             const auto node_target_comm_cost = instance->communicationCosts(node_proc, target_proc);
 
             for (const unsigned p : proc_range->compatible_processors_vertex(node)) {
-                const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, target_proc), node_target_comm_cost, comm_gain);
+                const cost_t comm_cost
+                    = change_comm_cost(instance->communicationCosts(p, target_proc), node_target_comm_cost, comm_gain);
                 for (unsigned idx = node_start_idx; idx < window_bound; idx++) {
                     affinity_table_node[p][idx] += comm_cost;
                 }
             }
 
-        } // traget
+        }    // target
 
         for (const auto &source : instance->getComputationalDag().parents(node)) {
             const unsigned source_step = active_schedule->assigned_superstep(source);
@@ -416,13 +438,14 @@ struct kl_total_comm_cost_function {
             const auto source_node_comm_cost = instance->communicationCosts(source_proc, node_proc);
 
             for (const unsigned p : proc_range->compatible_processors_vertex(node)) {
-                const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain);
+                const cost_t comm_cost
+                    = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain);
                 for (unsigned idx = node_start_idx; idx < window_bound; idx++) {
                     affinity_table_node[p][idx] += comm_cost;
                 }
             }
-        } // source
+        }    // source
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp
index 0eccc815..a1494bff 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp
@@ -33,9 +33,8 @@ namespace osp {
  * For each node, the map stores the count of children assigned to each processor, which is
  * important for computing communication costs in BSP scheduling.
  */
-template<typename vertex_idx_t>
+template <typename vertex_idx_t>
 struct lambda_map_container {
-
     /// Vector of maps: for each node, maps processor ID to assignment count
     std::vector<std::unordered_map<unsigned, unsigned>> node_lambda_map;
 
@@ -102,9 +101,7 @@ struct lambda_map_container {
      * @param node Node index
      * @return The count of different processors the node is sending to
      */
-    inline unsigned get_proc_count(const vertex_idx_t node) const {
-        return static_cast<unsigned>(node_lambda_map[node].size());
-    }
+    inline unsigned get_proc_count(const vertex_idx_t node) const { return static_cast<unsigned>(node_lambda_map[node].size()); }
 
     /**
      * @brief Increase the processor count for a given node.
@@ -158,9 +155,8 @@ struct lambda_map_container {
  * For each node, the vector stores the count of children assigned to each processor, which is
  * important for computing communication costs in BSP scheduling.
  */
-template<typename vertex_idx_t>
+template <typename vertex_idx_t>
 struct lambda_vector_container {
-
     /**
      * @brief Range adapter for iterating over non-zero processor entries.
      *
@@ -179,7 +175,6 @@ struct lambda_vector_container {
          * for all processors with non-zero assignment counts.
          */
         class lambda_vector_iterator {
-
             using iterator_category = std::input_iterator_tag;
             using value_type = std::pair<unsigned, unsigned>;
             using difference_type = std::ptrdiff_t;
@@ -296,9 +291,7 @@ struct lambda_vector_container {
      * @param proc Processor ID
      * @return true if the processor has no assignments to the node
      */
-    inline bool has_no_proc_entry(const vertex_idx_t node, const unsigned proc) const {
-        return node_lambda_vec[node][proc] == 0;
-    }
+    inline bool has_no_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc] == 0; }
 
     /**
      * @brief Get a reference to the processor count for a given node.
@@ -369,4 +362,4 @@ struct lambda_vector_container {
     inline auto iterate_proc_entries(const vertex_idx_t node) { return lambda_vector_range(node_lambda_vec[node]); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp
index 236e11cc..8c76efe4 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp
@@ -18,20 +18,20 @@ limitations under the License.
 
 #pragma once
 
-#include "comm_cost_policies.hpp"
-#include "generic_lambda_container.hpp"
-#include "lambda_container.hpp"
-#include "osp/bsp/model/BspInstance.hpp"
 #include <algorithm>
 #include <type_traits>
 #include <unordered_map>
 #include <vector>
 
+#include "comm_cost_policies.hpp"
+#include "generic_lambda_container.hpp"
+#include "lambda_container.hpp"
+#include "osp/bsp/model/BspInstance.hpp"
+
 namespace osp {
 
-template<typename comm_weight_t>
+template <typename comm_weight_t>
 struct pre_move_comm_data {
-
     struct step_info {
         comm_weight_t max_comm;
         comm_weight_t second_max_comm;
@@ -56,9 +56,8 @@ struct pre_move_comm_data {
     }
 };
 
-template<typename Graph_t, typename cost_t, typename kl_active_schedule_t, typename CommPolicy = EagerCommCostPolicy>
+template <typename Graph_t, typename cost_t, typename kl_active_schedule_t, typename CommPolicy = EagerCommCostPolicy>
 struct max_comm_datastructure {
-
     using comm_weight_t = v_commw_t<Graph_t>;
     using VertexType = vertex_idx_t<Graph_t>;
     using kl_move = kl_move_struct<cost_t, VertexType>;
@@ -89,14 +88,17 @@ struct max_comm_datastructure {
     std::vector<bool> step_is_affected;
 
     inline comm_weight_t step_proc_send(unsigned step, unsigned proc) const { return step_proc_send_[step][proc]; }
+
     inline comm_weight_t &step_proc_send(unsigned step, unsigned proc) { return step_proc_send_[step][proc]; }
-    inline comm_weight_t step_proc_receive(unsigned step, unsigned proc) const {
-        return step_proc_receive_[step][proc];
-    }
+
+    inline comm_weight_t step_proc_receive(unsigned step, unsigned proc) const { return step_proc_receive_[step][proc]; }
+
     inline comm_weight_t &step_proc_receive(unsigned step, unsigned proc) { return step_proc_receive_[step][proc]; }
 
     inline comm_weight_t step_max_comm(unsigned step) const { return step_max_comm_cache[step]; }
+
     inline comm_weight_t step_second_max_comm(unsigned step) const { return step_second_max_comm_cache[step]; }
+
     inline unsigned step_max_comm_count(unsigned step) const { return step_max_comm_count_cache[step]; }
 
     inline void initialize(kl_active_schedule_t &kl_sched) {
@@ -170,10 +172,12 @@ struct max_comm_datastructure {
         step_max_comm_cache[step] = global_max;
 
         unsigned global_count = 0;
-        if (max_send == global_max)
+        if (max_send == global_max) {
             global_count += max_send_count;
-        if (max_receive == global_max)
+        }
+        if (max_receive == global_max) {
             global_count += max_receive_count;
+        }
         step_max_comm_count_cache[step] = global_count;
 
         comm_weight_t cand_send = (max_send == global_max) ? second_max_send : max_send;
@@ -209,8 +213,9 @@ struct max_comm_datastructure {
 
         // Prepare Scratchpad (Avoids Allocations) ---
         for (unsigned step : affected_steps_list) {
-            if (step < step_is_affected.size())
+            if (step < step_is_affected.size()) {
                 step_is_affected[step] = false;
+            }
         }
         affected_steps_list.clear();
 
@@ -280,8 +285,9 @@ struct max_comm_datastructure {
         for (const auto &parent : graph.parents(node)) {
             const unsigned parent_step = active_schedule->assigned_superstep(parent);
             // Fast boundary check
-            if (parent_step >= step_proc_send_.size())
+            if (parent_step >= step_proc_send_.size()) {
                 continue;
+            }
 
             const unsigned parent_proc = active_schedule->assigned_processor(parent);
             const comm_weight_t comm_w_parent = graph.vertex_comm_weight(parent);
@@ -294,8 +300,7 @@ struct max_comm_datastructure {
                 if (from_proc != parent_proc) {
                     const comm_weight_t cost = comm_w_parent * instance->sendCosts(parent_proc, from_proc);
                     if (cost > 0) {
-                        CommPolicy::unattribute_communication(*this, cost, parent_step, parent_proc, from_proc,
-                                                                from_step, val);
+                        CommPolicy::unattribute_communication(*this, cost, parent_step, parent_proc, from_proc, from_step, val);
                     }
                 }
             }
@@ -308,15 +313,13 @@ struct max_comm_datastructure {
                 if (to_proc != parent_proc) {
                     const comm_weight_t cost = comm_w_parent * instance->sendCosts(parent_proc, to_proc);
                     if (cost > 0) {
-                        CommPolicy::attribute_communication(*this, cost, parent_step, parent_proc, to_proc, to_step,
-                                                            val_to);
+                        CommPolicy::attribute_communication(*this, cost, parent_step, parent_proc, to_proc, to_step, val_to);
                     }
                 }
             }
 
             mark_step(parent_step);
         }
-        
 
         // Re-arrange Affected Steps
         for (unsigned step : affected_steps_list) {
@@ -358,14 +361,12 @@ struct max_comm_datastructure {
                 const unsigned v_proc = vec_sched.assignedProcessor(v);
                 const unsigned v_step = vec_sched.assignedSuperstep(v);
 
-                const comm_weight_t comm_w_send_cost =
-                    (u_proc != v_proc) ? comm_w * instance->sendCosts(u_proc, v_proc) : 0;
+                const comm_weight_t comm_w_send_cost = (u_proc != v_proc) ? comm_w * instance->sendCosts(u_proc, v_proc) : 0;
 
                 auto &val = node_lambda_map.get_proc_entry(u, v_proc);
                 if (CommPolicy::add_child(val, v_step)) {
                     if (u_proc != v_proc && comm_w_send_cost > 0) {
-                        CommPolicy::attribute_communication(*this, comm_w_send_cost, u_step, u_proc, v_proc, v_step,
-                                                            val);
+                        CommPolicy::attribute_communication(*this, comm_w_send_cost, u_step, u_proc, v_proc, v_step, val);
                     }
                 }
             }
@@ -380,4 +381,4 @@ struct max_comm_datastructure {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp
index 862eeacc..2e6f3af0 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp
@@ -28,9 +28,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename cost_t, typename vertex_idx_t>
+template <typename cost_t, typename vertex_idx_t>
 struct kl_move_struct {
-
     vertex_idx_t node;
     cost_t gain;
 
@@ -41,10 +40,9 @@ struct kl_move_struct {
     unsigned to_step;
 
     kl_move_struct() : node(0), gain(0), from_proc(0), from_step(0), to_proc(0), to_step(0) {}
-    kl_move_struct(vertex_idx_t _node, cost_t _gain, unsigned _from_proc, unsigned _from_step,
-                   unsigned _to_proc, unsigned _to_step)
-        : node(_node), gain(_gain), from_proc(_from_proc), from_step(_from_step),
-          to_proc(_to_proc), to_step(_to_step) {}
+
+    kl_move_struct(vertex_idx_t _node, cost_t _gain, unsigned _from_proc, unsigned _from_step, unsigned _to_proc, unsigned _to_step)
+        : node(_node), gain(_gain), from_proc(_from_proc), from_step(_from_step), to_proc(_to_proc), to_step(_to_step) {}
 
     bool operator<(kl_move_struct<cost_t, vertex_idx_t> const &rhs) const {
         return (gain < rhs.gain) or (gain == rhs.gain and node > rhs.node);
@@ -59,9 +57,8 @@ struct kl_move_struct {
     }
 };
 
-template<typename work_weight_t>
+template <typename work_weight_t>
 struct pre_move_work_data {
-
     work_weight_t from_step_max_work;
     work_weight_t from_step_second_max_work;
     unsigned from_step_max_work_processor_count;
@@ -71,18 +68,23 @@ struct pre_move_work_data {
     unsigned to_step_max_work_processor_count;
 
     pre_move_work_data() {}
-    pre_move_work_data(work_weight_t from_step_max_work_, work_weight_t from_step_second_max_work_, unsigned from_step_max_work_processor_count_,
-                       work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_,
+
+    pre_move_work_data(work_weight_t from_step_max_work_,
+                       work_weight_t from_step_second_max_work_,
+                       unsigned from_step_max_work_processor_count_,
+                       work_weight_t to_step_max_work_,
+                       work_weight_t to_step_second_max_work_,
                        unsigned to_step_max_work_processor_count_)
-        : from_step_max_work(from_step_max_work_), from_step_second_max_work(from_step_second_max_work_),
+        : from_step_max_work(from_step_max_work_),
+          from_step_second_max_work(from_step_second_max_work_),
           from_step_max_work_processor_count(from_step_max_work_processor_count_),
-          to_step_max_work(to_step_max_work_), to_step_second_max_work(to_step_second_max_work_),
+          to_step_max_work(to_step_max_work_),
+          to_step_second_max_work(to_step_second_max_work_),
           to_step_max_work_processor_count(to_step_max_work_processor_count_) {}
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct kl_active_schedule_work_datastructures {
-
     using work_weight_t = v_workw_t<Graph_t>;
 
     const BspInstance<Graph_t> *instance;
@@ -93,11 +95,10 @@ struct kl_active_schedule_work_datastructures {
         unsigned proc;
 
         weight_proc() : work(0), proc(0) {}
+
         weight_proc(work_weight_t _work, unsigned _proc) : work(_work), proc(_proc) {}
 
-        bool operator<(weight_proc const &rhs) const {
-            return (work > rhs.work) or (work == rhs.work and proc < rhs.proc);
-        }
+        bool operator<(weight_proc const &rhs) const { return (work > rhs.work) or (work == rhs.work and proc < rhs.proc); }
     };
 
     std::vector<std::vector<weight_proc>> step_processor_work_;
@@ -107,14 +108,27 @@ struct kl_active_schedule_work_datastructures {
     work_weight_t total_work_weight;
 
     inline work_weight_t step_max_work(unsigned step) const { return step_processor_work_[step][0].work; }
-    inline work_weight_t step_second_max_work(unsigned step) const { return step_processor_work_[step][step_max_work_processor_count[step]].work; }
-    inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { return step_processor_work_[step][step_processor_position[step][proc]].work; }
-    inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; }
 
-    template<typename cost_t, typename vertex_idx_t>
+    inline work_weight_t step_second_max_work(unsigned step) const {
+        return step_processor_work_[step][step_max_work_processor_count[step]].work;
+    }
+
+    inline work_weight_t step_proc_work(unsigned step, unsigned proc) const {
+        return step_processor_work_[step][step_processor_position[step][proc]].work;
+    }
+
+    inline work_weight_t &step_proc_work(unsigned step, unsigned proc) {
+        return step_processor_work_[step][step_processor_position[step][proc]].work;
+    }
+
+    template <typename cost_t, typename vertex_idx_t>
     inline pre_move_work_data<work_weight_t> get_pre_move_work_data(kl_move_struct<cost_t, vertex_idx_t> move) {
-        return pre_move_work_data<work_weight_t>(step_max_work(move.from_step), step_second_max_work(move.from_step), step_max_work_processor_count[move.from_step],
-                                                 step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]);
+        return pre_move_work_data<work_weight_t>(step_max_work(move.from_step),
+                                                 step_second_max_work(move.from_step),
+                                                 step_max_work_processor_count[move.from_step],
+                                                 step_max_work(move.to_step),
+                                                 step_second_max_work(move.to_step),
+                                                 step_max_work_processor_count[move.to_step]);
     }
 
     inline void initialize(const SetSchedule<Graph_t> &sched, const BspInstance<Graph_t> &inst, unsigned num_steps) {
@@ -122,8 +136,10 @@ struct kl_active_schedule_work_datastructures {
         set_schedule = &sched;
         max_work_weight = 0;
         total_work_weight = 0;
-        step_processor_work_ = std::vector<std::vector<weight_proc>>(num_steps, std::vector<weight_proc>(instance->numberOfProcessors()));
-        step_processor_position = std::vector<std::vector<unsigned>>(num_steps, std::vector<unsigned>(instance->numberOfProcessors(), 0));
+        step_processor_work_
+            = std::vector<std::vector<weight_proc>>(num_steps, std::vector<weight_proc>(instance->numberOfProcessors()));
+        step_processor_position
+            = std::vector<std::vector<unsigned>>(num_steps, std::vector<unsigned>(instance->numberOfProcessors(), 0));
         step_max_work_processor_count = std::vector<unsigned>(num_steps, 0);
     }
 
@@ -141,16 +157,17 @@ struct kl_active_schedule_work_datastructures {
         for (const auto &wp : step_processor_work_[step]) {
             step_processor_position[step][wp.proc] = pos++;
 
-            if (wp.work == max_work_to && pos < instance->numberOfProcessors())
+            if (wp.work == max_work_to && pos < instance->numberOfProcessors()) {
                 step_max_work_processor_count[step] = pos;
+            }
         }
     }
 
-    template<typename cost_t, typename vertex_idx_t>
+    template <typename cost_t, typename vertex_idx_t>
     void apply_move(kl_move_struct<cost_t, vertex_idx_t> move, work_weight_t work_weight) {
-
-        if (work_weight == 0)
+        if (work_weight == 0) {
             return;
+        }
 
         if (move.to_step != move.from_step) {
             step_proc_work(move.to_step, move.to_proc) += work_weight;
@@ -171,8 +188,9 @@ struct kl_active_schedule_work_datastructures {
             // unsigned to_proc_pos = step_processor_position[move.to_step][move.to_proc];
 
             // while (to_proc_pos > 0 && step_processor_work_[move.to_step][to_proc_pos - 1].work < new_weight_to) {
-            //     std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - 1]);
-            //     std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]);
+            //     std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos -
+            //     1]); std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc],
+            //     step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]);
             //     to_proc_pos--;
             // }
 
@@ -182,9 +200,12 @@ struct kl_active_schedule_work_datastructures {
 
             // unsigned from_proc_pos = step_processor_position[move.from_step][move.from_proc];
 
-            // while (from_proc_pos < instance->numberOfProcessors() - 1 && step_processor_work_[move.from_step][from_proc_pos + 1].work > new_weight_from) {
-            //     std::swap(step_processor_work_[move.from_step][from_proc_pos], step_processor_work_[move.from_step][from_proc_pos + 1]);
-            //     std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]);
+            // while (from_proc_pos < instance->numberOfProcessors() - 1 && step_processor_work_[move.from_step][from_proc_pos +
+            // 1].work > new_weight_from) {
+            //     std::swap(step_processor_work_[move.from_step][from_proc_pos],
+            //     step_processor_work_[move.from_step][from_proc_pos + 1]);
+            //     std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc],
+            //     step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]);
             //     from_proc_pos++;
             // }
 
@@ -209,7 +230,6 @@ struct kl_active_schedule_work_datastructures {
     }
 
     void override_next_superstep(unsigned step) {
-
         const unsigned next_step = step + 1;
         for (unsigned i = 0; i < instance->numberOfProcessors(); i++) {
             step_processor_work_[next_step][i] = step_processor_work_[step][i];
@@ -245,7 +265,8 @@ struct kl_active_schedule_work_datastructures {
                 if (step_processor_work_[step][proc].work > max_work) {
                     max_work = step_processor_work_[step][proc].work;
                     step_max_work_processor_count[step] = 1;
-                } else if (step_processor_work_[step][proc].work == max_work && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) {
+                } else if (step_processor_work_[step][proc].work == max_work
+                           && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) {
                     step_max_work_processor_count[step]++;
                 }
             }
@@ -259,9 +280,8 @@ struct kl_active_schedule_work_datastructures {
     }
 };
 
-template<typename Graph_t, typename cost_t>
+template <typename Graph_t, typename cost_t>
 struct thread_local_active_schedule_data {
-
     using VertexType = vertex_idx_t<Graph_t>;
     using EdgeType = edge_desc_t<Graph_t>;
 
@@ -297,9 +317,8 @@ struct thread_local_active_schedule_data {
     }
 };
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 class kl_active_schedule {
-
   private:
     using VertexType = vertex_idx_t<Graph_t>;
     using EdgeType = edge_desc_t<Graph_t>;
@@ -318,21 +337,45 @@ class kl_active_schedule {
     virtual ~kl_active_schedule() = default;
 
     inline const BspInstance<Graph_t> &getInstance() const { return *instance; }
+
     inline const VectorSchedule<Graph_t> &getVectorSchedule() const { return vector_schedule; }
+
     inline VectorSchedule<Graph_t> &getVectorSchedule() { return vector_schedule; }
+
     inline const SetSchedule<Graph_t> &getSetSchedule() const { return set_schedule; }
+
     inline cost_t get_cost() { return cost; }
+
     inline bool is_feasible() { return feasible; }
+
     inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); }
+
     inline unsigned assigned_processor(VertexType node) const { return vector_schedule.assignedProcessor(node); }
+
     inline unsigned assigned_superstep(VertexType node) const { return vector_schedule.assignedSuperstep(node); }
+
     inline v_workw_t<Graph_t> get_step_max_work(unsigned step) const { return work_datastructures.step_max_work(step); }
-    inline v_workw_t<Graph_t> get_step_second_max_work(unsigned step) const { return work_datastructures.step_second_max_work(step); }
-    inline std::vector<unsigned> &get_step_max_work_processor_count() { return work_datastructures.step_max_work_processor_count; }
-    inline v_workw_t<Graph_t> get_step_processor_work(unsigned step, unsigned proc) const { return work_datastructures.step_proc_work(step, proc); }
-    inline pre_move_work_data<v_workw_t<Graph_t>> get_pre_move_work_data(kl_move move) { return work_datastructures.get_pre_move_work_data(move); }
+
+    inline v_workw_t<Graph_t> get_step_second_max_work(unsigned step) const {
+        return work_datastructures.step_second_max_work(step);
+    }
+
+    inline std::vector<unsigned> &get_step_max_work_processor_count() {
+        return work_datastructures.step_max_work_processor_count;
+    }
+
+    inline v_workw_t<Graph_t> get_step_processor_work(unsigned step, unsigned proc) const {
+        return work_datastructures.step_proc_work(step, proc);
+    }
+
+    inline pre_move_work_data<v_workw_t<Graph_t>> get_pre_move_work_data(kl_move move) {
+        return work_datastructures.get_pre_move_work_data(move);
+    }
+
     inline v_workw_t<Graph_t> get_max_work_weight() { return work_datastructures.max_work_weight; }
+
     inline v_workw_t<Graph_t> get_total_work_weight() { return work_datastructures.total_work_weight; }
+
     inline void set_cost(cost_t cost_) { cost = cost_; }
 
     constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v<MemoryConstraint_t>;
@@ -365,8 +408,13 @@ class kl_active_schedule {
         }
     }
 
-    template<typename comm_datastructures_t>
-    void revert_to_best_schedule(unsigned start_move, unsigned insert_step, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned &end_step) {
+    template <typename comm_datastructures_t>
+    void revert_to_best_schedule(unsigned start_move,
+                                 unsigned insert_step,
+                                 comm_datastructures_t &comm_datastructures,
+                                 thread_data_t &thread_data,
+                                 unsigned start_step,
+                                 unsigned &end_step) {
         const unsigned bound = std::max(start_move, thread_data.best_schedule_idx);
         revert_moves(bound, comm_datastructures, thread_data, start_step, end_step);
 
@@ -377,8 +425,10 @@ class kl_active_schedule {
         revert_moves(thread_data.best_schedule_idx, comm_datastructures, thread_data, start_step, end_step);
 
 #ifdef KL_DEBUG
-        if (not thread_data.feasible)
-            std::cout << "Reverted to best schedule with cost: " << thread_data.best_cost << " and " << vector_schedule.number_of_supersteps << " supersteps" << std::endl;
+        if (not thread_data.feasible) {
+            std::cout << "Reverted to best schedule with cost: " << thread_data.best_cost << " and "
+                      << vector_schedule.number_of_supersteps << " supersteps" << std::endl;
+        }
 #endif
 
         thread_data.applied_moves.clear();
@@ -388,8 +438,14 @@ class kl_active_schedule {
         thread_data.cost = thread_data.best_cost;
     }
 
-    template<typename comm_datastructures_t>
-    void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) {
+    template <typename comm_datastructures_t>
+    void revert_schedule_to_bound(const size_t bound,
+                                  const cost_t new_cost,
+                                  const bool is_feasible,
+                                  comm_datastructures_t &comm_datastructures,
+                                  thread_data_t &thread_data,
+                                  unsigned start_step,
+                                  unsigned end_step) {
         revert_moves(bound, comm_datastructures, thread_data, start_step, end_step);
 
         thread_data.current_violations.clear();
@@ -409,8 +465,12 @@ class kl_active_schedule {
     void swap_steps(const unsigned step1, const unsigned step2);
 
   private:
-    template<typename comm_datastructures_t>
-    void revert_moves(const size_t bound, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) {
+    template <typename comm_datastructures_t>
+    void revert_moves(const size_t bound,
+                      comm_datastructures_t &comm_datastructures,
+                      thread_data_t &thread_data,
+                      unsigned start_step,
+                      unsigned end_step) {
         while (thread_data.applied_moves.size() > bound) {
             const auto move = thread_data.applied_moves.back().reverse_move();
             thread_data.applied_moves.pop_back();
@@ -439,14 +499,16 @@ class kl_active_schedule {
             const auto &child = target(edge, instance->getComputationalDag());
 
             if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) {
-                if ((node_step > vector_schedule.assignedSuperstep(child)) ||
-                    (node_step == vector_schedule.assignedSuperstep(child) && node_proc != vector_schedule.assignedProcessor(child))) {
+                if ((node_step > vector_schedule.assignedSuperstep(child))
+                    || (node_step == vector_schedule.assignedSuperstep(child)
+                        && node_proc != vector_schedule.assignedProcessor(child))) {
                     thread_data.current_violations.insert(edge);
                     thread_data.new_violations[child] = edge;
                 }
             } else {
-                if ((node_step < vector_schedule.assignedSuperstep(child)) ||
-                    (node_step == vector_schedule.assignedSuperstep(child) && node_proc == vector_schedule.assignedProcessor(child))) {
+                if ((node_step < vector_schedule.assignedSuperstep(child))
+                    || (node_step == vector_schedule.assignedSuperstep(child)
+                        && node_proc == vector_schedule.assignedProcessor(child))) {
                     thread_data.current_violations.erase(edge);
                     thread_data.resolved_violations.insert(edge);
                 }
@@ -457,14 +519,16 @@ class kl_active_schedule {
             const auto &parent = source(edge, instance->getComputationalDag());
 
             if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) {
-                if ((node_step < vector_schedule.assignedSuperstep(parent)) ||
-                    (node_step == vector_schedule.assignedSuperstep(parent) && node_proc != vector_schedule.assignedProcessor(parent))) {
+                if ((node_step < vector_schedule.assignedSuperstep(parent))
+                    || (node_step == vector_schedule.assignedSuperstep(parent)
+                        && node_proc != vector_schedule.assignedProcessor(parent))) {
                     thread_data.current_violations.insert(edge);
                     thread_data.new_violations[parent] = edge;
                 }
             } else {
-                if ((node_step > vector_schedule.assignedSuperstep(parent)) ||
-                    (node_step == vector_schedule.assignedSuperstep(parent) && node_proc == vector_schedule.assignedProcessor(parent))) {
+                if ((node_step > vector_schedule.assignedSuperstep(parent))
+                    || (node_step == vector_schedule.assignedSuperstep(parent)
+                        && node_proc == vector_schedule.assignedProcessor(parent))) {
                     thread_data.current_violations.erase(edge);
                     thread_data.resolved_violations.insert(edge);
                 }
@@ -499,7 +563,7 @@ class kl_active_schedule {
     }
 };
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::clear() {
     work_datastructures.clear();
     vector_schedule.clear();
@@ -509,14 +573,12 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::clear() {
     }
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_violations(thread_data_t &thread_data) {
-
     thread_data.current_violations.clear();
     thread_data.feasible = true;
 
     for (const auto &edge : edges(instance->getComputationalDag())) {
-
         const auto &source_v = source(edge, instance->getComputationalDag());
         const auto &target_v = target(edge, instance->getComputationalDag());
 
@@ -532,7 +594,7 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_violations
     }
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::initialize(const IBspSchedule<Graph_t> &schedule) {
     instance = &schedule.getInstance();
     vector_schedule = VectorSchedule(schedule);
@@ -549,15 +611,16 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::initialize(const I
     compute_work_memory_datastructures(0, num_steps() - 1);
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
-void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_work_memory_datastructures(unsigned start_step, unsigned end_step) {
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_work_memory_datastructures(unsigned start_step,
+                                                                                                 unsigned end_step) {
     if constexpr (use_memory_constraint) {
         memory_constraint.compute_memory_datastructure(start_step, end_step);
     }
     work_datastructures.compute_work_datastructures(start_step, end_step);
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::write_schedule(BspSchedule<Graph_t> &schedule) {
     for (const auto v : instance->vertices()) {
         schedule.setAssignedProcessor(v, vector_schedule.assignedProcessor(v));
@@ -566,7 +629,7 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::write_schedule(Bsp
     schedule.updateNumberOfSupersteps();
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::remove_empty_step(unsigned step) {
     for (unsigned i = step; i < num_steps() - 1; i++) {
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
@@ -583,7 +646,7 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::remove_empty_step(
     vector_schedule.number_of_supersteps--;
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_fwd(const unsigned step, const unsigned to_step) {
     for (unsigned i = step; i < to_step; i++) {
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
@@ -599,7 +662,7 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_fw
     }
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::insert_empty_step(unsigned step) {
     unsigned i = vector_schedule.number_of_supersteps++;
 
@@ -617,8 +680,9 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::insert_empty_step(
     }
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
-void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_bwd(const unsigned to_step, const unsigned empty_step) {
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_bwd(const unsigned to_step,
+                                                                                  const unsigned empty_step) {
     unsigned i = to_step;
 
     for (; i > empty_step; i--) {
@@ -635,10 +699,11 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_bw
     }
 }
 
-template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
+template <typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_steps(const unsigned step1, const unsigned step2) {
-    if (step1 == step2)
+    if (step1 == step2) {
         return;
+    }
 
     for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
         for (const auto node : set_schedule.step_processor_vertices[step1][proc]) {
@@ -655,4 +720,4 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_steps(const u
     }
 }
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
index dd572710..5003d796 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
@@ -28,6 +28,8 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "kl_active_schedule.hpp"
+#include "kl_util.hpp"
 #include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp"
 #include "osp/auxiliary/misc.hpp"
 #include "osp/bsp/model/util/CompatibleProcessorRange.hpp"
@@ -36,9 +38,6 @@ limitations under the License.
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 
-#include "kl_active_schedule.hpp"
-#include "kl_util.hpp"
-
 namespace osp {
 
 struct kl_parameter {
@@ -64,9 +63,8 @@ struct kl_parameter {
     unsigned thread_range_gap = 0;
 };
 
-template<typename VertexType>
+template <typename VertexType>
 struct kl_update_info {
-
     VertexType node = 0;
 
     bool full_update = false;
@@ -76,17 +74,19 @@ struct kl_update_info {
     bool update_entire_from_step = false;
 
     kl_update_info() = default;
-    kl_update_info(VertexType n)
-        : node(n), full_update(false), update_entire_to_step(false), update_entire_from_step(false) {}
+
+    kl_update_info(VertexType n) : node(n), full_update(false), update_entire_to_step(false), update_entire_from_step(false) {}
+
     kl_update_info(VertexType n, bool full)
         : node(n), full_update(full), update_entire_to_step(false), update_entire_from_step(false) {}
 };
 
-template<typename Graph_t, typename comm_cost_function_t,
-         typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1,
-         typename cost_t = double>
+template <typename Graph_t,
+          typename comm_cost_function_t,
+          typename MemoryConstraint_t = no_local_search_memory_constraint,
+          unsigned window_size = 1,
+          typename cost_t = double>
 class kl_improver : public ImprovementScheduler<Graph_t> {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(has_hashable_edge_desc_v<Graph_t>, "Graph_t must satisfy the has_hashable_edge_desc concept");
     static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the computational_dag concept");
@@ -110,7 +110,6 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     using kl_gain_update_info = kl_update_info<VertexType>;
 
     struct ThreadSearchContext {
-
         unsigned thread_id = 0;
         unsigned start_step = 0;
         unsigned end_step = 0;
@@ -138,12 +137,13 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         unsigned max_no_vioaltions_removed_backtrack = 0;
 
         inline unsigned num_steps() const { return end_step - start_step + 1; }
+
         inline unsigned start_idx(const unsigned node_step) const {
             return node_step < start_step + window_size ? window_size - (node_step - start_step) : 0;
         }
+
         inline unsigned end_idx(unsigned node_step) const {
-            return node_step + window_size <= end_step ? window_range
-                                                       : window_range - (node_step + window_size - end_step);
+            return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step);
         }
     };
 
@@ -164,9 +164,9 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     std::vector<bool> thread_finished_vec;
 
     inline unsigned rel_step_idx(const unsigned node_step, const unsigned move_step) const {
-        return (move_step >= node_step) ? ((move_step - node_step) + window_size)
-                                        : (window_size - (node_step - move_step));
+        return (move_step >= node_step) ? ((move_step - node_step) + window_size) : (window_size - (node_step - move_step));
     }
+
     inline bool is_compatible(VertexType node, unsigned proc) const {
         return active_schedule.getInstance().isCompatible(node, proc);
     }
@@ -188,7 +188,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     }
 
     kl_move get_best_move(node_selection_container_t &affinity_table,
-                          vector_vertex_lock_manger<VertexType> &lock_manager, heap_datastructure &max_gain_heap) {
+                          vector_vertex_lock_manger<VertexType> &lock_manager,
+                          heap_datastructure &max_gain_heap) {
         // To introduce non-determinism and help escape local optima, if there are multiple moves with the same
         // top gain, we randomly select one. We check up to `local_max` ties.
         const unsigned local_max = 50;
@@ -210,14 +211,19 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         return best_move;
     }
 
-    inline void process_other_steps_best_move(const unsigned idx, const unsigned node_step, const VertexType &node,
-                                              const cost_t affinity_current_proc_step, cost_t &max_gain,
-                                              unsigned &max_proc, unsigned &max_step,
+    inline void process_other_steps_best_move(const unsigned idx,
+                                              const unsigned node_step,
+                                              const VertexType &node,
+                                              const cost_t affinity_current_proc_step,
+                                              cost_t &max_gain,
+                                              unsigned &max_proc,
+                                              unsigned &max_step,
                                               const std::vector<std::vector<cost_t>> &affinity_table_node) const {
         for (const unsigned p : proc_range.compatible_processors_vertex(node)) {
             if constexpr (active_schedule_t::use_memory_constraint) {
-                if (not active_schedule.memory_constraint.can_move(node, p, node_step + idx - window_size))
+                if (not active_schedule.memory_constraint.can_move(node, p, node_step + idx - window_size)) {
                     continue;
+                }
             }
 
             const cost_t gain = affinity_current_proc_step - affinity_table_node[p][idx];
@@ -229,8 +235,9 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         }
     }
 
-    template<bool move_to_same_super_step>
-    kl_move compute_best_move(VertexType node, const std::vector<std::vector<cost_t>> &affinity_table_node,
+    template <bool move_to_same_super_step>
+    kl_move compute_best_move(VertexType node,
+                              const std::vector<std::vector<cost_t>> &affinity_table_node,
                               ThreadSearchContext &thread_data) {
         const unsigned node_step = active_schedule.assigned_superstep(node);
         const unsigned node_proc = active_schedule.assigned_processor(node);
@@ -244,18 +251,20 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 
         unsigned idx = thread_data.start_idx(node_step);
         for (; idx < window_size; idx++) {
-            process_other_steps_best_move(idx, node_step, node, affinity_current_proc_step, max_gain, max_proc,
-                                          max_step, affinity_table_node);
+            process_other_steps_best_move(
+                idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, max_step, affinity_table_node);
         }
 
         if constexpr (move_to_same_super_step) {
             for (const unsigned proc : proc_range.compatible_processors_vertex(node)) {
-                if (proc == node_proc)
+                if (proc == node_proc) {
                     continue;
+                }
 
                 if constexpr (active_schedule_t::use_memory_constraint) {
-                    if (not active_schedule.memory_constraint.can_move(node, proc, node_step + idx - window_size))
+                    if (not active_schedule.memory_constraint.can_move(node, proc, node_step + idx - window_size)) {
                         continue;
+                    }
                 }
 
                 const cost_t gain = affinity_current_proc_step - affinity_table_node[proc][window_size];
@@ -271,14 +280,15 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 
         const unsigned bound = thread_data.end_idx(node_step);
         for (; idx < bound; idx++) {
-            process_other_steps_best_move(idx, node_step, node, affinity_current_proc_step, max_gain, max_proc,
-                                          max_step, affinity_table_node);
+            process_other_steps_best_move(
+                idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, max_step, affinity_table_node);
         }
 
         return kl_move(node, max_gain, node_proc, node_step, max_proc, node_step + max_step - window_size);
     }
 
-    kl_gain_update_info update_node_work_affinity_after_move(VertexType node, kl_move move,
+    kl_gain_update_info update_node_work_affinity_after_move(VertexType node,
+                                                             kl_move move,
                                                              const pre_move_work_data<work_weight_t> &prev_work_data,
                                                              std::vector<std::vector<cost_t>> &affinity_table_node) {
         const unsigned node_step = active_schedule.assigned_superstep(node);
@@ -298,30 +308,25 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 if (node_step == move.from_step) {
                     const unsigned node_proc = active_schedule.assigned_processor(node);
                     const work_weight_t new_max_weight = active_schedule.get_step_max_work(move.from_step);
-                    const work_weight_t new_second_max_weight =
-                        active_schedule.get_step_second_max_work(move.from_step);
-                    const work_weight_t new_step_proc_work =
-                        active_schedule.get_step_processor_work(node_step, node_proc);
-                    const work_weight_t prev_step_proc_work =
-                        (node_proc == move.from_proc) ? new_step_proc_work + graph->vertex_work_weight(move.node)
-                        : (node_proc == move.to_proc) ? new_step_proc_work - graph->vertex_work_weight(move.node)
-                                                      : new_step_proc_work;
-                    const bool prev_is_sole_max_processor = (prev_work_data.from_step_max_work_processor_count == 1) &&
-                                                            (prev_max_work == prev_step_proc_work);
-                    const cost_t prev_node_proc_affinity =
-                        prev_is_sole_max_processor ? std::min(vertex_weight, prev_max_work - prev_second_max_work)
-                                                   : 0.0;
-                    const bool new_is_sole_max_processor =
-                        (active_schedule.get_step_max_work_processor_count()[node_step] == 1) &&
-                        (new_max_weight == new_step_proc_work);
-                    const cost_t new_node_proc_affinity =
-                        new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight)
-                                                  : 0.0;
+                    const work_weight_t new_second_max_weight = active_schedule.get_step_second_max_work(move.from_step);
+                    const work_weight_t new_step_proc_work = active_schedule.get_step_processor_work(node_step, node_proc);
+                    const work_weight_t prev_step_proc_work
+                        = (node_proc == move.from_proc) ? new_step_proc_work + graph->vertex_work_weight(move.node)
+                          : (node_proc == move.to_proc) ? new_step_proc_work - graph->vertex_work_weight(move.node)
+                                                        : new_step_proc_work;
+                    const bool prev_is_sole_max_processor = (prev_work_data.from_step_max_work_processor_count == 1)
+                                                            && (prev_max_work == prev_step_proc_work);
+                    const cost_t prev_node_proc_affinity
+                        = prev_is_sole_max_processor ? std::min(vertex_weight, prev_max_work - prev_second_max_work) : 0.0;
+                    const bool new_is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1)
+                                                           && (new_max_weight == new_step_proc_work);
+                    const cost_t new_node_proc_affinity
+                        = new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight) : 0.0;
 
                     const cost_t diff = new_node_proc_affinity - prev_node_proc_affinity;
                     if (std::abs(diff) > EPSILON) {
                         update_info.full_update = true;
-                        affinity_table_node[node_proc][window_size] += diff; // Use the pre-calculated diff
+                        affinity_table_node[node_proc][window_size] += diff;    // Use the pre-calculated diff
                     }
 
                     if ((prev_max_work != new_max_weight) || update_info.full_update) {
@@ -332,40 +337,40 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                                 continue;
                             }
 
-                            const work_weight_t new_weight =
-                                vertex_weight + active_schedule.get_step_processor_work(node_step, proc);
-                            const cost_t prev_other_affinity =
-                                compute_same_step_affinity(prev_max_work, new_weight, prev_node_proc_affinity);
-                            const cost_t other_affinity =
-                                compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
+                            const work_weight_t new_weight
+                                = vertex_weight + active_schedule.get_step_processor_work(node_step, proc);
+                            const cost_t prev_other_affinity
+                                = compute_same_step_affinity(prev_max_work, new_weight, prev_node_proc_affinity);
+                            const cost_t other_affinity
+                                = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
 
                             affinity_table_node[proc][window_size] += (other_affinity - prev_other_affinity);
                         }
                     }
 
                     if (node_proc != move.from_proc && is_compatible(node, move.from_proc)) {
-                        const work_weight_t prev_new_weight =
-                            vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc) +
-                            graph->vertex_work_weight(move.node);
-                        const cost_t prev_other_affinity =
-                            compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity);
-                        const work_weight_t new_weight =
-                            vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc);
-                        const cost_t other_affinity =
-                            compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
+                        const work_weight_t prev_new_weight = vertex_weight
+                                                              + active_schedule.get_step_processor_work(node_step, move.from_proc)
+                                                              + graph->vertex_work_weight(move.node);
+                        const cost_t prev_other_affinity
+                            = compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity);
+                        const work_weight_t new_weight
+                            = vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc);
+                        const cost_t other_affinity
+                            = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
                         affinity_table_node[move.from_proc][window_size] += (other_affinity - prev_other_affinity);
                     }
 
                     if (node_proc != move.to_proc && is_compatible(node, move.to_proc)) {
-                        const work_weight_t prev_new_weight =
-                            vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc) -
-                            graph->vertex_work_weight(move.node);
-                        const cost_t prev_other_affinity =
-                            compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity);
-                        const work_weight_t new_weight =
-                            vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc);
-                        const cost_t other_affinity =
-                            compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
+                        const work_weight_t prev_new_weight = vertex_weight
+                                                              + active_schedule.get_step_processor_work(node_step, move.to_proc)
+                                                              - graph->vertex_work_weight(move.node);
+                        const cost_t prev_other_affinity
+                            = compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity);
+                        const work_weight_t new_weight
+                            = vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc);
+                        const cost_t other_affinity
+                            = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
                         affinity_table_node[move.to_proc][window_size] += (other_affinity - prev_other_affinity);
                     }
 
@@ -376,79 +381,73 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                         update_info.update_entire_from_step = true;
                         // update moving to all procs with special for move.from_proc
                         for (const unsigned proc : proc_range.compatible_processors_vertex(node)) {
-                            const work_weight_t new_weight =
-                                vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc);
+                            const work_weight_t new_weight
+                                = vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc);
                             if (proc == move.from_proc) {
-                                const work_weight_t prev_new_weight =
-                                    vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc) +
-                                    graph->vertex_work_weight(move.node);
-                                const cost_t prev_affinity =
-                                    prev_max_work < prev_new_weight
-                                        ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_max_work)
-                                        : 0.0;
-                                const cost_t new_affinity =
-                                    new_max_weight < new_weight
-                                        ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
-                                        : 0.0;
+                                const work_weight_t prev_new_weight
+                                    = vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc)
+                                      + graph->vertex_work_weight(move.node);
+                                const cost_t prev_affinity
+                                    = prev_max_work < prev_new_weight
+                                          ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_max_work)
+                                          : 0.0;
+                                const cost_t new_affinity = new_max_weight < new_weight ? static_cast<cost_t>(new_weight)
+                                                                                              - static_cast<cost_t>(new_max_weight)
+                                                                                        : 0.0;
                                 affinity_table_node[proc][idx] += new_affinity - prev_affinity;
                             } else if (proc == move.to_proc) {
-                                const work_weight_t prev_new_weight =
-                                    vertex_weight + active_schedule.get_step_processor_work(move.to_step, proc) -
-                                    graph->vertex_work_weight(move.node);
-                                const cost_t prev_affinity =
-                                    prev_max_work < prev_new_weight
-                                        ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_max_work)
-                                        : 0.0;
-                                const cost_t new_affinity =
-                                    new_max_weight < new_weight
-                                        ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
-                                        : 0.0;
+                                const work_weight_t prev_new_weight
+                                    = vertex_weight + active_schedule.get_step_processor_work(move.to_step, proc)
+                                      - graph->vertex_work_weight(move.node);
+                                const cost_t prev_affinity
+                                    = prev_max_work < prev_new_weight
+                                          ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_max_work)
+                                          : 0.0;
+                                const cost_t new_affinity = new_max_weight < new_weight ? static_cast<cost_t>(new_weight)
+                                                                                              - static_cast<cost_t>(new_max_weight)
+                                                                                        : 0.0;
                                 affinity_table_node[proc][idx] += new_affinity - prev_affinity;
                             } else {
-                                const cost_t prev_affinity =
-                                    prev_max_work < new_weight
-                                        ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(prev_max_work)
-                                        : 0.0;
-                                const cost_t new_affinity =
-                                    new_max_weight < new_weight
-                                        ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
-                                        : 0.0;
+                                const cost_t prev_affinity = prev_max_work < new_weight ? static_cast<cost_t>(new_weight)
+                                                                                              - static_cast<cost_t>(prev_max_work)
+                                                                                        : 0.0;
+                                const cost_t new_affinity = new_max_weight < new_weight ? static_cast<cost_t>(new_weight)
+                                                                                              - static_cast<cost_t>(new_max_weight)
+                                                                                        : 0.0;
                                 affinity_table_node[proc][idx] += new_affinity - prev_affinity;
                             }
                         }
                     } else {
                         // update only move.from_proc and move.to_proc
                         if (is_compatible(node, move.from_proc)) {
-                            const work_weight_t from_new_weight =
-                                vertex_weight + active_schedule.get_step_processor_work(move.from_step, move.from_proc);
-                            const work_weight_t from_prev_new_weight =
-                                from_new_weight + graph->vertex_work_weight(move.node);
-                            const cost_t from_prev_affinity =
-                                prev_max_work < from_prev_new_weight
-                                    ? static_cast<cost_t>(from_prev_new_weight) - static_cast<cost_t>(prev_max_work)
-                                    : 0.0;
-
-                            const cost_t from_new_affinity =
-                                new_max_weight < from_new_weight
-                                    ? static_cast<cost_t>(from_new_weight) - static_cast<cost_t>(new_max_weight)
-                                    : 0.0;
+                            const work_weight_t from_new_weight
+                                = vertex_weight + active_schedule.get_step_processor_work(move.from_step, move.from_proc);
+                            const work_weight_t from_prev_new_weight = from_new_weight + graph->vertex_work_weight(move.node);
+                            const cost_t from_prev_affinity
+                                = prev_max_work < from_prev_new_weight
+                                      ? static_cast<cost_t>(from_prev_new_weight) - static_cast<cost_t>(prev_max_work)
+                                      : 0.0;
+
+                            const cost_t from_new_affinity
+                                = new_max_weight < from_new_weight
+                                      ? static_cast<cost_t>(from_new_weight) - static_cast<cost_t>(new_max_weight)
+                                      : 0.0;
                             affinity_table_node[move.from_proc][idx] += from_new_affinity - from_prev_affinity;
                         }
 
                         if (is_compatible(node, move.to_proc)) {
-                            const work_weight_t to_new_weight =
-                                vertex_weight + active_schedule.get_step_processor_work(move.to_step, move.to_proc);
-                            const work_weight_t to_prev_new_weight =
-                                to_new_weight - graph->vertex_work_weight(move.node);
-                            const cost_t to_prev_affinity =
-                                prev_max_work < to_prev_new_weight
-                                    ? static_cast<cost_t>(to_prev_new_weight) - static_cast<cost_t>(prev_max_work)
-                                    : 0.0;
-
-                            const cost_t to_new_affinity =
-                                new_max_weight < to_new_weight
-                                    ? static_cast<cost_t>(to_new_weight) - static_cast<cost_t>(new_max_weight)
-                                    : 0.0;
+                            const work_weight_t to_new_weight
+                                = vertex_weight + active_schedule.get_step_processor_work(move.to_step, move.to_proc);
+                            const work_weight_t to_prev_new_weight = to_new_weight - graph->vertex_work_weight(move.node);
+                            const cost_t to_prev_affinity
+                                = prev_max_work < to_prev_new_weight
+                                      ? static_cast<cost_t>(to_prev_new_weight) - static_cast<cost_t>(prev_max_work)
+                                      : 0.0;
+
+                            const cost_t to_new_affinity
+                                = new_max_weight < to_new_weight
+                                      ? static_cast<cost_t>(to_new_weight) - static_cast<cost_t>(new_max_weight)
+                                      : 0.0;
                             affinity_table_node[move.to_proc][idx] += to_new_affinity - to_prev_affinity;
                         }
                     }
@@ -457,47 +456,82 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 
         } else {
             const unsigned node_proc = active_schedule.assigned_processor(node);
-            process_work_update_step(node, node_step, node_proc, vertex_weight, move.from_step, move.from_proc,
-                                     graph->vertex_work_weight(move.node), prev_work_data.from_step_max_work,
+            process_work_update_step(node,
+                                     node_step,
+                                     node_proc,
+                                     vertex_weight,
+                                     move.from_step,
+                                     move.from_proc,
+                                     graph->vertex_work_weight(move.node),
+                                     prev_work_data.from_step_max_work,
                                      prev_work_data.from_step_second_max_work,
-                                     prev_work_data.from_step_max_work_processor_count, update_info.update_from_step,
-                                     update_info.update_entire_from_step, update_info.full_update, affinity_table_node);
-            process_work_update_step(node, node_step, node_proc, vertex_weight, move.to_step, move.to_proc,
-                                     -graph->vertex_work_weight(move.node), prev_work_data.to_step_max_work,
+                                     prev_work_data.from_step_max_work_processor_count,
+                                     update_info.update_from_step,
+                                     update_info.update_entire_from_step,
+                                     update_info.full_update,
+                                     affinity_table_node);
+            process_work_update_step(node,
+                                     node_step,
+                                     node_proc,
+                                     vertex_weight,
+                                     move.to_step,
+                                     move.to_proc,
+                                     -graph->vertex_work_weight(move.node),
+                                     prev_work_data.to_step_max_work,
                                      prev_work_data.to_step_second_max_work,
-                                     prev_work_data.to_step_max_work_processor_count, update_info.update_to_step,
-                                     update_info.update_entire_to_step, update_info.full_update, affinity_table_node);
+                                     prev_work_data.to_step_max_work_processor_count,
+                                     update_info.update_to_step,
+                                     update_info.update_entire_to_step,
+                                     update_info.full_update,
+                                     affinity_table_node);
         }
 
         return update_info;
     }
 
-    void process_work_update_step(VertexType node, unsigned node_step, unsigned node_proc, work_weight_t vertex_weight,
-                                  unsigned move_step, unsigned move_proc, work_weight_t move_correction_node_weight,
+    void process_work_update_step(VertexType node,
+                                  unsigned node_step,
+                                  unsigned node_proc,
+                                  work_weight_t vertex_weight,
+                                  unsigned move_step,
+                                  unsigned move_proc,
+                                  work_weight_t move_correction_node_weight,
                                   const work_weight_t prev_move_step_max_work,
                                   const work_weight_t prev_move_step_second_max_work,
-                                  unsigned prev_move_step_max_work_processor_count, bool &update_step,
-                                  bool &update_entire_step, bool &full_update,
+                                  unsigned prev_move_step_max_work_processor_count,
+                                  bool &update_step,
+                                  bool &update_entire_step,
+                                  bool &full_update,
                                   std::vector<std::vector<cost_t>> &affinity_table_node);
-    void update_node_work_affinity(node_selection_container_t &nodes, kl_move move,
+    void update_node_work_affinity(node_selection_container_t &nodes,
+                                   kl_move move,
                                    const pre_move_work_data<work_weight_t> &prev_work_data,
                                    std::map<VertexType, kl_gain_update_info> &recompute_max_gain);
-    void update_best_move(VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table,
+    void update_best_move(VertexType node,
+                          unsigned step,
+                          unsigned proc,
+                          node_selection_container_t &affinity_table,
                           ThreadSearchContext &thread_data);
-    void update_best_move(VertexType node, unsigned step, node_selection_container_t &affinity_table,
+    void update_best_move(VertexType node,
+                          unsigned step,
+                          node_selection_container_t &affinity_table,
                           ThreadSearchContext &thread_data);
-    void update_max_gain(kl_move move, std::map<VertexType, kl_gain_update_info> &recompute_max_gain,
+    void update_max_gain(kl_move move,
+                         std::map<VertexType, kl_gain_update_info> &recompute_max_gain,
                          ThreadSearchContext &thread_data);
-    void compute_work_affinity(VertexType node, std::vector<std::vector<cost_t>> &affinity_table_node,
+    void compute_work_affinity(VertexType node,
+                               std::vector<std::vector<cost_t>> &affinity_table_node,
                                ThreadSearchContext &thread_data);
 
-    inline void recompute_node_max_gain(VertexType node, node_selection_container_t &affinity_table,
+    inline void recompute_node_max_gain(VertexType node,
+                                        node_selection_container_t &affinity_table,
                                         ThreadSearchContext &thread_data) {
         const auto best_move = compute_best_move<true>(node, affinity_table[node], thread_data);
         thread_data.max_gain_heap.update(node, best_move);
     }
 
-    inline cost_t compute_same_step_affinity(const work_weight_t &max_work_for_step, const work_weight_t &new_weight,
+    inline cost_t compute_same_step_affinity(const work_weight_t &max_work_for_step,
+                                             const work_weight_t &new_weight,
                                              const cost_t &node_proc_affinity) {
         const cost_t max_work_after_removal = static_cast<cost_t>(max_work_for_step) - node_proc_affinity;
         if (new_weight > max_work_after_removal) {
@@ -510,10 +544,10 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         active_schedule.apply_move(move, thread_data.active_schedule_data);
         comm_cost_f.update_datastructure_after_move(move, thread_data.start_step, thread_data.end_step);
         cost_t change_in_cost = -move.gain;
-        change_in_cost += static_cast<cost_t>(thread_data.active_schedule_data.resolved_violations.size()) *
-                          thread_data.reward_penalty_strat.reward;
-        change_in_cost -= static_cast<cost_t>(thread_data.active_schedule_data.new_violations.size()) *
-                          thread_data.reward_penalty_strat.penalty;
+        change_in_cost += static_cast<cost_t>(thread_data.active_schedule_data.resolved_violations.size())
+                          * thread_data.reward_penalty_strat.reward;
+        change_in_cost -= static_cast<cost_t>(thread_data.active_schedule_data.new_violations.size())
+                          * thread_data.reward_penalty_strat.penalty;
 
 #ifdef KL_DEBUG
         std::cout << "penalty: " << thread_data.reward_penalty_strat.penalty
@@ -531,7 +565,9 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         return change_in_cost;
     }
 
-    void run_quick_moves(unsigned &inner_iter, ThreadSearchContext &thread_data, const cost_t change_in_cost,
+    void run_quick_moves(unsigned &inner_iter,
+                         ThreadSearchContext &thread_data,
+                         const cost_t change_in_cost,
                          const VertexType best_move_node) {
 #ifdef KL_DEBUG
         std::cout << "Starting quick moves sequence." << std::endl;
@@ -552,15 +588,13 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         }
 
         while (quick_moves_stack.size() > 0) {
-
             auto next_node_to_move = quick_moves_stack.back();
             quick_moves_stack.pop_back();
 
             thread_data.reward_penalty_strat.init_reward_penalty(
                 static_cast<double>(thread_data.active_schedule_data.current_violations.size()) + 1.0);
             compute_node_affinities(next_node_to_move, thread_data.local_affinity_table, thread_data);
-            kl_move best_quick_move =
-                compute_best_move<true>(next_node_to_move, thread_data.local_affinity_table, thread_data);
+            kl_move best_quick_move = compute_best_move<true>(next_node_to_move, thread_data.local_affinity_table, thread_data);
 
             local_lock.insert(next_node_to_move);
             if (best_quick_move.gain <= std::numeric_limits<cost_t>::lowest()) {
@@ -588,8 +622,9 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                     quick_moves_stack.push_back(key);
                 }
 
-                if (abort)
+                if (abort) {
                     break;
+                }
 
             } else if (thread_data.active_schedule_data.feasible) {
                 break;
@@ -597,8 +632,12 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         }
 
         if (!thread_data.active_schedule_data.feasible) {
-            active_schedule.revert_schedule_to_bound(num_applied_moves, saved_cost, true, comm_cost_f,
-                                                     thread_data.active_schedule_data, thread_data.start_step,
+            active_schedule.revert_schedule_to_bound(num_applied_moves,
+                                                     saved_cost,
+                                                     true,
+                                                     comm_cost_f,
+                                                     thread_data.active_schedule_data,
+                                                     thread_data.start_step,
                                                      thread_data.end_step);
 #ifdef KL_DEBUG
             std::cout << "Ending quick moves sequence with infeasible solution." << std::endl;
@@ -613,17 +652,16 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         thread_data.affinity_table.trim();
         thread_data.max_gain_heap.clear();
         thread_data.reward_penalty_strat.init_reward_penalty(1.0);
-        insert_gain_heap(thread_data); // Re-initialize the heap with the current state
+        insert_gain_heap(thread_data);    // Re-initialize the heap with the current state
     }
 
     void resolve_violations(ThreadSearchContext &thread_data) {
         auto &current_violations = thread_data.active_schedule_data.current_violations;
         unsigned num_violations = static_cast<unsigned>(current_violations.size());
         if (num_violations > 0) {
-
 #ifdef KL_DEBUG_1
-            std::cout << "thread " << thread_data.thread_id << ", Starting preresolving violations with "
-                      << num_violations << " initial violations" << std::endl;
+            std::cout << "thread " << thread_data.thread_id << ", Starting preresolving violations with " << num_violations
+                      << " initial violations" << std::endl;
 #endif
             thread_data.reward_penalty_strat.init_reward_penalty(static_cast<double>(num_violations) + 1.0);
             std::unordered_set<VertexType> local_lock;
@@ -653,18 +691,17 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                     best_move = compute_best_move<true>(node, thread_data.local_affinity_table, thread_data);
                 } else {
                     compute_node_affinities(source_v, thread_data.local_affinity_table, thread_data);
-                    kl_move best_source_v_move =
-                        compute_best_move<true>(source_v, thread_data.local_affinity_table, thread_data);
+                    kl_move best_source_v_move = compute_best_move<true>(source_v, thread_data.local_affinity_table, thread_data);
                     compute_node_affinities(target_v, thread_data.local_affinity_table, thread_data);
-                    kl_move best_target_v_move =
-                        compute_best_move<true>(target_v, thread_data.local_affinity_table, thread_data);
+                    kl_move best_target_v_move = compute_best_move<true>(target_v, thread_data.local_affinity_table, thread_data);
                     best_move = best_target_v_move.gain > best_source_v_move.gain ? std::move(best_target_v_move)
                                                                                   : std::move(best_source_v_move);
                 }
 
                 local_lock.insert(best_move.node);
-                if (best_move.gain <= std::numeric_limits<cost_t>::lowest())
+                if (best_move.gain <= std::numeric_limits<cost_t>::lowest()) {
                     continue;
+                }
 
                 apply_move(best_move, thread_data);
                 thread_data.affinity_table.insert(best_move.node);
@@ -674,8 +711,9 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                           << " to: " << best_move.to_proc << "|" << best_move.to_step << std::endl;
 #endif
                 const unsigned new_num_violations = static_cast<unsigned>(current_violations.size());
-                if (new_num_violations == 0)
+                if (new_num_violations == 0) {
                     break;
+                }
 
                 if (thread_data.active_schedule_data.new_violations.size() > 0) {
                     for (const auto &vertex_edge_pair : thread_data.active_schedule_data.new_violations) {
@@ -689,8 +727,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 update_avg_gain(gain, num_iter++, thread_data.average_gain);
 #ifdef KL_DEBUG_1
                 std::cout << "thread " << thread_data.thread_id << ",  preresolving violations with " << num_violations
-                          << " violations, " << num_iter << " #iterations, " << thread_data.average_gain
-                          << " average gain" << std::endl;
+                          << " violations, " << num_iter << " #iterations, " << thread_data.average_gain << " average gain"
+                          << std::endl;
 #endif
                 if (num_iter > min_iter && thread_data.average_gain < 0.0) {
                     break;
@@ -701,11 +739,10 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     }
 
     void run_local_search(ThreadSearchContext &thread_data) {
-
 #ifdef KL_DEBUG_1
         std::cout << "thread " << thread_data.thread_id
-                  << ", start local search, initial schedule cost: " << thread_data.active_schedule_data.cost
-                  << " with " << thread_data.num_steps() << " supersteps." << std::endl;
+                  << ", start local search, initial schedule cost: " << thread_data.active_schedule_data.cost << " with "
+                  << thread_data.num_steps() << " supersteps." << std::endl;
 #endif
         std::vector<VertexType> new_nodes;
         std::vector<VertexType> unlock_nodes;
@@ -751,19 +788,20 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
             if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) {
                 std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test()
                           << ", current cost: " << thread_data.active_schedule_data.cost << std::endl;
-                std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
-                          << std::endl;
+                std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl;
             }
             if constexpr (active_schedule_t::use_memory_constraint) {
-                if (not active_schedule.memory_constraint.satisfied_memory_constraint())
+                if (not active_schedule.memory_constraint.satisfied_memory_constraint()) {
                     std::cout << "memory constraint not satisfied" << std::endl;
+                }
             }
 #endif
 
             while (inner_iter < thread_data.max_inner_iterations && thread_data.max_gain_heap.size() > 0) {
-                kl_move best_move =
-                    get_best_move(thread_data.affinity_table, thread_data.lock_manager,
-                                  thread_data.max_gain_heap); // locks best_move.node and removes it from node_selection
+                kl_move best_move
+                    = get_best_move(thread_data.affinity_table,
+                                    thread_data.lock_manager,
+                                    thread_data.max_gain_heap);    // locks best_move.node and removes it from node_selection
                 if (best_move.gain <= std::numeric_limits<cost_t>::lowest()) {
                     break;
                 }
@@ -771,13 +809,12 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 #ifdef KL_DEBUG
                 std::cout << " >>> move node " << best_move.node << " with gain " << best_move.gain
                           << ", from proc|step: " << best_move.from_proc << "|" << best_move.from_step
-                          << " to: " << best_move.to_proc << "|" << best_move.to_step
-                          << ",avg gain: " << thread_data.average_gain << std::endl;
+                          << " to: " << best_move.to_proc << "|" << best_move.to_step << ",avg gain: " << thread_data.average_gain
+                          << std::endl;
 #endif
                 if (inner_iter > thread_data.min_inner_iter && thread_data.average_gain < 0.0) {
 #ifdef KL_DEBUG
-                    std::cout << "Negative average gain: " << thread_data.average_gain << ", end local search"
-                              << std::endl;
+                    std::cout << "Negative average gain: " << thread_data.average_gain << ", end local search" << std::endl;
 #endif
                     break;
                 }
@@ -789,21 +826,20 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 #endif
 
                 const auto prev_work_data = active_schedule.get_pre_move_work_data(best_move);
-                const typename comm_cost_function_t::pre_move_comm_data_t prev_comm_data =
-                    comm_cost_f.get_pre_move_comm_data(best_move);
+                const typename comm_cost_function_t::pre_move_comm_data_t prev_comm_data
+                    = comm_cost_f.get_pre_move_comm_data(best_move);
                 const cost_t change_in_cost = apply_move(best_move, thread_data);
 #ifdef KL_DEBUG_COST_CHECK
                 active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps();
-                if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) >
-                    0.00001) {
+                if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) {
                     std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test()
                               << ", current cost: " << thread_data.active_schedule_data.cost << std::endl;
-                    std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
-                              << std::endl;
+                    std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl;
                 }
                 if constexpr (active_schedule_t::use_memory_constraint) {
-                    if (not active_schedule.memory_constraint.satisfied_memory_constraint())
+                    if (not active_schedule.memory_constraint.satisfied_memory_constraint()) {
                         std::cout << "memory constraint not satisfied" << std::endl;
+                    }
                 }
 #endif
                 if constexpr (enable_quick_moves) {
@@ -811,17 +847,16 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                         run_quick_moves(inner_iter, thread_data, change_in_cost, best_move.node);
 #ifdef KL_DEBUG_COST_CHECK
                         active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps();
-                        if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) >
-                            0.00001) {
+                        if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) {
                             std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test()
                                       << ", current cost: " << thread_data.active_schedule_data.cost << std::endl;
-                            std::cout
-                                << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
-                                << std::endl;
+                            std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
+                                      << std::endl;
                         }
                         if constexpr (active_schedule_t::use_memory_constraint) {
-                            if (not active_schedule.memory_constraint.satisfied_memory_constraint())
+                            if (not active_schedule.memory_constraint.satisfied_memory_constraint()) {
                                 std::cout << "memory constraint not satisfied" << std::endl;
+                            }
                         }
 #endif
                         continue;
@@ -835,19 +870,20 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                         violation_removed_count++;
 
                         if (violation_removed_count > 3) {
-                            if (reset_counter < thread_data.max_no_vioaltions_removed_backtrack &&
-                                ((not iter_inital_feasible) || (thread_data.active_schedule_data.cost <
-                                                                thread_data.active_schedule_data.best_cost))) {
+                            if (reset_counter < thread_data.max_no_vioaltions_removed_backtrack
+                                && ((not iter_inital_feasible)
+                                    || (thread_data.active_schedule_data.cost < thread_data.active_schedule_data.best_cost))) {
                                 thread_data.affinity_table.reset_node_selection();
                                 thread_data.max_gain_heap.clear();
                                 thread_data.lock_manager.clear();
                                 thread_data.selection_strategy.select_nodes_violations(
-                                    thread_data.affinity_table, thread_data.active_schedule_data.current_violations,
-                                    thread_data.start_step, thread_data.end_step);
+                                    thread_data.affinity_table,
+                                    thread_data.active_schedule_data.current_violations,
+                                    thread_data.start_step,
+                                    thread_data.end_step);
 #ifdef KL_DEBUG
-                                std::cout
-                                    << "Infeasible, and no violations resolved for 5 iterations, reset node selection"
-                                    << std::endl;
+                                std::cout << "Infeasible, and no violations resolved for 5 iterations, reset node selection"
+                                          << std::endl;
 #endif
                                 thread_data.reward_penalty_strat.init_reward_penalty(
                                     static_cast<double>(thread_data.active_schedule_data.current_violations.size()));
@@ -874,8 +910,7 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 }
 
                 thread_data.affinity_table.trim();
-                update_affinities(best_move, thread_data, recompute_max_gain, new_nodes, prev_work_data,
-                                  prev_comm_data);
+                update_affinities(best_move, thread_data, recompute_max_gain, new_nodes, prev_work_data, prev_comm_data);
 
                 for (const auto v : unlock_nodes) {
                     thread_data.lock_manager.unlock(v);
@@ -898,16 +933,15 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 #endif
 #ifdef KL_DEBUG_COST_CHECK
                 active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps();
-                if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) >
-                    0.00001) {
+                if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) {
                     std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test()
                               << ", current cost: " << thread_data.active_schedule_data.cost << std::endl;
-                    std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
-                              << std::endl;
+                    std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl;
                 }
                 if constexpr (active_schedule_t::use_memory_constraint) {
-                    if (not active_schedule.memory_constraint.satisfied_memory_constraint())
+                    if (not active_schedule.memory_constraint.satisfied_memory_constraint()) {
                         std::cout << "memory constraint not satisfied" << std::endl;
+                    }
                 }
 #endif
                 update_max_gain(best_move, recompute_max_gain, thread_data);
@@ -921,17 +955,20 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 
 #ifdef KL_DEBUG
             std::cout << "--- end inner loop after " << inner_iter
-                      << " inner iterations, gain heap size: " << thread_data.max_gain_heap.size()
-                      << ", outer iteraion " << outer_iter << "/" << parameters.max_outer_iterations
+                      << " inner iterations, gain heap size: " << thread_data.max_gain_heap.size() << ", outer iteraion "
+                      << outer_iter << "/" << parameters.max_outer_iterations
                       << ", current cost: " << thread_data.active_schedule_data.cost << ", "
                       << (thread_data.active_schedule_data.feasible ? "feasible" : "infeasible") << std::endl;
 #endif
 #ifdef KL_DEBUG_1
             const unsigned num_steps_tmp = thread_data.end_step;
 #endif
-            active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, thread_data.step_to_remove,
-                                                    comm_cost_f, thread_data.active_schedule_data,
-                                                    thread_data.start_step, thread_data.end_step);
+            active_schedule.revert_to_best_schedule(thread_data.local_search_start_step,
+                                                    thread_data.step_to_remove,
+                                                    comm_cost_f,
+                                                    thread_data.active_schedule_data,
+                                                    thread_data.start_step,
+                                                    thread_data.end_step);
 #ifdef KL_DEBUG_1
             if (thread_data.local_search_start_step > 0) {
                 if (num_steps_tmp == thread_data.end_step) {
@@ -949,12 +986,12 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
             if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) {
                 std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test()
                           << ", current cost: " << thread_data.active_schedule_data.cost << std::endl;
-                std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
-                          << std::endl;
+                std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl;
             }
             if constexpr (active_schedule_t::use_memory_constraint) {
-                if (not active_schedule.memory_constraint.satisfied_memory_constraint())
+                if (not active_schedule.memory_constraint.satisfied_memory_constraint()) {
                     std::cout << "memory constraint not satisfied" << std::endl;
+                }
             }
 #endif
 
@@ -968,8 +1005,7 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 
             if (other_threads_finished(thread_data.thread_id)) {
 #ifdef KL_DEBUG_1
-                std::cout << "thread " << thread_data.thread_id << ", other threads finished, end local search"
-                          << std::endl;
+                std::cout << "thread " << thread_data.thread_id << ", other threads finished, end local search" << std::endl;
 #endif
                 break;
             }
@@ -980,8 +1016,7 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 if (no_improvement_iter_counter >= parameters.max_no_improvement_iterations) {
 #ifdef KL_DEBUG_1
                     std::cout << "thread " << thread_data.thread_id << ", no improvement for "
-                              << parameters.max_no_improvement_iterations << " iterations, end local search"
-                              << std::endl;
+                              << parameters.max_no_improvement_iterations << " iterations, end local search" << std::endl;
 #endif
                     break;
                 }
@@ -995,35 +1030,40 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 #ifdef KL_DEBUG_1
         std::cout << "thread " << thread_data.thread_id << ", local search end after " << outer_iter
                   << " outer iterations, current cost: " << thread_data.active_schedule_data.cost << " with "
-                  << thread_data.num_steps() << " supersteps, vs serial cost "
-                  << active_schedule.get_total_work_weight() << "." << std::endl;
+                  << thread_data.num_steps() << " supersteps, vs serial cost " << active_schedule.get_total_work_weight() << "."
+                  << std::endl;
 #endif
         thread_finished_vec[thread_data.thread_id] = true;
     }
 
     bool other_threads_finished(const unsigned thread_id) {
         const size_t num_threads = thread_finished_vec.size();
-        if (num_threads == 1)
+        if (num_threads == 1) {
             return false;
+        }
 
         for (size_t i = 0; i < num_threads; i++) {
-            if (i != thread_id && !thread_finished_vec[i])
+            if (i != thread_id && !thread_finished_vec[i]) {
                 return false;
+            }
         }
         return true;
     }
 
-    inline void update_affinities(const kl_move &best_move, ThreadSearchContext &thread_data,
+    inline void update_affinities(const kl_move &best_move,
+                                  ThreadSearchContext &thread_data,
                                   std::map<VertexType, kl_gain_update_info> &recompute_max_gain,
                                   std::vector<VertexType> &new_nodes,
                                   const pre_move_work_data<v_workw_t<Graph_t>> &prev_work_data,
                                   const typename comm_cost_function_t::pre_move_comm_data_t &prev_comm_data) {
-
         if constexpr (comm_cost_function_t::is_max_comm_cost_function) {
             comm_cost_f.update_node_comm_affinity(
-                best_move, thread_data, thread_data.reward_penalty_strat.penalty,
-                thread_data.reward_penalty_strat.reward, recompute_max_gain,
-                new_nodes); // this only updated reward/penalty, collects new_nodes, and fills recompute_max_gain
+                best_move,
+                thread_data,
+                thread_data.reward_penalty_strat.penalty,
+                thread_data.reward_penalty_strat.reward,
+                recompute_max_gain,
+                new_nodes);    // this only updates reward/penalty, collects new_nodes, and fills recompute_max_gain
 
             // Add nodes from affected steps to new_nodes
             // {
@@ -1073,29 +1113,28 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 const auto current_max = active_schedule.get_step_max_work(best_move.from_step);
                 const auto current_second_max = active_schedule.get_step_second_max_work(best_move.from_step);
                 const auto current_count = active_schedule.get_step_max_work_processor_count()[best_move.from_step];
-                if (current_max != prev_work_data.from_step_max_work ||
-                    current_second_max != prev_work_data.from_step_second_max_work ||
-                    current_count != prev_work_data.from_step_max_work_processor_count) {
+                if (current_max != prev_work_data.from_step_max_work
+                    || current_second_max != prev_work_data.from_step_second_max_work
+                    || current_count != prev_work_data.from_step_max_work_processor_count) {
                     changed_steps.insert(best_move.from_step);
                 }
             } else {
                 // Different steps - check both
                 const auto current_from_max = active_schedule.get_step_max_work(best_move.from_step);
                 const auto current_from_second_max = active_schedule.get_step_second_max_work(best_move.from_step);
-                const auto current_from_count =
-                    active_schedule.get_step_max_work_processor_count()[best_move.from_step];
-                if (current_from_max != prev_work_data.from_step_max_work ||
-                    current_from_second_max != prev_work_data.from_step_second_max_work ||
-                    current_from_count != prev_work_data.from_step_max_work_processor_count) {
+                const auto current_from_count = active_schedule.get_step_max_work_processor_count()[best_move.from_step];
+                if (current_from_max != prev_work_data.from_step_max_work
+                    || current_from_second_max != prev_work_data.from_step_second_max_work
+                    || current_from_count != prev_work_data.from_step_max_work_processor_count) {
                     changed_steps.insert(best_move.from_step);
                 }
 
                 const auto current_to_max = active_schedule.get_step_max_work(best_move.to_step);
                 const auto current_to_second_max = active_schedule.get_step_second_max_work(best_move.to_step);
                 const auto current_to_count = active_schedule.get_step_max_work_processor_count()[best_move.to_step];
-                if (current_to_max != prev_work_data.to_step_max_work ||
-                    current_to_second_max != prev_work_data.to_step_second_max_work ||
-                    current_to_count != prev_work_data.to_step_max_work_processor_count) {
+                if (current_to_max != prev_work_data.to_step_max_work
+                    || current_to_second_max != prev_work_data.to_step_second_max_work
+                    || current_to_count != prev_work_data.to_step_max_work_processor_count) {
                     changed_steps.insert(best_move.to_step);
                 }
             }
@@ -1107,8 +1146,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 const auto current_second_max = comm_cost_f.comm_ds.step_second_max_comm(step);
                 const auto current_count = comm_cost_f.comm_ds.step_max_comm_count(step);
 
-                if (current_max != step_info.max_comm || current_second_max != step_info.second_max_comm ||
-                    current_count != step_info.max_comm_count) {
+                if (current_max != step_info.max_comm || current_second_max != step_info.second_max_comm
+                    || current_count != step_info.max_comm_count) {
                     changed_steps.insert(step);
                 }
             }
@@ -1152,14 +1191,16 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
             }
         } else {
             update_node_work_affinity(thread_data.affinity_table, best_move, prev_work_data, recompute_max_gain);
-            comm_cost_f.update_node_comm_affinity(best_move, thread_data, thread_data.reward_penalty_strat.penalty,
-                                                  thread_data.reward_penalty_strat.reward, recompute_max_gain,
+            comm_cost_f.update_node_comm_affinity(best_move,
+                                                  thread_data,
+                                                  thread_data.reward_penalty_strat.penalty,
+                                                  thread_data.reward_penalty_strat.reward,
+                                                  recompute_max_gain,
                                                   new_nodes);
         }
     }
 
-    inline bool blocked_edge_strategy(VertexType node, std::vector<VertexType> &unlock_nodes,
-                                      ThreadSearchContext &thread_data) {
+    inline bool blocked_edge_strategy(VertexType node, std::vector<VertexType> &unlock_nodes, ThreadSearchContext &thread_data) {
         if (thread_data.unlock_edge_backtrack_counter > 1) {
             for (const auto vertex_edge_pair : thread_data.active_schedule_data.new_violations) {
                 const auto &e = vertex_edge_pair.second;
@@ -1173,8 +1214,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 }
             }
 #ifdef KL_DEBUG
-            std::cout << "Nodes of violated edge locked, backtrack counter: "
-                      << thread_data.unlock_edge_backtrack_counter << std::endl;
+            std::cout << "Nodes of violated edge locked, backtrack counter: " << thread_data.unlock_edge_backtrack_counter
+                      << std::endl;
 #endif
             thread_data.unlock_edge_backtrack_counter--;
             return true;
@@ -1182,28 +1223,26 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 #ifdef KL_DEBUG
             std::cout << "Nodes of violated edge locked, end local search" << std::endl;
 #endif
-            return false; // or reset local search and initalize with violating nodes
+            return false;    // or reset local search and initialize with violating nodes
         }
     }
 
-    inline void adjust_local_search_parameters(unsigned outer_iter, unsigned no_imp_counter,
-                                               ThreadSearchContext &thread_data) {
-        if (no_imp_counter >= thread_data.no_improvement_iterations_reduce_penalty &&
-            thread_data.reward_penalty_strat.initial_penalty > 1.0) {
-            thread_data.reward_penalty_strat.initial_penalty =
-                static_cast<cost_t>(std::floor(std::sqrt(thread_data.reward_penalty_strat.initial_penalty)));
+    inline void adjust_local_search_parameters(unsigned outer_iter, unsigned no_imp_counter, ThreadSearchContext &thread_data) {
+        if (no_imp_counter >= thread_data.no_improvement_iterations_reduce_penalty
+            && thread_data.reward_penalty_strat.initial_penalty > 1.0) {
+            thread_data.reward_penalty_strat.initial_penalty
+                = static_cast<cost_t>(std::floor(std::sqrt(thread_data.reward_penalty_strat.initial_penalty)));
             thread_data.unlock_edge_backtrack_counter_reset += 1;
             thread_data.no_improvement_iterations_reduce_penalty += 15;
 #ifdef KL_DEBUG_1
             std::cout << "thread " << thread_data.thread_id << ", no improvement for "
-                      << thread_data.no_improvement_iterations_reduce_penalty
-                      << " iterations, reducing initial penalty to " << thread_data.reward_penalty_strat.initial_penalty
-                      << std::endl;
+                      << thread_data.no_improvement_iterations_reduce_penalty << " iterations, reducing initial penalty to "
+                      << thread_data.reward_penalty_strat.initial_penalty << std::endl;
 #endif
         }
 
-        if (parameters.try_remove_step_after_num_outer_iterations > 0 &&
-            ((outer_iter + 1) % parameters.try_remove_step_after_num_outer_iterations) == 0) {
+        if (parameters.try_remove_step_after_num_outer_iterations > 0
+            && ((outer_iter + 1) % parameters.try_remove_step_after_num_outer_iterations) == 0) {
             thread_data.step_selection_epoch_counter = 0;
             ;
 #ifdef KL_DEBUG
@@ -1216,8 +1255,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
             thread_data.no_improvement_iterations_increase_inner_iter += 20;
 #ifdef KL_DEBUG_1
             std::cout << "thread " << thread_data.thread_id << ", no improvement for "
-                      << thread_data.no_improvement_iterations_increase_inner_iter
-                      << " iterations, increasing min inner iter to " << thread_data.min_inner_iter << std::endl;
+                      << thread_data.no_improvement_iterations_increase_inner_iter << " iterations, increasing min inner iter to "
+                      << thread_data.min_inner_iter << std::endl;
 #endif
         }
     }
@@ -1230,14 +1269,19 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     void cleanup_datastructures();
     void update_avg_gain(const cost_t gain, const unsigned num_iter, double &average_gain);
     void insert_gain_heap(ThreadSearchContext &thread_data);
-    void insert_new_nodes_gain_heap(std::vector<VertexType> &new_nodes, node_selection_container_t &nodes,
+    void insert_new_nodes_gain_heap(std::vector<VertexType> &new_nodes,
+                                    node_selection_container_t &nodes,
                                     ThreadSearchContext &thread_data);
 
-    inline void compute_node_affinities(VertexType node, std::vector<std::vector<cost_t>> &affinity_table_node,
+    inline void compute_node_affinities(VertexType node,
+                                        std::vector<std::vector<cost_t>> &affinity_table_node,
                                         ThreadSearchContext &thread_data) {
         compute_work_affinity(node, affinity_table_node, thread_data);
-        comm_cost_f.compute_comm_affinity(node, affinity_table_node, thread_data.reward_penalty_strat.penalty,
-                                          thread_data.reward_penalty_strat.reward, thread_data.start_step,
+        comm_cost_f.compute_comm_affinity(node,
+                                          affinity_table_node,
+                                          thread_data.reward_penalty_strat.penalty,
+                                          thread_data.reward_penalty_strat.reward,
+                                          thread_data.start_step,
                                           thread_data.end_step);
     }
 
@@ -1245,8 +1289,7 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         if (select_nodes_check_remove_superstep(thread_data.step_to_remove, thread_data)) {
             active_schedule.swap_empty_step_fwd(thread_data.step_to_remove, thread_data.end_step);
             thread_data.end_step--;
-            thread_data.local_search_start_step =
-                static_cast<unsigned>(thread_data.active_schedule_data.applied_moves.size());
+            thread_data.local_search_start_step = static_cast<unsigned>(thread_data.active_schedule_data.applied_moves.size());
             thread_data.active_schedule_data.update_cost(static_cast<cost_t>(-1.0 * instance->synchronisationCosts()));
 
             if constexpr (enable_preresolving_violations) {
@@ -1254,27 +1297,30 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
             }
 
             if (thread_data.active_schedule_data.current_violations.size() > parameters.initial_violation_threshold) {
-                active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, thread_data.step_to_remove,
-                                                        comm_cost_f, thread_data.active_schedule_data,
-                                                        thread_data.start_step, thread_data.end_step);
+                active_schedule.revert_to_best_schedule(thread_data.local_search_start_step,
+                                                        thread_data.step_to_remove,
+                                                        comm_cost_f,
+                                                        thread_data.active_schedule_data,
+                                                        thread_data.start_step,
+                                                        thread_data.end_step);
             } else {
-                thread_data.unlock_edge_backtrack_counter =
-                    static_cast<unsigned>(thread_data.active_schedule_data.current_violations.size());
-                thread_data.max_inner_iterations =
-                    std::max(thread_data.unlock_edge_backtrack_counter * 5u, parameters.max_inner_iterations_reset);
-                thread_data.max_no_vioaltions_removed_backtrack =
-                    parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset;
+                thread_data.unlock_edge_backtrack_counter
+                    = static_cast<unsigned>(thread_data.active_schedule_data.current_violations.size());
+                thread_data.max_inner_iterations
+                    = std::max(thread_data.unlock_edge_backtrack_counter * 5u, parameters.max_inner_iterations_reset);
+                thread_data.max_no_vioaltions_removed_backtrack
+                    = parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset;
 #ifdef KL_DEBUG_1
-                std::cout << "thread " << thread_data.thread_id << ", Trying to remove step "
-                          << thread_data.step_to_remove << std::endl;
+                std::cout << "thread " << thread_data.thread_id << ", Trying to remove step " << thread_data.step_to_remove
+                          << std::endl;
 #endif
                 return;
             }
         }
         // thread_data.step_to_remove = thread_data.start_step;
         thread_data.local_search_start_step = 0;
-        thread_data.selection_strategy.select_active_nodes(thread_data.affinity_table, thread_data.start_step,
-                                                           thread_data.end_step);
+        thread_data.selection_strategy.select_active_nodes(
+            thread_data.affinity_table, thread_data.start_step, thread_data.end_step);
     }
 
     bool check_remove_superstep(unsigned step);
@@ -1289,7 +1335,6 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 active_schedule.getSetSchedule().step_processor_vertices[step][proc].begin(),
                 active_schedule.getSetSchedule().step_processor_vertices[step][proc].end());
             for (const auto &node : step_proc_node_vec) {
-
                 thread_data.reward_penalty_strat.init_reward_penalty(
                     static_cast<double>(thread_data.active_schedule_data.current_violations.size()) + 1.0);
                 compute_node_affinities(node, thread_data.local_affinity_table, thread_data);
@@ -1301,8 +1346,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 }
 
                 apply_move(best_move, thread_data);
-                if (thread_data.active_schedule_data.current_violations.size() >
-                    parameters.abort_scatter_nodes_violation_threshold) {
+                if (thread_data.active_schedule_data.current_violations.size()
+                    > parameters.abort_scatter_nodes_violation_threshold) {
                     abort = true;
                     break;
                 }
@@ -1311,7 +1356,6 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
                 // thread_data.selection_strategy.add_neighbours_to_selection(node, thread_data.affinity_table,
                 // thread_data.start_step, thread_data.end_step);
                 if (thread_data.active_schedule_data.new_violations.size() > 0) {
-
                     for (const auto &vertex_edge_pair : thread_data.active_schedule_data.new_violations) {
                         const auto &vertex = vertex_edge_pair.first;
                         thread_data.affinity_table.insert(vertex);
@@ -1326,16 +1370,15 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
 
 #ifdef KL_DEBUG_COST_CHECK
                 active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps();
-                if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) >
-                    0.00001) {
+                if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) {
                     std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test()
                               << ", current cost: " << thread_data.active_schedule_data.cost << std::endl;
-                    std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<"
-                              << std::endl;
+                    std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl;
                 }
                 if constexpr (active_schedule_t::use_memory_constraint) {
-                    if (not active_schedule.memory_constraint.satisfied_memory_constraint())
+                    if (not active_schedule.memory_constraint.satisfied_memory_constraint()) {
                         std::cout << "memory constraint not satisfied" << std::endl;
+                    }
                 }
 #endif
             }
@@ -1346,8 +1389,8 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         }
 
         if (abort) {
-            active_schedule.revert_to_best_schedule(0, 0, comm_cost_f, thread_data.active_schedule_data,
-                                                    thread_data.start_step, thread_data.end_step);
+            active_schedule.revert_to_best_schedule(
+                0, 0, comm_cost_f, thread_data.active_schedule_data, thread_data.start_step, thread_data.end_step);
             thread_data.affinity_table.reset_node_selection();
             return false;
         }
@@ -1355,7 +1398,7 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     }
 
     void synchronize_active_schedule(const unsigned num_threads) {
-        if (num_threads == 1) { // single thread case
+        if (num_threads == 1) {    // single thread case
             active_schedule.set_cost(thread_data_vec[0].active_schedule_data.cost);
             active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps();
             return;
@@ -1389,8 +1432,9 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     virtual ~kl_improver() = default;
 
     virtual RETURN_STATUS improveSchedule(BspSchedule<Graph_t> &schedule) override {
-        if (schedule.getInstance().numberOfProcessors() < 2)
+        if (schedule.getInstance().numberOfProcessors() < 2) {
             return RETURN_STATUS::BEST_FOUND;
+        }
 
         const unsigned num_threads = 1;
 
@@ -1428,6 +1472,7 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     }
 
     virtual void setTimeQualityParameter(const double time_quality) { this->parameters.time_quality = time_quality; }
+
     virtual void setSuperstepRemoveStrengthParameter(const double superstep_remove_strength) {
         this->parameters.superstep_remove_strength = superstep_remove_strength;
     }
@@ -1435,15 +1480,14 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
     virtual std::string getScheduleName() const { return "kl_improver_" + comm_cost_f.name(); }
 };
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::set_parameters(
     vertex_idx_t<Graph_t> num_nodes) {
     const unsigned log_num_nodes = (num_nodes > 1) ? static_cast<unsigned>(std::log(num_nodes)) : 1;
 
     // Total number of outer iterations. Proportional to sqrt N.
-    parameters.max_outer_iterations =
-        static_cast<unsigned>(std::sqrt(num_nodes) * (parameters.time_quality * 10.0) / parameters.num_parallel_loops);
+    parameters.max_outer_iterations
+        = static_cast<unsigned>(std::sqrt(num_nodes) * (parameters.time_quality * 10.0) / parameters.num_parallel_loops);
 
     // Number of times to reset the search for violations before giving up.
     parameters.max_no_vioaltions_removed_backtrack_reset = parameters.time_quality < 0.75  ? 1
@@ -1451,19 +1495,17 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
                                                                                            : 3;
 
     // Parameters for the superstep removal heuristic.
-    parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset =
-        3 + static_cast<unsigned>(parameters.superstep_remove_strength * 7);
+    parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset
+        = 3 + static_cast<unsigned>(parameters.superstep_remove_strength * 7);
     parameters.node_max_step_selection_epochs = parameters.superstep_remove_strength < 0.75  ? 1
                                                 : parameters.superstep_remove_strength < 1.0 ? 2
                                                                                              : 3;
     parameters.remove_step_epocs = static_cast<unsigned>(parameters.superstep_remove_strength * 4.0);
 
-    parameters.min_inner_iter_reset =
-        static_cast<unsigned>(log_num_nodes + log_num_nodes * (1.0 + parameters.time_quality));
+    parameters.min_inner_iter_reset = static_cast<unsigned>(log_num_nodes + log_num_nodes * (1.0 + parameters.time_quality));
 
     if (parameters.remove_step_epocs > 0) {
-        parameters.try_remove_step_after_num_outer_iterations =
-            parameters.max_outer_iterations / parameters.remove_step_epocs;
+        parameters.try_remove_step_after_num_outer_iterations = parameters.max_outer_iterations / parameters.remove_step_epocs;
     } else {
         // Effectively disable superstep removal if remove_step_epocs is 0.
         parameters.try_remove_step_after_num_outer_iterations = parameters.max_outer_iterations + 1;
@@ -1473,16 +1515,16 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     for (auto &thread : thread_data_vec) {
         thread.thread_id = i++;
         // The number of nodes to consider in each inner iteration. Proportional to log(N).
-        thread.selection_strategy.selection_threshold =
-            static_cast<std::size_t>(std::ceil(parameters.time_quality * 10 * log_num_nodes + log_num_nodes));
+        thread.selection_strategy.selection_threshold
+            = static_cast<std::size_t>(std::ceil(parameters.time_quality * 10 * log_num_nodes + log_num_nodes));
     }
 
 #ifdef KL_DEBUG_1
     std::cout << "kl set parameter, number of nodes: " << num_nodes << std::endl;
     std::cout << "max outer iterations: " << parameters.max_outer_iterations << std::endl;
     std::cout << "max inner iterations: " << parameters.max_inner_iterations_reset << std::endl;
-    std::cout << "no improvement iterations reduce penalty: "
-              << thread_data_vec[0].no_improvement_iterations_reduce_penalty << std::endl;
+    std::cout << "no improvement iterations reduce penalty: " << thread_data_vec[0].no_improvement_iterations_reduce_penalty
+              << std::endl;
     std::cout << "selction threshold: " << thread_data_vec[0].selection_strategy.selection_threshold << std::endl;
     std::cout << "remove step epocs: " << parameters.remove_step_epocs << std::endl;
     std::cout << "try remove step after num outer iterations: " << parameters.try_remove_step_after_num_outer_iterations
@@ -1491,26 +1533,25 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
 #endif
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::update_node_work_affinity(
-    node_selection_container_t &nodes, kl_move move, const pre_move_work_data<work_weight_t> &prev_work_data,
+    node_selection_container_t &nodes,
+    kl_move move,
+    const pre_move_work_data<work_weight_t> &prev_work_data,
     std::map<VertexType, kl_gain_update_info> &recompute_max_gain) {
     const size_t active_count = nodes.size();
 
     for (size_t i = 0; i < active_count; ++i) {
         const VertexType node = nodes.get_selected_nodes()[i];
 
-        kl_gain_update_info update_info =
-            update_node_work_affinity_after_move(node, move, prev_work_data, nodes.at(node));
+        kl_gain_update_info update_info = update_node_work_affinity_after_move(node, move, prev_work_data, nodes.at(node));
         if (update_info.update_from_step || update_info.update_to_step) {
             recompute_max_gain[node] = update_info;
         }
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::update_max_gain(
     kl_move move, std::map<VertexType, kl_gain_update_info> &recompute_max_gain, ThreadSearchContext &thread_data) {
     for (auto &pair : recompute_max_gain) {
@@ -1534,8 +1575,7 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::compute_work_affinity(
     VertexType node, std::vector<std::vector<cost_t>> &affinity_table_node, ThreadSearchContext &thread_data) {
     const unsigned node_step = active_schedule.assigned_superstep(node);
@@ -1558,33 +1598,39 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
 
     const unsigned node_proc = active_schedule.assigned_processor(node);
     const work_weight_t max_work_for_step = active_schedule.get_step_max_work(node_step);
-    const bool is_sole_max_processor =
-        (active_schedule.get_step_max_work_processor_count()[node_step] == 1) &&
-        (max_work_for_step == active_schedule.get_step_processor_work(node_step, node_proc));
-
-    const cost_t node_proc_affinity =
-        is_sole_max_processor
-            ? std::min(vertex_weight, max_work_for_step - active_schedule.get_step_second_max_work(node_step))
-            : 0.0;
+    const bool is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1)
+                                       && (max_work_for_step == active_schedule.get_step_processor_work(node_step, node_proc));
+
+    const cost_t node_proc_affinity
+        = is_sole_max_processor ? std::min(vertex_weight, max_work_for_step - active_schedule.get_step_second_max_work(node_step))
+                                : 0.0;
     affinity_table_node[node_proc][window_size] = node_proc_affinity;
 
     for (const unsigned proc : proc_range.compatible_processors_vertex(node)) {
-        if (proc == node_proc)
+        if (proc == node_proc) {
             continue;
+        }
 
         const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(node_step, proc);
-        affinity_table_node[proc][window_size] =
-            compute_same_step_affinity(max_work_for_step, new_weight, node_proc_affinity);
+        affinity_table_node[proc][window_size] = compute_same_step_affinity(max_work_for_step, new_weight, node_proc_affinity);
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::process_work_update_step(
-    VertexType node, unsigned node_step, unsigned node_proc, work_weight_t vertex_weight, unsigned move_step,
-    unsigned move_proc, work_weight_t move_correction_node_weight, const work_weight_t prev_move_step_max_work,
-    const work_weight_t prev_move_step_second_max_work, unsigned prev_move_step_max_work_processor_count,
-    bool &update_step, bool &update_entire_step, bool &full_update,
+    VertexType node,
+    unsigned node_step,
+    unsigned node_proc,
+    work_weight_t vertex_weight,
+    unsigned move_step,
+    unsigned move_proc,
+    work_weight_t move_correction_node_weight,
+    const work_weight_t prev_move_step_max_work,
+    const work_weight_t prev_move_step_second_max_work,
+    unsigned prev_move_step_max_work_processor_count,
+    bool &update_step,
+    bool &update_entire_step,
+    bool &full_update,
     std::vector<std::vector<cost_t>> &affinity_table_node) {
     const unsigned lower_bound = move_step > window_size ? move_step - window_size : 0;
     if (lower_bound <= node_step && node_step <= move_step + window_size) {
@@ -1594,20 +1640,18 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
             const work_weight_t new_second_max_weight = active_schedule.get_step_second_max_work(move_step);
             const work_weight_t new_step_proc_work = active_schedule.get_step_processor_work(node_step, node_proc);
 
-            const work_weight_t prev_step_proc_work =
-                (node_proc == move_proc) ? new_step_proc_work + move_correction_node_weight : new_step_proc_work;
-            const bool prev_is_sole_max_processor =
-                (prev_move_step_max_work_processor_count == 1) && (prev_move_step_max_work == prev_step_proc_work);
-            const cost_t prev_node_proc_affinity =
-                prev_is_sole_max_processor
-                    ? std::min(vertex_weight, prev_move_step_max_work - prev_move_step_second_max_work)
-                    : 0.0;
-
-            const bool new_is_sole_max_processor =
-                (active_schedule.get_step_max_work_processor_count()[node_step] == 1) &&
-                (new_max_weight == new_step_proc_work);
-            const cost_t new_node_proc_affinity =
-                new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight) : 0.0;
+            const work_weight_t prev_step_proc_work = (node_proc == move_proc) ? new_step_proc_work + move_correction_node_weight
+                                                                               : new_step_proc_work;
+            const bool prev_is_sole_max_processor = (prev_move_step_max_work_processor_count == 1)
+                                                    && (prev_move_step_max_work == prev_step_proc_work);
+            const cost_t prev_node_proc_affinity
+                = prev_is_sole_max_processor ? std::min(vertex_weight, prev_move_step_max_work - prev_move_step_second_max_work)
+                                             : 0.0;
+
+            const bool new_is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1)
+                                                   && (new_max_weight == new_step_proc_work);
+            const cost_t new_node_proc_affinity
+                = new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight) : 0.0;
 
             const cost_t diff = new_node_proc_affinity - prev_node_proc_affinity;
             const bool update_node_proc_affinity = std::abs(diff) > EPSILON;
@@ -1620,30 +1664,26 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
                 update_entire_step = true;
 
                 for (const unsigned proc : proc_range.compatible_processors_vertex(node)) {
-                    if ((proc == node_proc) || (proc == move_proc))
+                    if ((proc == node_proc) || (proc == move_proc)) {
                         continue;
+                    }
 
-                    const work_weight_t new_weight =
-                        vertex_weight + active_schedule.get_step_processor_work(node_step, proc);
-                    const cost_t prev_other_affinity =
-                        compute_same_step_affinity(prev_move_step_max_work, new_weight, prev_node_proc_affinity);
-                    const cost_t other_affinity =
-                        compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
+                    const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(node_step, proc);
+                    const cost_t prev_other_affinity
+                        = compute_same_step_affinity(prev_move_step_max_work, new_weight, prev_node_proc_affinity);
+                    const cost_t other_affinity = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
 
                     affinity_table_node[proc][window_size] += (other_affinity - prev_other_affinity);
                 }
             }
 
             if (node_proc != move_proc && is_compatible(node, move_proc)) {
-                const work_weight_t prev_new_weight = vertex_weight +
-                                                      active_schedule.get_step_processor_work(node_step, move_proc) +
-                                                      move_correction_node_weight;
-                const cost_t prev_other_affinity =
-                    compute_same_step_affinity(prev_move_step_max_work, prev_new_weight, prev_node_proc_affinity);
-                const work_weight_t new_weight =
-                    vertex_weight + active_schedule.get_step_processor_work(node_step, move_proc);
-                const cost_t other_affinity =
-                    compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
+                const work_weight_t prev_new_weight
+                    = vertex_weight + active_schedule.get_step_processor_work(node_step, move_proc) + move_correction_node_weight;
+                const cost_t prev_other_affinity
+                    = compute_same_step_affinity(prev_move_step_max_work, prev_new_weight, prev_node_proc_affinity);
+                const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(node_step, move_proc);
+                const cost_t other_affinity = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity);
 
                 affinity_table_node[move_proc][window_size] += (other_affinity - prev_other_affinity);
             }
@@ -1656,50 +1696,45 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
 
                 // update moving to all procs with special for move_proc
                 for (const unsigned proc : proc_range.compatible_processors_vertex(node)) {
-                    const work_weight_t new_weight =
-                        vertex_weight + active_schedule.get_step_processor_work(move_step, proc);
+                    const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(move_step, proc);
                     if (proc != move_proc) {
-
-                        const cost_t prev_affinity =
-                            prev_move_step_max_work < new_weight
-                                ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(prev_move_step_max_work)
-                                : 0.0;
-                        const cost_t new_affinity =
-                            new_max_weight < new_weight
-                                ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
-                                : 0.0;
+                        const cost_t prev_affinity
+                            = prev_move_step_max_work < new_weight
+                                  ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(prev_move_step_max_work)
+                                  : 0.0;
+                        const cost_t new_affinity = new_max_weight < new_weight
+                                                        ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
+                                                        : 0.0;
                         affinity_table_node[proc][idx] += new_affinity - prev_affinity;
 
                     } else {
-                        const work_weight_t prev_new_weight = vertex_weight +
-                                                              active_schedule.get_step_processor_work(move_step, proc) +
-                                                              move_correction_node_weight;
-                        const cost_t prev_affinity =
-                            prev_move_step_max_work < prev_new_weight
-                                ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_move_step_max_work)
-                                : 0.0;
-
-                        const cost_t new_affinity =
-                            new_max_weight < new_weight
-                                ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
-                                : 0.0;
+                        const work_weight_t prev_new_weight = vertex_weight
+                                                              + active_schedule.get_step_processor_work(move_step, proc)
+                                                              + move_correction_node_weight;
+                        const cost_t prev_affinity
+                            = prev_move_step_max_work < prev_new_weight
+                                  ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_move_step_max_work)
+                                  : 0.0;
+
+                        const cost_t new_affinity = new_max_weight < new_weight
+                                                        ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
+                                                        : 0.0;
                         affinity_table_node[proc][idx] += new_affinity - prev_affinity;
                     }
                 }
             } else {
                 // update only move_proc
                 if (is_compatible(node, move_proc)) {
-                    const work_weight_t new_weight =
-                        vertex_weight + active_schedule.get_step_processor_work(move_step, move_proc);
+                    const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(move_step, move_proc);
                     const work_weight_t prev_new_weight = new_weight + move_correction_node_weight;
-                    const cost_t prev_affinity =
-                        prev_move_step_max_work < prev_new_weight
-                            ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_move_step_max_work)
-                            : 0.0;
-
-                    const cost_t new_affinity = new_max_weight < new_weight ? static_cast<cost_t>(new_weight) -
-                                                                                  static_cast<cost_t>(new_max_weight)
-                                                                            : 0.0;
+                    const cost_t prev_affinity
+                        = prev_move_step_max_work < prev_new_weight
+                              ? static_cast<cost_t>(prev_new_weight) - static_cast<cost_t>(prev_move_step_max_work)
+                              : 0.0;
+
+                    const cost_t new_affinity = new_max_weight < new_weight
+                                                    ? static_cast<cost_t>(new_weight) - static_cast<cost_t>(new_max_weight)
+                                                    : 0.0;
                     affinity_table_node[move_proc][idx] += new_affinity - prev_affinity;
                 }
             }
@@ -1707,18 +1742,14 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
-bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
-                 cost_t>::select_nodes_check_remove_superstep(unsigned &step_to_remove,
-                                                              ThreadSearchContext &thread_data) {
-    if (thread_data.step_selection_epoch_counter >= parameters.node_max_step_selection_epochs ||
-        thread_data.num_steps() < 3) {
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
+bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::select_nodes_check_remove_superstep(
+    unsigned &step_to_remove, ThreadSearchContext &thread_data) {
+    if (thread_data.step_selection_epoch_counter >= parameters.node_max_step_selection_epochs || thread_data.num_steps() < 3) {
         return false;
     }
 
-    for (step_to_remove = thread_data.step_selection_counter; step_to_remove <= thread_data.end_step;
-         step_to_remove++) {
+    for (step_to_remove = thread_data.step_selection_counter; step_to_remove <= thread_data.end_step; step_to_remove++) {
         assert(step_to_remove >= thread_data.start_step && step_to_remove <= thread_data.end_step);
 #ifdef KL_DEBUG
         std::cout << "Checking to remove step " << step_to_remove << "/" << thread_data.end_step << std::endl;
@@ -1745,21 +1776,20 @@ bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     return false;
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
-bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::check_remove_superstep(
-    unsigned step) {
-    if (active_schedule.num_steps() < 2)
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
+bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::check_remove_superstep(unsigned step) {
+    if (active_schedule.num_steps() < 2) {
         return false;
+    }
 
-    if (active_schedule.get_step_max_work(step) < instance->synchronisationCosts())
+    if (active_schedule.get_step_max_work(step) < instance->synchronisationCosts()) {
         return true;
+    }
 
     return false;
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::reset_inner_search_structures(
     ThreadSearchContext &thread_data) const {
     thread_data.unlock_edge_backtrack_counter = thread_data.unlock_edge_backtrack_counter_reset;
@@ -1771,8 +1801,7 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     thread_data.lock_manager.clear();
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::is_local_search_blocked(
     ThreadSearchContext &thread_data) {
     for (const auto &pair : thread_data.active_schedule_data.new_violations) {
@@ -1783,8 +1812,7 @@ bool kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     return false;
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::initialize_datastructures(
     BspSchedule<Graph_t> &schedule) {
     input_schedule = &schedule;
@@ -1801,8 +1829,8 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     for (auto &t_data : thread_data_vec) {
         t_data.affinity_table.initialize(active_schedule, t_data.selection_strategy.selection_threshold);
         t_data.lock_manager.initialize(graph->num_vertices());
-        t_data.reward_penalty_strat.initialize(active_schedule, comm_cost_f.get_max_comm_weight_multiplied(),
-                                               active_schedule.get_max_work_weight());
+        t_data.reward_penalty_strat.initialize(
+            active_schedule, comm_cost_f.get_max_comm_weight_multiplied(), active_schedule.get_max_work_weight());
         t_data.selection_strategy.initialize(active_schedule, gen, t_data.start_step, t_data.end_step);
 
         t_data.local_affinity_table.resize(instance->numberOfProcessors());
@@ -1812,15 +1840,14 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
-void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::update_avg_gain(
-    const cost_t gain, const unsigned num_iter, double &average_gain) {
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
+void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::update_avg_gain(const cost_t gain,
+                                                                                                          const unsigned num_iter,
+                                                                                                          double &average_gain) {
     average_gain = static_cast<double>((average_gain * num_iter + gain)) / (num_iter + 1.0);
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::insert_gain_heap(
     ThreadSearchContext &thread_data) {
     const size_t active_count = thread_data.affinity_table.size();
@@ -1833,8 +1860,7 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::insert_new_nodes_gain_heap(
     std::vector<VertexType> &new_nodes, node_selection_container_t &nodes, ThreadSearchContext &thread_data) {
     for (const auto &node : new_nodes) {
@@ -1845,23 +1871,20 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::cleanup_datastructures() {
     thread_data_vec.clear();
     active_schedule.clear();
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::print_heap(
     heap_datastructure &max_gain_heap) const {
-
     if (max_gain_heap.is_empty()) {
         std::cout << "heap is empty" << std::endl;
         return;
     }
-    heap_datastructure temp_heap = max_gain_heap; // requires copy constructor
+    heap_datastructure temp_heap = max_gain_heap;    // requires copy constructor
 
     std::cout << "heap current size: " << temp_heap.size() << std::endl;
     const auto &top_val = temp_heap.get_value(temp_heap.top());
@@ -1870,22 +1893,21 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     unsigned count = 0;
     while (!temp_heap.is_empty() && count++ < 15) {
         const auto &val = temp_heap.get_value(temp_heap.top());
-        std::cout << "node " << val.node << " gain " << val.gain << " to proc " << val.to_proc << " to step "
-                  << val.to_step << std::endl;
+        std::cout << "node " << val.node << " gain " << val.gain << " to proc " << val.to_proc << " to step " << val.to_step
+                  << std::endl;
         temp_heap.pop();
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::update_best_move(
-    VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table,
-    ThreadSearchContext &thread_data) {
+    VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) {
     const unsigned node_proc = active_schedule.assigned_processor(node);
     const unsigned node_step = active_schedule.assigned_superstep(node);
 
-    if ((node_proc == proc) && (node_step == step))
+    if ((node_proc == proc) && (node_step == step)) {
         return;
+    }
 
     kl_move node_move = thread_data.max_gain_heap.get_value(node);
     cost_t max_gain = node_move.gain;
@@ -1897,8 +1919,9 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
         recompute_node_max_gain(node, affinity_table, thread_data);
     } else {
         if constexpr (active_schedule_t::use_memory_constraint) {
-            if (not active_schedule.memory_constraint.can_move(node, proc, step))
+            if (not active_schedule.memory_constraint.can_move(node, proc, step)) {
                 return;
+            }
         }
         const unsigned idx = rel_step_idx(node_step, step);
         const cost_t gain = affinity_table[node][node_proc][window_size] - affinity_table[node][proc][idx];
@@ -1918,11 +1941,9 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
 void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>::update_best_move(
     VertexType node, unsigned step, node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) {
-
     const unsigned node_proc = active_schedule.assigned_processor(node);
     const unsigned node_step = active_schedule.assigned_superstep(node);
 
@@ -1939,8 +1960,9 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
             const unsigned idx = rel_step_idx(node_step, step);
             for (const unsigned p : proc_range.compatible_processors_vertex(node)) {
                 if constexpr (active_schedule_t::use_memory_constraint) {
-                    if (not active_schedule.memory_constraint.can_move(node, p, step))
+                    if (not active_schedule.memory_constraint.can_move(node, p, step)) {
                         continue;
+                    }
                 }
                 const cost_t gain = affinity_table[node][node_proc][window_size] - affinity_table[node][p][idx];
                 if (gain > max_gain) {
@@ -1951,14 +1973,15 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
             }
         } else {
             for (const unsigned proc : proc_range.compatible_processors_vertex(node)) {
-                if (proc == node_proc)
+                if (proc == node_proc) {
                     continue;
+                }
                 if constexpr (active_schedule_t::use_memory_constraint) {
-                    if (not active_schedule.memory_constraint.can_move(node, proc, step))
+                    if (not active_schedule.memory_constraint.can_move(node, proc, step)) {
                         continue;
+                    }
                 }
-                const cost_t gain =
-                    affinity_table[node][node_proc][window_size] - affinity_table[node][proc][window_size];
+                const cost_t gain = affinity_table[node][node_proc][window_size] - affinity_table[node][proc][window_size];
                 if (gain > max_gain) {
                     max_gain = gain;
                     max_proc = proc;
@@ -1977,4 +2000,4 @@ void kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size,
     }
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp
index 56cb97ad..1a825331 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp
@@ -19,22 +19,21 @@ limitations under the License.
 #pragma once
 
 #include <omp.h>
+
 #include "kl_improver.hpp"
 
 namespace osp {
 
-
-
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t = no_local_search_memory_constraint,
-         unsigned window_size = 1, typename cost_t = double>
+template <typename Graph_t,
+          typename comm_cost_function_t,
+          typename MemoryConstraint_t = no_local_search_memory_constraint,
+          unsigned window_size = 1,
+          typename cost_t = double>
 class kl_improver_mt : public kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t> {
-
   protected:
-
     unsigned max_num_threads = std::numeric_limits<unsigned>::max();
 
     void set_thread_boundaries(const unsigned num_threads, const unsigned num_steps, bool last_thread_large_range) {
-
         if (num_threads == 1) {
             this->set_start_step(0, this->thread_data_vec[0]);
             this->thread_data_vec[0].end_step = (num_steps > 0) ? num_steps - 1 : 0;
@@ -62,7 +61,8 @@ class kl_improver_mt : public kl_improver<Graph_t, comm_cost_function_t, MemoryC
                 this->thread_data_vec[i].original_end_step = this->thread_data_vec[i].end_step;
                 current_start_step = end_step + 1 + this->parameters.thread_range_gap;
 #ifdef KL_DEBUG_1
-                std::cout << "thread " << i << ": start_step=" << this->thread_data_vec[i].start_step << ", end_step=" << this->thread_data_vec[i].end_step << std::endl;
+                std::cout << "thread " << i << ": start_step=" << this->thread_data_vec[i].start_step
+                          << ", end_step=" << this->thread_data_vec[i].end_step << std::endl;
 #endif
             }
         }
@@ -91,59 +91,60 @@ class kl_improver_mt : public kl_improver<Graph_t, comm_cost_function_t, MemoryC
             num_threads = 1;
         }
 #ifdef KL_DEBUG_1
-        std::cout << "num threads: " << num_threads << " number of supersteps: " << num_steps << ", max allowed threads: " << max_allowed_threads << std::endl;
-#endif       
-    
+        std::cout << "num threads: " << num_threads << " number of supersteps: " << num_steps
+                  << ", max allowed threads: " << max_allowed_threads << std::endl;
+#endif
     }
 
-
   public:
-  
     kl_improver_mt() : kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>() {}
-    explicit kl_improver_mt(unsigned seed) : kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>(seed) {}
+
+    explicit kl_improver_mt(unsigned seed)
+        : kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>(seed) {}
+
     virtual ~kl_improver_mt() = default;
 
-    void set_max_num_threads(const unsigned num_threads) {
-        max_num_threads = num_threads;
-    }
+    void set_max_num_threads(const unsigned num_threads) { max_num_threads = num_threads; }
 
     virtual RETURN_STATUS improveSchedule(BspSchedule<Graph_t> &schedule) override {
-        if (schedule.getInstance().numberOfProcessors() < 2)
+        if (schedule.getInstance().numberOfProcessors() < 2) {
             return RETURN_STATUS::BEST_FOUND;
+        }
 
         unsigned num_threads = std::min(max_num_threads, static_cast<unsigned>(omp_get_max_threads()));
         set_num_threads(num_threads, schedule.numberOfSupersteps());
 
-        this->thread_data_vec.resize(num_threads);      
+        this->thread_data_vec.resize(num_threads);
         this->thread_finished_vec.assign(num_threads, true);
 
         if (num_threads == 1) {
-            this->parameters.num_parallel_loops = 1; // no parallelization with one thread. Affects parameters.max_out_iteration calculation in set_parameters()
+            this->parameters.num_parallel_loops
+                = 1;    // no parallelization with one thread. Affects parameters.max_out_iteration calculation in set_parameters()
         }
 
         this->set_parameters(schedule.getInstance().numberOfVertices());
-        this->initialize_datastructures(schedule); 
+        this->initialize_datastructures(schedule);
         const cost_t initial_cost = this->active_schedule.get_cost();
 
         for (size_t i = 0; i < this->parameters.num_parallel_loops; ++i) {
-            set_thread_boundaries(num_threads, schedule.numberOfSupersteps(), i % 2 == 0);                       
+            set_thread_boundaries(num_threads, schedule.numberOfSupersteps(), i % 2 == 0);
 
-            #pragma omp parallel num_threads(num_threads) 
+#pragma omp parallel num_threads(num_threads)
             {
                 const size_t thread_id = static_cast<size_t>(omp_get_thread_num());
-                auto & thread_data = this->thread_data_vec[thread_id];
+                auto &thread_data = this->thread_data_vec[thread_id];
                 thread_data.active_schedule_data.initialize_cost(this->active_schedule.get_cost());
                 thread_data.selection_strategy.setup(thread_data.start_step, thread_data.end_step);
-                this->run_local_search(thread_data); 
+                this->run_local_search(thread_data);
             }
-        
+
             this->synchronize_active_schedule(num_threads);
             if (num_threads > 1) {
                 this->active_schedule.set_cost(this->comm_cost_f.compute_schedule_cost());
                 set_num_threads(num_threads, schedule.numberOfSupersteps());
                 this->thread_finished_vec.resize(num_threads);
             }
-        }               
+        }
 
         if (initial_cost > this->active_schedule.get_cost()) {
             this->active_schedule.write_schedule(schedule);
@@ -156,4 +157,4 @@ class kl_improver_mt : public kl_improver<Graph_t, comm_cost_function_t, MemoryC
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp
index 654ed111..977e693f 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp
@@ -22,11 +22,12 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename comm_cost_function_t,
-         typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1,
-         typename cost_t = double>
+template <typename Graph_t,
+          typename comm_cost_function_t,
+          typename MemoryConstraint_t = no_local_search_memory_constraint,
+          unsigned window_size = 1,
+          typename cost_t = double>
 class kl_improver_test : public kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t> {
-
     using VertexType = vertex_idx_t<Graph_t>;
     using kl_move = kl_move_struct<cost_t, VertexType>;
     using heap_datastructure = MaxPairingHeap<VertexType, kl_move>;
@@ -64,9 +65,7 @@ class kl_improver_test : public kl_improver<Graph_t, comm_cost_function_t, Memor
 
     bool is_feasible() { return this->thread_data_vec[0].active_schedule_data.feasible; }
 
-    void compute_violations_test() {
-        this->active_schedule.compute_violations(this->thread_data_vec[0].active_schedule_data);
-    }
+    void compute_violations_test() { this->active_schedule.compute_violations(this->thread_data_vec[0].active_schedule_data); }
 
     node_selection_container_t &insert_gain_heap_test(const std::vector<VertexType> &n) {
         this->thread_data_vec[0].reward_penalty_strat.penalty = 0.0;
@@ -118,25 +117,23 @@ class kl_improver_test : public kl_improver<Graph_t, comm_cost_function_t, Memor
         this->apply_move(best_move, this->thread_data_vec[0]);
 
         this->thread_data_vec[0].affinity_table.trim();
-        this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data,
-                                prev_comm_data);
+        this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, prev_comm_data);
     }
 
     auto run_inner_iteration_test() {
-
         std::map<VertexType, kl_gain_update_info> recompute_max_gain;
         std::vector<VertexType> new_nodes;
 
         this->print_heap(this->thread_data_vec[0].max_gain_heap);
 
         kl_move best_move = this->get_best_move(
-            this->thread_data_vec[0].affinity_table, this->thread_data_vec[0].lock_manager,
-            this->thread_data_vec[0].max_gain_heap); // locks best_move.node and removes it from node_selection
+            this->thread_data_vec[0].affinity_table,
+            this->thread_data_vec[0].lock_manager,
+            this->thread_data_vec[0].max_gain_heap);    // locks best_move.node and removes it from node_selection
 
 #ifdef KL_DEBUG
-        std::cout << "Best move: " << best_move.node << " gain: " << best_move.gain << ", from: " << best_move.from_step
-                  << "|" << best_move.from_proc << " to: " << best_move.to_step << "|" << best_move.to_proc
-                  << std::endl;
+        std::cout << "Best move: " << best_move.node << " gain: " << best_move.gain << ", from: " << best_move.from_step << "|"
+                  << best_move.from_proc << " to: " << best_move.to_step << "|" << best_move.to_proc << std::endl;
 #endif
 
         const auto prev_work_data = this->active_schedule.get_pre_move_work_data(best_move);
@@ -144,8 +141,7 @@ class kl_improver_test : public kl_improver<Graph_t, comm_cost_function_t, Memor
         this->apply_move(best_move, this->thread_data_vec[0]);
 
         this->thread_data_vec[0].affinity_table.trim();
-        this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data,
-                                prev_comm_data);
+        this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, prev_comm_data);
 
 #ifdef KL_DEBUG
         std::cout << "New nodes: { ";
@@ -166,4 +162,4 @@ class kl_improver_test : public kl_improver<Graph_t, comm_cost_function_t, Memor
     void get_active_schedule_test(BspSchedule<Graph_t> &schedule) { this->active_schedule.write_schedule(schedule); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp
index 80ed0e48..9727357f 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp
@@ -19,39 +19,71 @@ limitations under the License.
 
 #pragma once
 
-//#define KL_DEBUG
-//#define KL_DEBUG_1
-//#define KL_DEBUG_COST_CHECK
+// #define KL_DEBUG
+// #define KL_DEBUG_1
+// #define KL_DEBUG_COST_CHECK
 
-#include "kl_improver.hpp"
-#include "comm_cost_modules/kl_total_comm_cost.hpp"
-#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp"
 #include "comm_cost_modules/kl_bsp_comm_cost.hpp"
+#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp"
+#include "comm_cost_modules/kl_total_comm_cost.hpp"
+#include "kl_improver.hpp"
 #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp"
 
 namespace osp {
 
 using double_cost_t = double;
 
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1, bool use_node_communication_costs_arg = true> 
-using kl_total_comm_improver = kl_improver<Graph_t, kl_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size, use_node_communication_costs_arg>, MemoryConstraint_t, window_size, double_cost_t>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = ls_local_memory_constraint<Graph_t>, unsigned window_size = 1, bool use_node_communication_costs_arg = true> 
-using kl_total_comm_improver_local_mem_constr = kl_improver<Graph_t, kl_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size, use_node_communication_costs_arg>, MemoryConstraint_t, window_size, double_cost_t>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1> 
-using kl_total_lambda_comm_improver = kl_improver<Graph_t, kl_hyper_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double_cost_t>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = ls_local_memory_constraint<Graph_t>, unsigned window_size = 1> 
-using kl_total_lambda_comm_improver_local_mem_constr = kl_improver<Graph_t, kl_hyper_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double_cost_t>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1> 
-using kl_bsp_comm_improver = kl_improver<Graph_t, kl_bsp_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double_cost_t>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = ls_local_memory_constraint<Graph_t>, unsigned window_size = 1> 
-using kl_bsp_comm_improver_local_mem_constr = kl_improver<Graph_t, kl_bsp_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double_cost_t>; 
-
-
-
-} // namespace osp
-
+template <typename Graph_t,
+          typename MemoryConstraint_t = no_local_search_memory_constraint,
+          unsigned window_size = 1,
+          bool use_node_communication_costs_arg = true>
+using kl_total_comm_improver
+    = kl_improver<Graph_t,
+                  kl_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size, use_node_communication_costs_arg>,
+                  MemoryConstraint_t,
+                  window_size,
+                  double_cost_t>;
+
+template <typename Graph_t,
+          typename MemoryConstraint_t = ls_local_memory_constraint<Graph_t>,
+          unsigned window_size = 1,
+          bool use_node_communication_costs_arg = true>
+using kl_total_comm_improver_local_mem_constr
+    = kl_improver<Graph_t,
+                  kl_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size, use_node_communication_costs_arg>,
+                  MemoryConstraint_t,
+                  window_size,
+                  double_cost_t>;
+
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1>
+using kl_total_lambda_comm_improver
+    = kl_improver<Graph_t,
+                  kl_hyper_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>,
+                  MemoryConstraint_t,
+                  window_size,
+                  double_cost_t>;
+
+template <typename Graph_t, typename MemoryConstraint_t = ls_local_memory_constraint<Graph_t>, unsigned window_size = 1>
+using kl_total_lambda_comm_improver_local_mem_constr
+    = kl_improver<Graph_t,
+                  kl_hyper_total_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>,
+                  MemoryConstraint_t,
+                  window_size,
+                  double_cost_t>;
+
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1>
+using kl_bsp_comm_improver = kl_improver<Graph_t,
+                                         kl_bsp_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>,
+                                         MemoryConstraint_t,
+                                         window_size,
+                                         double_cost_t>;
+
+template <typename Graph_t, typename MemoryConstraint_t = ls_local_memory_constraint<Graph_t>, unsigned window_size = 1>
+using kl_bsp_comm_improver_local_mem_constr
+    = kl_improver<Graph_t,
+                  kl_bsp_comm_cost_function<Graph_t, double_cost_t, MemoryConstraint_t, window_size>,
+                  MemoryConstraint_t,
+                  window_size,
+                  double_cost_t>;
+
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp
index 5946c7e5..1d70f3eb 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp
@@ -19,23 +19,35 @@ limitations under the License.
 
 #pragma once
 
-#include "kl_include.hpp"
-#include "kl_improver_mt.hpp"
-#include "comm_cost_modules/kl_total_comm_cost.hpp"
-#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp"
 #include "comm_cost_modules/kl_bsp_comm_cost.hpp"
+#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp"
+#include "comm_cost_modules/kl_total_comm_cost.hpp"
+#include "kl_improver_mt.hpp"
+#include "kl_include.hpp"
 
 namespace osp {
 
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1, bool use_node_communication_costs_arg = true> 
-using kl_total_comm_improver_mt = kl_improver_mt<Graph_t, kl_total_comm_cost_function<Graph_t, double, MemoryConstraint_t, window_size, use_node_communication_costs_arg>, MemoryConstraint_t, window_size, double>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1> 
-using kl_total_lambda_comm_improver_mt = kl_improver_mt<Graph_t, kl_hyper_total_comm_cost_function<Graph_t, double, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double>; 
-
-template<typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1> 
-using kl_bsp_comm_improver_mt = kl_improver_mt<Graph_t, kl_bsp_comm_cost_function<Graph_t, double, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double>; 
-
-
-} // namespace osp
-
+template <typename Graph_t,
+          typename MemoryConstraint_t = no_local_search_memory_constraint,
+          unsigned window_size = 1,
+          bool use_node_communication_costs_arg = true>
+using kl_total_comm_improver_mt
+    = kl_improver_mt<Graph_t,
+                     kl_total_comm_cost_function<Graph_t, double, MemoryConstraint_t, window_size, use_node_communication_costs_arg>,
+                     MemoryConstraint_t,
+                     window_size,
+                     double>;
+
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1>
+using kl_total_lambda_comm_improver_mt
+    = kl_improver_mt<Graph_t,
+                     kl_hyper_total_comm_cost_function<Graph_t, double, MemoryConstraint_t, window_size>,
+                     MemoryConstraint_t,
+                     window_size,
+                     double>;
+
+template <typename Graph_t, typename MemoryConstraint_t = no_local_search_memory_constraint, unsigned window_size = 1>
+using kl_bsp_comm_improver_mt
+    = kl_improver_mt<Graph_t, kl_bsp_comm_cost_function<Graph_t, double, MemoryConstraint_t, window_size>, MemoryConstraint_t, window_size, double>;
+
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp
index 7f3bb29d..397f0a1f 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp
@@ -18,14 +18,14 @@ limitations under the License.
 
 #pragma once
 
-#include "kl_active_schedule.hpp"
 #include <unordered_set>
 
+#include "kl_active_schedule.hpp"
+
 namespace osp {
 
-template<typename cost_t, typename comm_cost_function_t, typename kl_active_schedule_t>
+template <typename cost_t, typename comm_cost_function_t, typename kl_active_schedule_t>
 struct reward_penalty_strategy {
-
     kl_active_schedule_t *active_schedule;
     cost_t max_weight;
 
@@ -47,9 +47,8 @@ struct reward_penalty_strategy {
     }
 };
 
-template<typename VertexType>
+template <typename VertexType>
 struct set_vertex_lock_manger {
-
     std::unordered_set<VertexType> locked_nodes;
 
     void initialize(size_t) {}
@@ -63,9 +62,8 @@ struct set_vertex_lock_manger {
     void clear() { locked_nodes.clear(); }
 };
 
-template<typename VertexType>
+template <typename VertexType>
 struct vector_vertex_lock_manger {
-
     std::vector<bool> locked_nodes;
 
     void initialize(size_t num_nodes) { locked_nodes.resize(num_nodes); }
@@ -79,7 +77,7 @@ struct vector_vertex_lock_manger {
     void clear() { locked_nodes.assign(locked_nodes.size(), false); }
 };
 
-template<typename Graph_t, typename cost_t, typename kl_active_schedule_t, unsigned window_size>
+template <typename Graph_t, typename cost_t, typename kl_active_schedule_t, unsigned window_size>
 struct adaptive_affinity_table {
     constexpr static unsigned window_range = 2 * window_size + 1;
     using VertexType = vertex_idx_t<Graph_t>;
@@ -153,8 +151,9 @@ struct adaptive_affinity_table {
     }
 
     bool insert(VertexType node) {
-        if (node_is_selected[node])
-            return false; // Node is already in the table.
+        if (node_is_selected[node]) {
+            return false;    // Node is already in the table.
+        }
 
         size_t insert_location;
         if (!gaps.empty()) {
@@ -241,7 +240,7 @@ struct adaptive_affinity_table {
     }
 };
 
-template<typename Graph_t, typename cost_t, typename kl_active_schedule_t, unsigned window_size>
+template <typename Graph_t, typename cost_t, typename kl_active_schedule_t, unsigned window_size>
 struct static_affinity_table {
     constexpr static unsigned window_range = 2 * window_size + 1;
     using VertexType = vertex_idx_t<Graph_t>;
@@ -300,9 +299,8 @@ struct static_affinity_table {
     void trim() {}
 };
 
-template<typename Graph_t, typename container_t, typename kl_active_schedule_t>
+template <typename Graph_t, typename container_t, typename kl_active_schedule_t>
 struct vertex_selection_strategy {
-
     using EdgeType = edge_desc_t<Graph_t>;
 
     const kl_active_schedule_t *active_schedule;
@@ -316,7 +314,9 @@ struct vertex_selection_strategy {
 
     unsigned max_work_counter = 0;
 
-    inline void initialize(const kl_active_schedule_t &sche_, std::mt19937 &gen_, const unsigned start_step,
+    inline void initialize(const kl_active_schedule_t &sche_,
+                           std::mt19937 &gen_,
+                           const unsigned start_step,
                            const unsigned end_step) {
         active_schedule = &sche_;
         graph = &(sche_.getInstance().getComputationalDag());
@@ -344,18 +344,22 @@ struct vertex_selection_strategy {
         std::shuffle(permutation.begin(), permutation.end(), *gen);
     }
 
-    void add_neighbours_to_selection(vertex_idx_t<Graph_t> node, container_t &nodes, const unsigned start_step,
+    void add_neighbours_to_selection(vertex_idx_t<Graph_t> node,
+                                     container_t &nodes,
+                                     const unsigned start_step,
                                      const unsigned end_step) {
         for (const auto parent : graph->parents(node)) {
             const unsigned parent_step = active_schedule->assigned_superstep(parent);
-            if (parent_step >= start_step && parent_step <= end_step)
+            if (parent_step >= start_step && parent_step <= end_step) {
                 nodes.insert(parent);
+            }
         }
 
         for (const auto child : graph->children(node)) {
             const unsigned child_step = active_schedule->assigned_superstep(child);
-            if (child_step >= start_step && child_step <= end_step)
+            if (child_step >= start_step && child_step <= end_step) {
                 nodes.insert(child);
+            }
         }
     }
 
@@ -370,24 +374,27 @@ struct vertex_selection_strategy {
         strategy_counter %= 5;
     }
 
-    void select_nodes_violations(container_t &node_selection, std::unordered_set<EdgeType> &current_violations,
-                                 const unsigned start_step, const unsigned end_step) {
+    void select_nodes_violations(container_t &node_selection,
+                                 std::unordered_set<EdgeType> &current_violations,
+                                 const unsigned start_step,
+                                 const unsigned end_step) {
         for (const auto &edge : current_violations) {
             const auto source_v = source(edge, *graph);
             const auto target_v = target(edge, *graph);
 
             const unsigned source_step = active_schedule->assigned_superstep(source_v);
-            if (source_step >= start_step && source_step <= end_step)
+            if (source_step >= start_step && source_step <= end_step) {
                 node_selection.insert(source_v);
+            }
 
             const unsigned target_step = active_schedule->assigned_superstep(target_v);
-            if (target_step >= start_step && target_step <= end_step)
+            if (target_step >= start_step && target_step <= end_step) {
                 node_selection.insert(target_v);
+            }
         }
     }
 
     void select_nodes_permutation_threshold(const std::size_t &threshold, container_t &node_selection) {
-
         const size_t bound = std::min(threshold + permutation_idx, permutation.size());
         for (std::size_t i = permutation_idx; i < bound; i++) {
             node_selection.insert(permutation[i]);
@@ -400,12 +407,14 @@ struct vertex_selection_strategy {
         }
     }
 
-    void select_nodes_max_work_proc(const std::size_t &threshold, container_t &node_selection,
-                                    const unsigned start_step, const unsigned end_step) {
+    void select_nodes_max_work_proc(const std::size_t &threshold,
+                                    container_t &node_selection,
+                                    const unsigned start_step,
+                                    const unsigned end_step) {
         while (node_selection.size() < threshold) {
             if (max_work_counter > end_step) {
-                max_work_counter = start_step; // wrap around
-                break;                         // stop after one full pass
+                max_work_counter = start_step;    // wrap around
+                break;                            // stop after one full pass
             }
 
             select_nodes_max_work_proc_helper(threshold - node_selection.size(), max_work_counter, node_selection);
@@ -417,8 +426,8 @@ struct vertex_selection_strategy {
         const unsigned num_max_work_proc = active_schedule->work_datastructures.step_max_work_processor_count[step];
         for (unsigned idx = 0; idx < num_max_work_proc; idx++) {
             const unsigned proc = active_schedule->work_datastructures.step_processor_work_[step][idx].proc;
-            const std::unordered_set<vertex_idx_t<Graph_t>> step_proc_vert =
-                active_schedule->getSetSchedule().step_processor_vertices[step][proc];
+            const std::unordered_set<vertex_idx_t<Graph_t>> step_proc_vert
+                = active_schedule->getSetSchedule().step_processor_vertices[step][proc];
             const size_t num_insert = std::min(threshold - node_selection.size(), step_proc_vert.size());
             auto end_it = step_proc_vert.begin();
             std::advance(end_it, num_insert);
@@ -427,4 +436,4 @@ struct vertex_selection_strategy {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp
index 2cee3d0f..bc0ed8eb 100644
--- a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp
@@ -31,27 +31,30 @@ namespace osp {
  * This trait checks if a type has the required methods for a memory constraint.
  *
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_local_search_memory_constraint : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_local_search_memory_constraint<
-    T, std::void_t<decltype(std::declval<T>().initialize(std::declval<SetSchedule<typename T::Graph_impl_t>>(),
-                                                         std::declval<VectorSchedule<typename T::Graph_impl_t>>())),
-                   decltype(std::declval<T>().apply_move(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
-                                                         std::declval<unsigned>(), std::declval<unsigned>(),
-                                                         std::declval<unsigned>(), std::declval<unsigned>())),
-                   decltype(std::declval<T>().compute_memory_datastructure(std::declval<unsigned>(),
-                                                                           std::declval<unsigned>())),
-                   decltype(std::declval<T>().swap_steps(std::declval<unsigned>(), std::declval<unsigned>())),
-                   decltype(std::declval<T>().reset_superstep(std::declval<unsigned>())),
-                   decltype(std::declval<T>().override_superstep(std::declval<unsigned>(), std::declval<unsigned>(),
-                                                                 std::declval<unsigned>(), std::declval<unsigned>())),
-                   decltype(std::declval<T>().can_move(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
-                                                       std::declval<unsigned>(), std::declval<unsigned>())),
-                   decltype(std::declval<T>().clear()), decltype(T())>> : std::true_type {};
-
-template<typename T>
+    T,
+    std::void_t<decltype(std::declval<T>().initialize(std::declval<SetSchedule<typename T::Graph_impl_t>>(),
+                                                      std::declval<VectorSchedule<typename T::Graph_impl_t>>())),
+                decltype(std::declval<T>().apply_move(std::declval<vertex_idx_t<typename T::Graph_impl_t>>(),
+                                                      std::declval<unsigned>(),
+                                                      std::declval<unsigned>(),
+                                                      std::declval<unsigned>(),
+                                                      std::declval<unsigned>())),
+                decltype(std::declval<T>().compute_memory_datastructure(std::declval<unsigned>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().swap_steps(std::declval<unsigned>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().reset_superstep(std::declval<unsigned>())),
+                decltype(std::declval<T>().override_superstep(
+                    std::declval<unsigned>(), std::declval<unsigned>(), std::declval<unsigned>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().can_move(
+                    std::declval<vertex_idx_t<typename T::Graph_impl_t>>(), std::declval<unsigned>(), std::declval<unsigned>())),
+                decltype(std::declval<T>().clear()),
+                decltype(T())>> : std::true_type {};
+
+template <typename T>
 inline constexpr bool is_local_search_memory_constraint_v = is_local_search_memory_constraint<T>::value;
 
 /**
@@ -67,9 +70,8 @@ struct no_local_search_memory_constraint {
  *
  * @tparam Graph_t The graph type.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct ls_local_memory_constraint {
-
     using Graph_impl_t = Graph_t;
 
     const SetSchedule<Graph_t> *set_schedule;
@@ -80,7 +82,6 @@ struct ls_local_memory_constraint {
     ls_local_memory_constraint() : set_schedule(nullptr), graph(nullptr) {}
 
     inline void initialize(const SetSchedule<Graph_t> &set_schedule_, const VectorSchedule<Graph_t> &) {
-
         if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL) {
             throw std::invalid_argument("Memory constraint type is not LOCAL");
         }
@@ -92,15 +93,14 @@ struct ls_local_memory_constraint {
             std::vector<v_memw_t<Graph_t>>(set_schedule->getInstance().numberOfProcessors(), 0));
     }
 
-    inline void apply_move(vertex_idx_t<Graph_t> vertex, unsigned from_proc, unsigned from_step, unsigned to_proc,
-                           unsigned to_step) {
+    inline void apply_move(vertex_idx_t<Graph_t> vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) {
         step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex);
         step_processor_memory[from_step][from_proc] -= graph->vertex_mem_weight(vertex);
     }
 
     inline bool can_move(vertex_idx_t<Graph_t> vertex, const unsigned proc, unsigned step) const {
-        return step_processor_memory[step][proc] + graph->vertex_mem_weight(vertex) <=
-               set_schedule->getInstance().getArchitecture().memoryBound(proc);
+        return step_processor_memory[step][proc] + graph->vertex_mem_weight(vertex)
+               <= set_schedule->getInstance().getArchitecture().memoryBound(proc);
     }
 
     void swap_steps(const unsigned step1, const unsigned step2) {
@@ -108,15 +108,11 @@ struct ls_local_memory_constraint {
     }
 
     void compute_memory_datastructure(unsigned start_step, unsigned end_step) {
-
         for (unsigned step = start_step; step <= end_step; step++) {
-
             for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) {
-
                 step_processor_memory[step][proc] = 0;
 
                 for (const auto &node : set_schedule->step_processor_vertices[step][proc]) {
-
                     step_processor_memory[step][proc] += graph->vertex_mem_weight(node);
                 }
             }
@@ -131,7 +127,6 @@ struct ls_local_memory_constraint {
     }
 
     inline void reset_superstep(unsigned step) {
-
         for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) {
             step_processor_memory[step][proc] = 0;
         }
@@ -153,9 +148,8 @@ struct ls_local_memory_constraint {
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct ls_local_inc_edges_memory_constraint {
-
     using Graph_impl_t = Graph_t;
 
     const SetSchedule<Graph_t> *set_schedule;
@@ -168,7 +162,6 @@ struct ls_local_inc_edges_memory_constraint {
     ls_local_inc_edges_memory_constraint() : set_schedule(nullptr), vector_schedule(nullptr), graph(nullptr) {}
 
     inline void initialize(const SetSchedule<Graph_t> &set_schedule_, const VectorSchedule<Graph_t> &vec_schedule_) {
-
         if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES) {
             throw std::invalid_argument("Memory constraint type is not LOCAL_INC_EDGES");
         }
@@ -184,16 +177,12 @@ struct ls_local_inc_edges_memory_constraint {
             std::vector<std::unordered_set<vertex_idx_t<Graph_t>>>(set_schedule->getInstance().numberOfProcessors()));
     }
 
-    inline void apply_move(vertex_idx_t<Graph_t> vertex, unsigned from_proc, unsigned from_step, unsigned to_proc,
-                           unsigned to_step) {
-
+    inline void apply_move(vertex_idx_t<Graph_t> vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) {
         step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(vertex);
         step_processor_memory[from_step][from_proc] -= graph->vertex_comm_weight(vertex);
 
         for (const auto &pred : graph->parents(vertex)) {
-
             if (vector_schedule->assignedSuperstep(pred) < to_step) {
-
                 auto pair = step_processor_pred[to_step][to_proc].insert(pred);
                 if (pair.second) {
                     step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(pred);
@@ -201,16 +190,14 @@ struct ls_local_inc_edges_memory_constraint {
             }
 
             if (vector_schedule->assignedSuperstep(pred) < from_step) {
-
                 bool remove = true;
                 for (const auto &succ : graph->children(pred)) {
-
                     if (succ == vertex) {
                         continue;
                     }
 
-                    if (vector_schedule->assignedProcessor(succ) == from_proc &&
-                        vector_schedule->assignedSuperstep(succ) == from_step) {
+                    if (vector_schedule->assignedProcessor(succ) == from_proc
+                        && vector_schedule->assignedSuperstep(succ) == from_step) {
                         remove = false;
                         break;
                     }
@@ -224,36 +211,27 @@ struct ls_local_inc_edges_memory_constraint {
         }
 
         if (to_step != from_step) {
-
             for (const auto &succ : graph->children(vertex)) {
-
                 if (to_step > from_step && vector_schedule->assignedSuperstep(succ) == to_step) {
-
-                    if (step_processor_pred[vector_schedule->assignedSuperstep(succ)]
-                                           [vector_schedule->assignedProcessor(succ)]
-                                               .find(vertex) != step_processor_pred[vector_schedule->assignedSuperstep(
-                                                                    succ)][vector_schedule->assignedProcessor(succ)]
-                                                                    .end()) {
-
-                        step_processor_memory[vector_schedule->assignedSuperstep(succ)]
-                                             [vector_schedule->assignedProcessor(succ)] -=
-                            graph->vertex_comm_weight(vertex);
-
-                        step_processor_pred[vector_schedule->assignedSuperstep(succ)]
-                                           [vector_schedule->assignedProcessor(succ)]
-                                               .erase(vertex);
+                    if (step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].find(
+                            vertex)
+                        != step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                               .end()) {
+                        step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                            -= graph->vertex_comm_weight(vertex);
+
+                        step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].erase(
+                            vertex);
                     }
                 }
 
                 if (vector_schedule->assignedSuperstep(succ) > to_step) {
-
-                    auto pair = step_processor_pred[vector_schedule->assignedSuperstep(succ)]
-                                                   [vector_schedule->assignedProcessor(succ)]
-                                                       .insert(vertex);
+                    auto pair
+                        = step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                              .insert(vertex);
                     if (pair.second) {
-                        step_processor_memory[vector_schedule->assignedSuperstep(succ)]
-                                             [vector_schedule->assignedProcessor(succ)] +=
-                            graph->vertex_comm_weight(vertex);
+                        step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                            += graph->vertex_comm_weight(vertex);
                     }
                 }
             }
@@ -266,22 +244,16 @@ struct ls_local_inc_edges_memory_constraint {
     }
 
     void compute_memory_datastructure(unsigned start_step, unsigned end_step) {
-
         for (unsigned step = start_step; step <= end_step; step++) {
-
             for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) {
-
                 step_processor_memory[step][proc] = 0;
                 step_processor_pred[step][proc].clear();
 
                 for (const auto &node : set_schedule->step_processor_vertices[step][proc]) {
-
                     step_processor_memory[step][proc] += graph->vertex_comm_weight(node);
 
                     for (const auto &pred : graph->parents(node)) {
-
                         if (vector_schedule->assignedSuperstep(pred) < step) {
-
                             auto pair = step_processor_pred[step][proc].insert(pred);
                             if (pair.second) {
                                 step_processor_memory[step][proc] += graph->vertex_comm_weight(pred);
@@ -299,7 +271,6 @@ struct ls_local_inc_edges_memory_constraint {
     }
 
     inline void reset_superstep(unsigned step) {
-
         for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) {
             step_processor_memory[step][proc] = 0;
             step_processor_pred[step][proc].clear();
@@ -312,12 +283,9 @@ struct ls_local_inc_edges_memory_constraint {
     }
 
     inline bool can_move(vertex_idx_t<Graph_t> vertex, const unsigned proc, unsigned step) const {
-
         v_memw_t<Graph_t> inc_memory = graph->vertex_comm_weight(vertex);
         for (const auto &pred : graph->parents(vertex)) {
-
             if (vector_schedule->assignedSuperstep(pred) < step) {
-
                 if (step_processor_pred[step][proc].find(pred) == step_processor_pred[step][proc].end()) {
                     inc_memory += graph->vertex_comm_weight(pred);
                 }
@@ -331,27 +299,22 @@ struct ls_local_inc_edges_memory_constraint {
         }
 
         if (step >= vector_schedule->assignedSuperstep(vertex)) {
-            return step_processor_memory[step][proc] + inc_memory <=
-                   set_schedule->getInstance().getArchitecture().memoryBound(proc);
+            return step_processor_memory[step][proc] + inc_memory
+                   <= set_schedule->getInstance().getArchitecture().memoryBound(proc);
         }
 
-        if (step_processor_memory[step][proc] + inc_memory >
-            set_schedule->getInstance().getArchitecture().memoryBound(proc)) {
-
+        if (step_processor_memory[step][proc] + inc_memory > set_schedule->getInstance().getArchitecture().memoryBound(proc)) {
             return false;
         }
 
         for (const auto &succ : graph->children(vertex)) {
-
             const auto &succ_step = vector_schedule->assignedSuperstep(succ);
             const auto &succ_proc = vector_schedule->assignedProcessor(succ);
 
-            if (succ_step == vector_schedule->assignedSuperstep(vertex) and
-                succ_proc != vector_schedule->assignedProcessor(vertex)) {
-
-                if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) >
-                    set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) {
-
+            if (succ_step == vector_schedule->assignedSuperstep(vertex)
+                and succ_proc != vector_schedule->assignedProcessor(vertex)) {
+                if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex)
+                    > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) {
                     return false;
                 }
             }
@@ -361,9 +324,8 @@ struct ls_local_inc_edges_memory_constraint {
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct ls_local_sources_inc_edges_memory_constraint {
-
     using Graph_impl_t = Graph_t;
 
     const SetSchedule<Graph_t> *set_schedule;
@@ -381,8 +343,8 @@ struct ls_local_sources_inc_edges_memory_constraint {
     }
 
     inline void initialize(const SetSchedule<Graph_t> &set_schedule_, const VectorSchedule<Graph_t> &vec_schedule_) {
-
-        if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES) {
+        if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType()
+            != MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES) {
             throw std::invalid_argument("Memory constraint type is not LOCAL_SOURCES_INC_EDGES");
         }
 
@@ -397,18 +359,14 @@ struct ls_local_sources_inc_edges_memory_constraint {
             std::vector<std::unordered_set<vertex_idx_t<Graph_t>>>(set_schedule->getInstance().numberOfProcessors()));
     }
 
-    inline void apply_move(vertex_idx_t<Graph_t> vertex, unsigned from_proc, unsigned from_step, unsigned to_proc,
-                           unsigned to_step) {
-
+    inline void apply_move(vertex_idx_t<Graph_t> vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) {
         if (is_source(vertex, *graph)) {
             step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex);
             step_processor_memory[from_step][from_proc] -= graph->vertex_mem_weight(vertex);
         }
 
         for (const auto &pred : graph->parents(vertex)) {
-
             if (vector_schedule->assignedSuperstep(pred) < to_step) {
-
                 auto pair = step_processor_pred[to_step][to_proc].insert(pred);
                 if (pair.second) {
                     step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(pred);
@@ -416,16 +374,14 @@ struct ls_local_sources_inc_edges_memory_constraint {
             }
 
             if (vector_schedule->assignedSuperstep(pred) < from_step) {
-
                 bool remove = true;
                 for (const auto &succ : graph->children(pred)) {
-
                     if (succ == vertex) {
                         continue;
                     }
 
-                    if (vector_schedule->assignedProcessor(succ) == from_proc &&
-                        vector_schedule->assignedSuperstep(succ) == from_step) {
+                    if (vector_schedule->assignedProcessor(succ) == from_proc
+                        && vector_schedule->assignedSuperstep(succ) == from_step) {
                         remove = false;
                         break;
                     }
@@ -439,36 +395,27 @@ struct ls_local_sources_inc_edges_memory_constraint {
         }
 
         if (to_step != from_step) {
-
             for (const auto &succ : graph->children(vertex)) {
-
                 if (to_step > from_step && vector_schedule->assignedSuperstep(succ) == to_step) {
-
-                    if (step_processor_pred[vector_schedule->assignedSuperstep(succ)]
-                                           [vector_schedule->assignedProcessor(succ)]
-                                               .find(vertex) != step_processor_pred[vector_schedule->assignedSuperstep(
-                                                                    succ)][vector_schedule->assignedProcessor(succ)]
-                                                                    .end()) {
-
-                        step_processor_memory[vector_schedule->assignedSuperstep(succ)]
-                                             [vector_schedule->assignedProcessor(succ)] -=
-                            graph->vertex_comm_weight(vertex);
-
-                        step_processor_pred[vector_schedule->assignedSuperstep(succ)]
-                                           [vector_schedule->assignedProcessor(succ)]
-                                               .erase(vertex);
+                    if (step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].find(
+                            vertex)
+                        != step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                               .end()) {
+                        step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                            -= graph->vertex_comm_weight(vertex);
+
+                        step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].erase(
+                            vertex);
                     }
                 }
 
                 if (vector_schedule->assignedSuperstep(succ) > to_step) {
-
-                    auto pair = step_processor_pred[vector_schedule->assignedSuperstep(succ)]
-                                                   [vector_schedule->assignedProcessor(succ)]
-                                                       .insert(vertex);
+                    auto pair
+                        = step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                              .insert(vertex);
                     if (pair.second) {
-                        step_processor_memory[vector_schedule->assignedSuperstep(succ)]
-                                             [vector_schedule->assignedProcessor(succ)] +=
-                            graph->vertex_comm_weight(vertex);
+                        step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)]
+                            += graph->vertex_comm_weight(vertex);
                     }
                 }
             }
@@ -476,24 +423,18 @@ struct ls_local_sources_inc_edges_memory_constraint {
     }
 
     void compute_memory_datastructure(unsigned start_step, unsigned end_step) {
-
         for (unsigned step = start_step; step <= end_step; step++) {
-
             for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) {
-
                 step_processor_memory[step][proc] = 0;
                 step_processor_pred[step][proc].clear();
 
                 for (const auto &node : set_schedule->step_processor_vertices[step][proc]) {
-
                     if (is_source(node, *graph)) {
                         step_processor_memory[step][proc] += graph->vertex_mem_weight(node);
                     }
 
                     for (const auto &pred : graph->parents(node)) {
-
                         if (vector_schedule->assignedSuperstep(pred) < step) {
-
                             auto pair = step_processor_pred[step][proc].insert(pred);
                             if (pair.second) {
                                 step_processor_memory[step][proc] += graph->vertex_comm_weight(pred);
@@ -511,7 +452,6 @@ struct ls_local_sources_inc_edges_memory_constraint {
     }
 
     inline void reset_superstep(unsigned step) {
-
         for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) {
             step_processor_memory[step][proc] = 0;
             step_processor_pred[step][proc].clear();
@@ -524,7 +464,6 @@ struct ls_local_sources_inc_edges_memory_constraint {
     }
 
     inline bool can_move(vertex_idx_t<Graph_t> vertex, const unsigned proc, unsigned step) const {
-
         v_memw_t<Graph_t> inc_memory = 0;
 
         if (is_source(vertex, *graph)) {
@@ -532,9 +471,7 @@ struct ls_local_sources_inc_edges_memory_constraint {
         }
 
         for (const auto &pred : graph->parents(vertex)) {
-
             if (vector_schedule->assignedSuperstep(pred) < step) {
-
                 if (step_processor_pred[step][proc].find(pred) == step_processor_pred[step][proc].end()) {
                     inc_memory += graph->vertex_comm_weight(pred);
                 }
@@ -548,37 +485,30 @@ struct ls_local_sources_inc_edges_memory_constraint {
         }
 
         if (vector_schedule->assignedSuperstep(vertex) <= step) {
-
-            return step_processor_memory[step][proc] + inc_memory <=
-                   set_schedule->getInstance().getArchitecture().memoryBound(proc);
+            return step_processor_memory[step][proc] + inc_memory
+                   <= set_schedule->getInstance().getArchitecture().memoryBound(proc);
         }
 
-        if (step_processor_memory[step][proc] + inc_memory >
-            set_schedule->getInstance().getArchitecture().memoryBound(proc)) {
+        if (step_processor_memory[step][proc] + inc_memory > set_schedule->getInstance().getArchitecture().memoryBound(proc)) {
             return false;
         }
 
         for (const auto &succ : graph->children(vertex)) {
-
             const auto &succ_step = vector_schedule->assignedSuperstep(succ);
             const auto &succ_proc = vector_schedule->assignedProcessor(succ);
 
             if (succ_step == vector_schedule->assignedSuperstep(vertex)) {
-
                 if (vector_schedule->assignedProcessor(vertex) != succ_proc || (not is_source(vertex, *graph))) {
-
-                    if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) >
-                        set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) {
+                    if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex)
+                        > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) {
                         return false;
                     }
 
                 } else {
-
                     if (is_source(vertex, *graph)) {
-
-                        if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) -
-                                graph->vertex_mem_weight(vertex) >
-                            set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) {
+                        if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex)
+                                - graph->vertex_mem_weight(vertex)
+                            > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) {
                             return false;
                         }
                     }
@@ -590,4 +520,4 @@ struct ls_local_sources_inc_edges_memory_constraint {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/MaxBspScheduler.hpp b/include/osp/bsp/scheduler/MaxBspScheduler.hpp
index c6accf25..5a78e382 100644
--- a/include/osp/bsp/scheduler/MaxBspScheduler.hpp
+++ b/include/osp/bsp/scheduler/MaxBspScheduler.hpp
@@ -31,10 +31,9 @@ namespace osp {
  * The Scheduler class provides a common interface for scheduling scheduler in the BSP scheduling system.
  * It defines methods for setting and getting the time limit, as well as computing schedules.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class MaxBspScheduler : public Scheduler<Graph_t> {
-    public:
-
+  public:
     static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
 
     /**
@@ -85,4 +84,4 @@ class MaxBspScheduler : public Scheduler<Graph_t> {
     };
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp b/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp
index 90227221..abd2a1a4 100644
--- a/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp
+++ b/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp
@@ -32,7 +32,7 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 class MultilevelCoarseAndSchedule : public Scheduler<Graph_t> {
   private:
     const BspInstance<Graph_t> *original_inst;
@@ -58,23 +58,31 @@ class MultilevelCoarseAndSchedule : public Scheduler<Graph_t> {
 
   public:
     MultilevelCoarseAndSchedule()
-        : Scheduler<Graph_t>(), original_inst(nullptr), sched(nullptr), improver(nullptr), ml_coarser(nullptr),
+        : Scheduler<Graph_t>(), original_inst(nullptr), sched(nullptr), improver(nullptr), ml_coarser(nullptr), active_graph(-1L) {
+          };
+    MultilevelCoarseAndSchedule(Scheduler<Graph_t_coarse> &sched_, MultilevelCoarser<Graph_t, Graph_t_coarse> &ml_coarser_)
+        : Scheduler<Graph_t>(),
+          original_inst(nullptr),
+          sched(&sched_),
+          improver(nullptr),
+          ml_coarser(&ml_coarser_),
           active_graph(-1L) {};
     MultilevelCoarseAndSchedule(Scheduler<Graph_t_coarse> &sched_,
+                                ImprovementScheduler<Graph_t_coarse> &improver_,
                                 MultilevelCoarser<Graph_t, Graph_t_coarse> &ml_coarser_)
-        : Scheduler<Graph_t>(), original_inst(nullptr), sched(&sched_), improver(nullptr), ml_coarser(&ml_coarser_),
-          active_graph(-1L) {};
-    MultilevelCoarseAndSchedule(Scheduler<Graph_t_coarse> &sched_, ImprovementScheduler<Graph_t_coarse> &improver_,
-                                MultilevelCoarser<Graph_t, Graph_t_coarse> &ml_coarser_)
-        : Scheduler<Graph_t>(), original_inst(nullptr), sched(&sched_), improver(&improver_), ml_coarser(&ml_coarser_),
+        : Scheduler<Graph_t>(),
+          original_inst(nullptr),
+          sched(&sched_),
+          improver(&improver_),
+          ml_coarser(&ml_coarser_),
           active_graph(-1L) {};
     virtual ~MultilevelCoarseAndSchedule() = default;
 
     inline void setInitialScheduler(Scheduler<Graph_t_coarse> &sched_) { sched = &sched_; };
+
     inline void setImprovementScheduler(ImprovementScheduler<Graph_t_coarse> &improver_) { improver = &improver_; };
-    inline void setMultilevelCoarser(MultilevelCoarser<Graph_t, Graph_t_coarse> &ml_coarser_) {
-        ml_coarser = &ml_coarser_;
-    };
+
+    inline void setMultilevelCoarser(MultilevelCoarser<Graph_t, Graph_t_coarse> &ml_coarser_) { ml_coarser = &ml_coarser_; };
 
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override;
 
@@ -82,13 +90,12 @@ class MultilevelCoarseAndSchedule : public Scheduler<Graph_t> {
         if (improver == nullptr) {
             return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName();
         } else {
-            return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName() +
-                   "-I:" + improver->getScheduleName();
+            return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName() + "-I:" + improver->getScheduleName();
         }
     };
 };
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::compute_initial_schedule() {
     active_graph = static_cast<long int>(ml_coarser->dag_history.size());
     active_graph--;
@@ -109,32 +116,33 @@ RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::compute_init
     return status;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::improve_active_schedule() {
     if (improver) {
-        if (active_instance->getComputationalDag().num_vertices() == 0)
+        if (active_instance->getComputationalDag().num_vertices() == 0) {
             return RETURN_STATUS::OSP_SUCCESS;
+        }
         return improver->improveSchedule(*active_schedule);
     }
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::expand_active_schedule() {
     assert((active_graph > 0L) && (static_cast<long unsigned>(active_graph) < ml_coarser->dag_history.size()));
 
     std::unique_ptr<BspInstance<Graph_t_coarse>> expanded_instance = std::make_unique<BspInstance<Graph_t_coarse>>(
         *(ml_coarser->dag_history.at(static_cast<std::size_t>(active_graph) - 1)), original_inst->getArchitecture());
-    std::unique_ptr<BspSchedule<Graph_t_coarse>> expanded_schedule =
-        std::make_unique<BspSchedule<Graph_t_coarse>>(*expanded_instance);
+    std::unique_ptr<BspSchedule<Graph_t_coarse>> expanded_schedule
+        = std::make_unique<BspSchedule<Graph_t_coarse>>(*expanded_instance);
 
     for (const auto &node : expanded_instance->getComputationalDag().vertices()) {
         expanded_schedule->setAssignedProcessor(
-            node, active_schedule->assignedProcessor(
-                      ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
+            node,
+            active_schedule->assignedProcessor(ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
         expanded_schedule->setAssignedSuperstep(
-            node, active_schedule->assignedSuperstep(
-                      ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
+            node,
+            active_schedule->assignedSuperstep(ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
     }
 
     assert(expanded_schedule->satisfiesPrecedenceConstraints());
@@ -157,18 +165,18 @@ RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::expand_activ
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::expand_active_schedule_to_original_schedule(
     BspSchedule<Graph_t> &schedule) {
     assert(active_graph == 0L);
 
     for (const auto &node : getOriginalInstance()->getComputationalDag().vertices()) {
         schedule.setAssignedProcessor(
-            node, active_schedule->assignedProcessor(
-                      ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
+            node,
+            active_schedule->assignedProcessor(ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
         schedule.setAssignedSuperstep(
-            node, active_schedule->assignedSuperstep(
-                      ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
+            node,
+            active_schedule->assignedSuperstep(ml_coarser->contraction_maps.at(static_cast<std::size_t>(active_graph))->at(node)));
     }
 
     active_graph--;
@@ -180,7 +188,7 @@ RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::expand_activ
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::run_expansions(BspSchedule<Graph_t> &schedule) {
     assert(active_graph >= 0L && static_cast<long unsigned>(active_graph) == ml_coarser->dag_history.size() - 1);
 
@@ -196,14 +204,14 @@ RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::run_expansio
     return status;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::clear_computation_data() {
     active_graph = -1L;
     active_instance = std::unique_ptr<BspInstance<Graph_t_coarse>>();
     active_schedule = std::unique_ptr<BspSchedule<Graph_t_coarse>>();
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::computeSchedule(BspSchedule<Graph_t> &schedule) {
     clear_computation_data();
 
@@ -234,4 +242,4 @@ RETURN_STATUS MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>::computeSched
     return status;
 }
 
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp
index fa458ba9..53ee1529 100644
--- a/include/osp/bsp/scheduler/Scheduler.hpp
+++ b/include/osp/bsp/scheduler/Scheduler.hpp
@@ -18,12 +18,13 @@ limitations under the License.
 
 #pragma once
 
+#include <string>
+
 #include "osp/auxiliary/return_status.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/BspScheduleCS.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
-#include <string>
 
 namespace osp {
 
@@ -35,9 +36,8 @@ namespace osp {
  * It specifies the contract for computing standard BSP schedules (BspSchedule) and communication-aware schedules
  * (BspScheduleCS).
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class Scheduler {
-
     static_assert(is_computational_dag_v<Graph_t>, "Scheduler can only be used with computational DAGs.");
 
   public:
@@ -80,7 +80,6 @@ class Scheduler {
      *         RETURN_STATUS::ERROR if an error occurred, or other status codes as appropriate.
      */
     virtual RETURN_STATUS computeScheduleCS(BspScheduleCS<Graph_t> &schedule) {
-
         auto result = computeSchedule(schedule);
         if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) {
             schedule.setAutoCommunicationSchedule();
@@ -91,4 +90,4 @@ class Scheduler {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/bsp/scheduler/Serial.hpp b/include/osp/bsp/scheduler/Serial.hpp
index db2aeef9..cde1b706 100644
--- a/include/osp/bsp/scheduler/Serial.hpp
+++ b/include/osp/bsp/scheduler/Serial.hpp
@@ -18,11 +18,13 @@ limitations under the License.
 
 #pragma once
 
-#include "Scheduler.hpp"
 #include <deque>
 #include <limits>
 #include <string>
 #include <vector>
+
+#include "Scheduler.hpp"
+
 namespace osp {
 
 /**
@@ -32,9 +34,8 @@ namespace osp {
  * smallest number of supersteps.
  *
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class Serial : public Scheduler<Graph_t> {
-
   public:
     /**
      * @brief Default constructor for Serial.
@@ -51,8 +52,9 @@ class Serial : public Scheduler<Graph_t> {
         const auto &dag = instance.getComputationalDag();
         const auto num_vertices = dag.num_vertices();
 
-        if (num_vertices == 0)
+        if (num_vertices == 0) {
             return RETURN_STATUS::OSP_SUCCESS;
+        }
 
         const auto &arch = instance.getArchitecture();
 
@@ -114,8 +116,7 @@ class Serial : public Scheduler<Graph_t> {
                 for (const auto &p : node_type_compatible_processors[v_type]) {
                     bool parents_compatible = true;
                     for (const auto &parent : dag.parents(v)) {
-                        if (schedule.assignedSuperstep(parent) == current_superstep &&
-                            schedule.assignedProcessor(parent) != p) {
+                        if (schedule.assignedSuperstep(parent) == current_superstep && schedule.assignedProcessor(parent) != p) {
                             parents_compatible = false;
                             break;
                         }
@@ -155,4 +156,4 @@ class Serial : public Scheduler<Graph_t> {
     std::string getScheduleName() const override { return "Serial"; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/coarser/BspScheduleCoarser.hpp b/include/osp/coarser/BspScheduleCoarser.hpp
index ea4cf9f9..a65e92e5 100644
--- a/include/osp/coarser/BspScheduleCoarser.hpp
+++ b/include/osp/coarser/BspScheduleCoarser.hpp
@@ -31,9 +31,8 @@ namespace osp {
  * @brief Abstract base class for coarsening ComputationalDags.
  *
  */
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 class BspScheduleCoarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
-
   private:
     const BspSchedule<Graph_t_in> *schedule;
 
@@ -56,7 +55,6 @@ class BspScheduleCoarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_o
     //                        std::vector<vertex_idx_t<Graph_t_out>> &reverse_vertex_map) override {
 
     virtual std::vector<vertex_idx_t<Graph_t_out>> generate_vertex_contraction_map(const Graph_t_in &dag_in) override {
-
         using VertexType_in = vertex_idx_t<Graph_t_in>;
         using VertexType_out = vertex_idx_t<Graph_t_out>;
 
@@ -70,23 +68,18 @@ class BspScheduleCoarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_o
         bool schedule_respects_types = true;
 
         for (unsigned step = 0; step < schedule->numberOfSupersteps(); step++) {
-
             for (unsigned proc = 0; proc < schedule->getInstance().numberOfProcessors(); proc++) {
-
                 if (set_schedule.step_processor_vertices[step][proc].size() > 0) {
-
                     v_workw_t<Graph_t_in> total_work = 0;
                     v_memw_t<Graph_t_in> total_memory = 0;
                     v_commw_t<Graph_t_in> total_communication = 0;
 
                     vertex_map.push_back(std::vector<VertexType_in>());
 
-                    v_type_t<Graph_t_in> type =
-                        dag_in.vertex_type(*(set_schedule.step_processor_vertices[step][proc].begin()));
+                    v_type_t<Graph_t_in> type = dag_in.vertex_type(*(set_schedule.step_processor_vertices[step][proc].begin()));
                     bool homogeneous_types = true;
 
                     for (const auto &vertex : set_schedule.step_processor_vertices[step][proc]) {
-
                         if (dag_in.vertex_type(vertex) != type) {
                             homogeneous_types = false;
                         }
@@ -99,8 +92,9 @@ class BspScheduleCoarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_o
                         total_memory += dag_in.vertex_mem_weight(vertex);
                     }
 
-                    if (schedule_respects_types)
+                    if (schedule_respects_types) {
                         schedule_respects_types = homogeneous_types;
+                    }
                 }
             }
         }
@@ -109,4 +103,4 @@ class BspScheduleCoarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_o
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/coarser/Coarser.hpp b/include/osp/coarser/Coarser.hpp
index a3400442..8ea9c400 100644
--- a/include/osp/coarser/Coarser.hpp
+++ b/include/osp/coarser/Coarser.hpp
@@ -22,8 +22,8 @@ limitations under the License.
 #include <set>
 #include <vector>
 
-#include "osp/bsp/model/BspSchedule.hpp"
 #include "coarser_util.hpp"
+#include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
 #include "osp/concepts/graph_traits.hpp"
@@ -35,11 +35,11 @@ namespace osp {
  * @brief Abstract base class for coarsening ComputationalDags.
  *
  */
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 class Coarser {
-
     static_assert(is_computational_dag_v<Graph_t_in>, "Graph_t_in must be a computational DAG");
-    static_assert(is_constructable_cdag_v<Graph_t_out> || is_direct_constructable_cdag_v<Graph_t_out>, "Graph_t_out must be a (direct) constructable computational DAG");
+    static_assert(is_constructable_cdag_v<Graph_t_out> || is_direct_constructable_cdag_v<Graph_t_out>,
+                  "Graph_t_out must be a (direct) constructable computational DAG");
 
     // probably too strict, need to be refined.
     // maybe add concept for when Gtaph_t2 is constructable/coarseable from Graph_t_in
@@ -59,8 +59,10 @@ class Coarser {
      * @param vertex_contraction_map Output mapping from dag_in to coarsened_dag.
      * @return A status code indicating the success or failure of the coarsening operation.
      */
-    virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
-                            std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) = 0;
+    virtual bool coarsenDag(const Graph_t_in &dag_in,
+                            Graph_t_out &coarsened_dag,
+                            std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map)
+        = 0;
 
     /**
      * @brief Get the name of the coarsening algorithm.
@@ -79,16 +81,16 @@ class Coarser {
  * @brief Abstract base class for coarsening ComputationalDags.
  *
  */
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 class CoarserGenContractionMap : public Coarser<Graph_t_in, Graph_t_out> {
-
   public:
     virtual std::vector<vertex_idx_t<Graph_t_out>> generate_vertex_contraction_map(const Graph_t_in &dag_in) = 0;
 
-    virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
+    virtual bool coarsenDag(const Graph_t_in &dag_in,
+                            Graph_t_out &coarsened_dag,
                             std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) override {
-
-        vertex_contraction_map = dag_in.num_vertices() == 0? std::vector<vertex_idx_t<Graph_t_out>>() : generate_vertex_contraction_map(dag_in);
+        vertex_contraction_map = dag_in.num_vertices() == 0 ? std::vector<vertex_idx_t<Graph_t_out>>()
+                                                            : generate_vertex_contraction_map(dag_in);
 
         return coarser_util::construct_coarse_dag(dag_in, coarsened_dag, vertex_contraction_map);
     }
@@ -104,19 +106,17 @@ class CoarserGenContractionMap : public Coarser<Graph_t_in, Graph_t_out> {
  * @brief Abstract base class for coarsening ComputationalDags.
  *
  */
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 class CoarserGenExpansionMap : public Coarser<Graph_t_in, Graph_t_out> {
-
   public:
-    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>>
-    generate_vertex_expansion_map(const Graph_t_in &dag_in) = 0;
+    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &dag_in) = 0;
 
-    virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
+    virtual bool coarsenDag(const Graph_t_in &dag_in,
+                            Graph_t_out &coarsened_dag,
                             std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) override {
-
         if (dag_in.num_vertices() == 0) {
-          vertex_contraction_map = std::vector<vertex_idx_t<Graph_t_out>>();
-          return true;
+            vertex_contraction_map = std::vector<vertex_idx_t<Graph_t_out>>();
+            return true;
         }
 
         std::vector<std::vector<vertex_idx_t<Graph_t_in>>> vertex_expansion_map = generate_vertex_expansion_map(dag_in);
@@ -124,8 +124,7 @@ class CoarserGenExpansionMap : public Coarser<Graph_t_in, Graph_t_out> {
 
         coarser_util::reorder_expansion_map<Graph_t_in>(dag_in, vertex_expansion_map);
 
-        vertex_contraction_map =
-            coarser_util::invert_vertex_expansion_map<Graph_t_in, Graph_t_out>(vertex_expansion_map);
+        vertex_contraction_map = coarser_util::invert_vertex_expansion_map<Graph_t_in, Graph_t_out>(vertex_expansion_map);
 
         return coarser_util::construct_coarse_dag(dag_in, coarsened_dag, vertex_contraction_map);
     }
@@ -142,4 +141,4 @@ class CoarserGenExpansionMap : public Coarser<Graph_t_in, Graph_t_out> {
     virtual ~CoarserGenExpansionMap() = default;
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/coarser/MultilevelCoarser.hpp b/include/osp/coarser/MultilevelCoarser.hpp
index f8a1434e..4baecd3d 100644
--- a/include/osp/coarser/MultilevelCoarser.hpp
+++ b/include/osp/coarser/MultilevelCoarser.hpp
@@ -30,10 +30,10 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 class MultilevelCoarseAndSchedule;
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
     friend class MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>;
 
@@ -48,7 +48,8 @@ class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
 
     RETURN_STATUS add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map);
     RETURN_STATUS add_contraction(std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map);
-    RETURN_STATUS add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map, const Graph_t_coarse &contracted_graph);
+    RETURN_STATUS add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map,
+                                  const Graph_t_coarse &contracted_graph);
     RETURN_STATUS add_contraction(std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map, Graph_t_coarse &&contracted_graph);
     void add_identity_contraction();
 
@@ -64,7 +65,8 @@ class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
     MultilevelCoarser(const Graph_t &graph) : original_graph(&graph) {};
     virtual ~MultilevelCoarser() = default;
 
-    bool coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag,
+    bool coarsenDag(const Graph_t &dag_in,
+                    Graph_t_coarse &coarsened_dag,
                     std::vector<vertex_idx_t<Graph_t_coarse>> &vertex_contraction_map) override;
 
     RETURN_STATUS run(const Graph_t &graph);
@@ -73,7 +75,7 @@ class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
     virtual std::string getCoarserName() const override = 0;
 };
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const Graph_t &graph) {
     clear_computation_data();
     original_graph = &graph;
@@ -88,12 +90,12 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const Graph_t &gra
     return status;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const BspInstance<Graph_t> &inst) {
     return run(inst.getComputationalDag());
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarser<Graph_t, Graph_t_coarse>::clear_computation_data() {
     dag_history.clear();
     dag_history.shrink_to_fit();
@@ -102,10 +104,11 @@ void MultilevelCoarser<Graph_t, Graph_t_coarse>::clear_computation_data() {
     contraction_maps.shrink_to_fit();
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarser<Graph_t, Graph_t_coarse>::compactify_dag_history() {
-    if (dag_history.size() < 3)
+    if (dag_history.size() < 3) {
         return;
+    }
 
     size_t dag_indx_first = dag_history.size() - 2;
     size_t map_indx_first = contraction_maps.size() - 2;
@@ -113,11 +116,15 @@ void MultilevelCoarser<Graph_t, Graph_t_coarse>::compactify_dag_history() {
     size_t dag_indx_second = dag_history.size() - 1;
     size_t map_indx_second = contraction_maps.size() - 1;
 
-    if ((static_cast<double>(dag_history[dag_indx_first - 1]->num_vertices()) / static_cast<double>(dag_history[dag_indx_second - 1]->num_vertices())) > 1.25)
+    if ((static_cast<double>(dag_history[dag_indx_first - 1]->num_vertices())
+         / static_cast<double>(dag_history[dag_indx_second - 1]->num_vertices()))
+        > 1.25) {
         return;
+    }
 
     // Compute combined contraction_map
-    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> combi_contraction_map = std::make_unique<std::vector<vertex_idx_t<Graph_t_coarse>>>(contraction_maps[map_indx_first]->size());
+    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> combi_contraction_map
+        = std::make_unique<std::vector<vertex_idx_t<Graph_t_coarse>>>(contraction_maps[map_indx_first]->size());
     for (std::size_t vert = 0; vert < contraction_maps[map_indx_first]->size(); ++vert) {
         combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at(contraction_maps[map_indx_first]->at(vert));
     }
@@ -136,8 +143,9 @@ void MultilevelCoarser<Graph_t, Graph_t_coarse>::compactify_dag_history() {
     contraction_maps[map_indx_first] = std::move(combi_contraction_map);
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
-RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map) {
+template <typename Graph_t, typename Graph_t_coarse>
+RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(
+    const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map) {
     std::unique_ptr<Graph_t_coarse> new_graph = std::make_unique<Graph_t_coarse>();
 
     contraction_maps.emplace_back(contraction_map);
@@ -145,9 +153,11 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const
     bool success = false;
 
     if (dag_history.size() == 0) {
-        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()));
+        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(
+            *(getOriginalGraph()), *new_graph, *(contraction_maps.back()));
     } else {
-        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(*(dag_history.back()), *new_graph, *(contraction_maps.back()));
+        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(
+            *(dag_history.back()), *new_graph, *(contraction_maps.back()));
     }
 
     dag_history.emplace_back(std::move(new_graph));
@@ -160,19 +170,23 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const
     }
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
-RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map) {
+template <typename Graph_t, typename Graph_t_coarse>
+RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(
+    std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map) {
     std::unique_ptr<Graph_t_coarse> new_graph = std::make_unique<Graph_t_coarse>();
 
-    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(new std::vector<vertex_idx_t<Graph_t_coarse>>(std::move(contraction_map)));
+    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(
+        new std::vector<vertex_idx_t<Graph_t_coarse>>(std::move(contraction_map)));
     contraction_maps.emplace_back(std::move(contr_map_ptr));
 
     bool success = false;
 
     if (dag_history.size() == 0) {
-        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()));
+        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(
+            *(getOriginalGraph()), *new_graph, *(contraction_maps.back()));
     } else {
-        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(*(dag_history.back()), *new_graph, *(contraction_maps.back()));
+        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(
+            *(dag_history.back()), *new_graph, *(contraction_maps.back()));
     }
 
     dag_history.emplace_back(std::move(new_graph));
@@ -185,31 +199,35 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::v
     }
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
-RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map, const Graph_t_coarse &contracted_graph) {
+template <typename Graph_t, typename Graph_t_coarse>
+RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(
+    const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map, const Graph_t_coarse &contracted_graph) {
     std::unique_ptr<Graph_t_coarse> graph_ptr(new Graph_t_coarse(contracted_graph));
     dag_history.emplace_back(std::move(graph_ptr));
 
-    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(new std::vector<vertex_idx_t<Graph_t_coarse>>(contraction_map));
+    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(
+        new std::vector<vertex_idx_t<Graph_t_coarse>>(contraction_map));
     contraction_maps.emplace_back(std::move(contr_map_ptr));
 
     compactify_dag_history();
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
-RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map, Graph_t_coarse &&contracted_graph) {
+template <typename Graph_t, typename Graph_t_coarse>
+RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(
+    std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map, Graph_t_coarse &&contracted_graph) {
     std::unique_ptr<Graph_t_coarse> graph_ptr(new Graph_t_coarse(std::move(contracted_graph)));
     dag_history.emplace_back(std::move(graph_ptr));
 
-    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(new std::vector<vertex_idx_t<Graph_t_coarse>>(std::move(contraction_map)));
+    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(
+        new std::vector<vertex_idx_t<Graph_t_coarse>>(std::move(contraction_map)));
     contraction_maps.emplace_back(std::move(contr_map_ptr));
 
     compactify_dag_history();
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 std::vector<vertex_idx_t<Graph_t_coarse>> MultilevelCoarser<Graph_t, Graph_t_coarse>::getCombinedContractionMap() const {
     std::vector<vertex_idx_t<Graph_t_coarse>> combinedContractionMap(original_graph->num_vertices());
     std::iota(combinedContractionMap.begin(), combinedContractionMap.end(), 0);
@@ -223,15 +241,17 @@ std::vector<vertex_idx_t<Graph_t_coarse>> MultilevelCoarser<Graph_t, Graph_t_coa
     return combinedContractionMap;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
-bool MultilevelCoarser<Graph_t, Graph_t_coarse>::coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag,
+template <typename Graph_t, typename Graph_t_coarse>
+bool MultilevelCoarser<Graph_t, Graph_t_coarse>::coarsenDag(const Graph_t &dag_in,
+                                                            Graph_t_coarse &coarsened_dag,
                                                             std::vector<vertex_idx_t<Graph_t_coarse>> &vertex_contraction_map) {
     clear_computation_data();
 
     RETURN_STATUS status = run(dag_in);
 
-    if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND)
+    if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) {
         return false;
+    }
 
     assert(dag_history.size() != 0);
     coarsened_dag = *(dag_history.back());
@@ -241,7 +261,7 @@ bool MultilevelCoarser<Graph_t, Graph_t_coarse>::coarsenDag(const Graph_t &dag_i
     return true;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarser<Graph_t, Graph_t_coarse>::add_identity_contraction() {
     std::size_t n_vert;
     if (dag_history.size() == 0) {
@@ -257,4 +277,4 @@ void MultilevelCoarser<Graph_t, Graph_t_coarse>::add_identity_contraction() {
     compactify_dag_history();
 }
 
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/coarser/Sarkar/Sarkar.hpp b/include/osp/coarser/Sarkar/Sarkar.hpp
index aa0d5560..672fc6d2 100644
--- a/include/osp/coarser/Sarkar/Sarkar.hpp
+++ b/include/osp/coarser/Sarkar/Sarkar.hpp
@@ -35,63 +35,96 @@ limitations under the License.
 namespace osp {
 
 namespace SarkarParams {
-enum class Mode { LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER };
 
-template<typename commCostType>
+enum class Mode {
+    LINES,
+    FAN_IN_FULL,
+    FAN_IN_PARTIAL,
+    FAN_OUT_FULL,
+    FAN_OUT_PARTIAL,
+    LEVEL_EVEN,
+    LEVEL_ODD,
+    FAN_IN_BUFFER,
+    FAN_OUT_BUFFER,
+    HOMOGENEOUS_BUFFER
+};
+
+template <typename commCostType>
 struct Parameters {
     double geomDecay{0.875};
     double leniency{0.0};
     Mode mode{Mode::LINES};
-    commCostType commCost{ static_cast<commCostType>(0) };
-    commCostType maxWeight{ std::numeric_limits<commCostType>::max() };
-    commCostType smallWeightThreshold{ std::numeric_limits<commCostType>::lowest() };
+    commCostType commCost{static_cast<commCostType>(0)};
+    commCostType maxWeight{std::numeric_limits<commCostType>::max()};
+    commCostType smallWeightThreshold{std::numeric_limits<commCostType>::lowest()};
     bool useTopPoset{true};
 };
-} // end namespace SarkarParams
 
-template<typename Graph_t_in, typename Graph_t_out>
+}    // end namespace SarkarParams
+
+template <typename Graph_t_in, typename Graph_t_out>
 class Sarkar : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
-    private:
-        SarkarParams::Parameters< v_workw_t<Graph_t_in> > params;
-        
-        std::vector< vertex_idx_t<Graph_t_in> > getBotPosetMap(const Graph_t_in &graph) const;
-        std::vector< v_workw_t<Graph_t_in> > getTopDistance(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph) const;
-        std::vector< v_workw_t<Graph_t_in> > getBotDistance(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph) const;
-        
-        vertex_idx_t<Graph_t_in> singleContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        vertex_idx_t<Graph_t_in> allChildrenContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        vertex_idx_t<Graph_t_in> someChildrenContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        vertex_idx_t<Graph_t_in> allParentsContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        vertex_idx_t<Graph_t_in> someParentsContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        vertex_idx_t<Graph_t_in> levelContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        
-        vertex_idx_t<Graph_t_in> homogeneous_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
-        std::vector<std::size_t> homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const;
-
-        std::vector<std::size_t> computeNodeHashes(const Graph_t_in &graph, const std::vector< vertex_idx_t<Graph_t_in> > &vertexPoset, const std::vector< v_workw_t<Graph_t_in> > &dist) const;
-
-    public:
-        virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override;
-        std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &dag_in, vertex_idx_t<Graph_t_in> &diff);
-
-        inline void setParameters(const SarkarParams::Parameters< v_workw_t<Graph_t_in> >& params_) { params = params_; };
-        inline SarkarParams::Parameters< v_workw_t<Graph_t_in> >& getParameters() { return params; };
-        inline const SarkarParams::Parameters< v_workw_t<Graph_t_in> >& getParameters() const { return params; };
-
-        Sarkar(SarkarParams::Parameters< v_workw_t<Graph_t_in> > params_ = SarkarParams::Parameters< v_workw_t<Graph_t_in> >()) : params(params_) {};
-
-        Sarkar(const Sarkar &) = default;
-        Sarkar(Sarkar &&) = default;
-        Sarkar &operator=(const Sarkar &) = default;
-        Sarkar &operator=(Sarkar &&) = default;
-        virtual ~Sarkar() override = default;
-
-        std::string getCoarserName() const override { return "Sarkar"; }
+  private:
+    SarkarParams::Parameters<v_workw_t<Graph_t_in>> params;
+
+    std::vector<vertex_idx_t<Graph_t_in>> getBotPosetMap(const Graph_t_in &graph) const;
+    std::vector<v_workw_t<Graph_t_in>> getTopDistance(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph) const;
+    std::vector<v_workw_t<Graph_t_in>> getBotDistance(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph) const;
+
+    vertex_idx_t<Graph_t_in> singleContraction(v_workw_t<Graph_t_in> commCost,
+                                               const Graph_t_in &graph,
+                                               std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+    vertex_idx_t<Graph_t_in> allChildrenContraction(v_workw_t<Graph_t_in> commCost,
+                                                    const Graph_t_in &graph,
+                                                    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+    vertex_idx_t<Graph_t_in> someChildrenContraction(v_workw_t<Graph_t_in> commCost,
+                                                     const Graph_t_in &graph,
+                                                     std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+    vertex_idx_t<Graph_t_in> allParentsContraction(v_workw_t<Graph_t_in> commCost,
+                                                   const Graph_t_in &graph,
+                                                   std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+    vertex_idx_t<Graph_t_in> someParentsContraction(v_workw_t<Graph_t_in> commCost,
+                                                    const Graph_t_in &graph,
+                                                    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+    vertex_idx_t<Graph_t_in> levelContraction(v_workw_t<Graph_t_in> commCost,
+                                              const Graph_t_in &graph,
+                                              std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+
+    vertex_idx_t<Graph_t_in> homogeneous_buffer_merge(v_workw_t<Graph_t_in> commCost,
+                                                      const Graph_t_in &graph,
+                                                      std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
+    std::vector<std::size_t> homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const;
+
+    std::vector<std::size_t> computeNodeHashes(const Graph_t_in &graph,
+                                               const std::vector<vertex_idx_t<Graph_t_in>> &vertexPoset,
+                                               const std::vector<v_workw_t<Graph_t_in>> &dist) const;
+
+  public:
+    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override;
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &dag_in,
+                                                                                     vertex_idx_t<Graph_t_in> &diff);
+
+    inline void setParameters(const SarkarParams::Parameters<v_workw_t<Graph_t_in>> &params_) { params = params_; };
+
+    inline SarkarParams::Parameters<v_workw_t<Graph_t_in>> &getParameters() { return params; };
+
+    inline const SarkarParams::Parameters<v_workw_t<Graph_t_in>> &getParameters() const { return params; };
+
+    Sarkar(SarkarParams::Parameters<v_workw_t<Graph_t_in>> params_ = SarkarParams::Parameters<v_workw_t<Graph_t_in>>())
+        : params(params_) {};
+
+    Sarkar(const Sarkar &) = default;
+    Sarkar(Sarkar &&) = default;
+    Sarkar &operator=(const Sarkar &) = default;
+    Sarkar &operator=(Sarkar &&) = default;
+    virtual ~Sarkar() override = default;
+
+    std::string getCoarserName() const override { return "Sarkar"; }
 };
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector< vertex_idx_t<Graph_t_in> > Sarkar<Graph_t_in, Graph_t_out>::getBotPosetMap(const Graph_t_in &graph) const {
-    std::vector< vertex_idx_t<Graph_t_in> > botPosetMap = get_bottom_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<vertex_idx_t<Graph_t_in>> Sarkar<Graph_t_in, Graph_t_out>::getBotPosetMap(const Graph_t_in &graph) const {
+    std::vector<vertex_idx_t<Graph_t_in>> botPosetMap = get_bottom_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
 
     vertex_idx_t<Graph_t_in> max = *std::max_element(botPosetMap.begin(), botPosetMap.end());
     ++max;
@@ -103,9 +136,10 @@ std::vector< vertex_idx_t<Graph_t_in> > Sarkar<Graph_t_in, Graph_t_out>::getBotP
     return botPosetMap;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector< v_workw_t<Graph_t_in> > Sarkar<Graph_t_in, Graph_t_out>::getTopDistance(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph) const {
-    std::vector< v_workw_t<Graph_t_in> > topDist(graph.num_vertices(), 0);
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<v_workw_t<Graph_t_in>> Sarkar<Graph_t_in, Graph_t_out>::getTopDistance(v_workw_t<Graph_t_in> commCost,
+                                                                                   const Graph_t_in &graph) const {
+    std::vector<v_workw_t<Graph_t_in>> topDist(graph.num_vertices(), 0);
 
     for (const auto &vertex : GetTopOrder<Graph_t_in>(graph)) {
         v_workw_t<Graph_t_in> max_temp = 0;
@@ -123,9 +157,10 @@ std::vector< v_workw_t<Graph_t_in> > Sarkar<Graph_t_in, Graph_t_out>::getTopDist
     return topDist;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector< v_workw_t<Graph_t_in> > Sarkar<Graph_t_in, Graph_t_out>::getBotDistance(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph) const {
-    std::vector< v_workw_t<Graph_t_in> > botDist(graph.num_vertices(), 0);
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<v_workw_t<Graph_t_in>> Sarkar<Graph_t_in, Graph_t_out>::getBotDistance(v_workw_t<Graph_t_in> commCost,
+                                                                                   const Graph_t_in &graph) const {
+    std::vector<v_workw_t<Graph_t_in>> botDist(graph.num_vertices(), 0);
 
     for (const auto &vertex : GetTopOrderReverse<Graph_t_in>(graph)) {
         v_workw_t<Graph_t_in> max_temp = 0;
@@ -143,33 +178,47 @@ std::vector< v_workw_t<Graph_t_in> > Sarkar<Graph_t_in, Graph_t_out>::getBotDist
     return botDist;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexPoset = params.useTopPoset ? get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph) : getBotPosetMap(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexPoset
+        = params.useTopPoset ? get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph) : getBotPosetMap(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     auto cmp = [](const std::tuple<long, VertexType, VertexType> &lhs, const std::tuple<long, VertexType, VertexType> &rhs) {
         return (std::get<0>(lhs) > std::get<0>(rhs))
-                || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) < std::get<1>(rhs)))
-                || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) == std::get<1>(rhs)) && (std::get<2>(lhs) < std::get<2>(rhs)));
+               || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) < std::get<1>(rhs)))
+               || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) == std::get<1>(rhs))
+                   && (std::get<2>(lhs) < std::get<2>(rhs)));
     };
     std::set<std::tuple<long, VertexType, VertexType>, decltype(cmp)> edgePriority(cmp);
 
     for (const VertexType &edgeSrc : graph.vertices()) {
         for (const VertexType &edgeTgt : graph.children(edgeSrc)) {
-
             if constexpr (has_typed_vertices_v<Graph_t_in>) {
-                if (graph.vertex_type(edgeSrc) != graph.vertex_type(edgeTgt)) continue;
+                if (graph.vertex_type(edgeSrc) != graph.vertex_type(edgeTgt)) {
+                    continue;
+                }
             }
 
-            if (vertexPoset[edgeSrc] + 1 != vertexPoset[edgeTgt]) continue;
-            if (topDist[edgeSrc] + commCost + graph.vertex_work_weight(edgeTgt) != topDist[edgeTgt]) continue;
-            if (botDist[edgeTgt] + commCost + graph.vertex_work_weight(edgeSrc) != botDist[edgeSrc]) continue;
-            if (graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt) > params.maxWeight) continue;
+            if (vertexPoset[edgeSrc] + 1 != vertexPoset[edgeTgt]) {
+                continue;
+            }
+            if (topDist[edgeSrc] + commCost + graph.vertex_work_weight(edgeTgt) != topDist[edgeTgt]) {
+                continue;
+            }
+            if (botDist[edgeTgt] + commCost + graph.vertex_work_weight(edgeSrc) != botDist[edgeSrc]) {
+                continue;
+            }
+            if (graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt) > params.maxWeight) {
+                continue;
+            }
 
             v_workw_t<Graph_t_in> maxPath = topDist[edgeSrc] + botDist[edgeTgt] + commCost;
             v_workw_t<Graph_t_in> maxParentDist = 0;
@@ -179,19 +228,24 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_wo
                 maxParentDist = std::max(maxParentDist, topDist[par] + commCost);
             }
             for (const auto &par : graph.parents(edgeTgt)) {
-                if (par == edgeSrc) continue;
+                if (par == edgeSrc) {
+                    continue;
+                }
                 maxParentDist = std::max(maxParentDist, topDist[par] + commCost);
             }
 
             for (const auto &chld : graph.children(edgeSrc)) {
-                if (chld == edgeTgt) continue;
+                if (chld == edgeTgt) {
+                    continue;
+                }
                 maxChildDist = std::max(maxChildDist, botDist[chld] + commCost);
             }
             for (const auto &chld : graph.children(edgeTgt)) {
                 maxChildDist = std::max(maxChildDist, botDist[chld] + commCost);
             }
 
-            v_workw_t<Graph_t_in> newMaxPath = maxParentDist + maxChildDist + graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt);
+            v_workw_t<Graph_t_in> newMaxPath
+                = maxParentDist + maxChildDist + graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt);
             long savings = static_cast<long>(maxPath) - static_cast<long>(newMaxPath);
 
             // cannot have leniency here as it may destroy symmetries
@@ -204,8 +258,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_wo
     std::vector<bool> partitionedSourceFlag(graph.num_vertices(), false);
     std::vector<bool> partitionedTargetFlag(graph.num_vertices(), false);
 
-    vertex_idx_t<Graph_t_in> maxCorseningNum = graph.num_vertices() - static_cast< vertex_idx_t<Graph_t_in> >(static_cast<double>(graph.num_vertices()) * params.geomDecay);
-
+    vertex_idx_t<Graph_t_in> maxCorseningNum
+        = graph.num_vertices()
+          - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
 
     vertex_idx_t<Graph_t_in> counter = 0;
     long minSave = std::numeric_limits<long>::lowest();
@@ -215,13 +270,23 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_wo
         const VertexType &edgeTgt = std::get<2>(*prioIter);
 
         // Iterations halt
-        if (edgeSave < minSave) break;
+        if (edgeSave < minSave) {
+            break;
+        }
 
         // Check whether we can glue
-        if (partitionedSourceFlag[edgeSrc]) continue;
-        if (partitionedSourceFlag[edgeTgt]) continue;
-        if (partitionedTargetFlag[edgeSrc]) continue;
-        if (partitionedTargetFlag[edgeTgt]) continue;
+        if (partitionedSourceFlag[edgeSrc]) {
+            continue;
+        }
+        if (partitionedSourceFlag[edgeTgt]) {
+            continue;
+        }
+        if (partitionedTargetFlag[edgeSrc]) {
+            continue;
+        }
+        if (partitionedTargetFlag[edgeTgt]) {
+            continue;
+        }
 
         bool shouldSkipSrc = false;
         for (const VertexType &chld : graph.children(edgeSrc)) {
@@ -237,7 +302,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_wo
                 break;
             }
         }
-        if (shouldSkipSrc && shouldSkipTgt) continue;
+        if (shouldSkipSrc && shouldSkipTgt) {
+            continue;
+        }
 
         // Adding to partition
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{edgeSrc, edgeTgt});
@@ -251,8 +318,12 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_wo
 
     expansionMapOutput.reserve(graph.num_vertices() - counter);
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedSourceFlag[vert]) continue;
-        if (partitionedTargetFlag[vert]) continue;
+        if (partitionedSourceFlag[vert]) {
+            continue;
+        }
+        if (partitionedTargetFlag[vert]) {
+            continue;
+        }
 
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
@@ -260,23 +331,27 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::singleContraction(v_wo
     return counter;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     auto cmp = [](const std::pair<long, VertexType> &lhs, const std::pair<long, VertexType> &rhs) {
-        return (lhs.first > rhs.first)
-                || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
+        return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
     };
     std::set<std::pair<long, VertexType>, decltype(cmp)> vertPriority(cmp);
 
     for (const VertexType &groupHead : graph.vertices()) {
-        if (graph.out_degree(groupHead) < 2) continue;
+        if (graph.out_degree(groupHead) < 2) {
+            continue;
+        }
 
         bool shouldSkip = false;
         if constexpr (has_typed_vertices_v<Graph_t_in>) {
@@ -287,19 +362,25 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
                 }
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
         for (const VertexType &groupFoot : graph.children(groupHead)) {
             if (vertexPoset[groupFoot] != vertexPoset[groupHead] + 1) {
                 shouldSkip = true;
                 break;
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
         v_workw_t<Graph_t_in> combined_weight = graph.vertex_work_weight(groupHead);
         for (const VertexType &groupFoot : graph.children(groupHead)) {
             combined_weight += graph.vertex_work_weight(groupFoot);
         }
-        if (combined_weight > params.maxWeight) continue;
+        if (combined_weight > params.maxWeight) {
+            continue;
+        }
 
         v_workw_t<Graph_t_in> maxPath = topDist[groupHead] + botDist[groupHead] - graph.vertex_work_weight(groupHead);
         for (const VertexType &chld : graph.children(groupHead)) {
@@ -314,7 +395,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
         }
         for (const VertexType &groupFoot : graph.children(groupHead)) {
             for (const VertexType &par : graph.parents(groupFoot)) {
-                if (par == groupHead) continue;
+                if (par == groupHead) {
+                    continue;
+                }
                 maxParentDist = std::max(maxParentDist, topDist[par] + commCost);
             }
         }
@@ -338,7 +421,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
 
     std::vector<bool> partitionedFlag(graph.num_vertices(), false);
 
-    vertex_idx_t<Graph_t_in> maxCorseningNum = graph.num_vertices() - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
+    vertex_idx_t<Graph_t_in> maxCorseningNum
+        = graph.num_vertices()
+          - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
 
     vertex_idx_t<Graph_t_in> counter = 0;
     long minSave = std::numeric_limits<long>::lowest();
@@ -347,10 +432,14 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
         const VertexType &groupHead = prioIter->second;
 
         // Iterations halt
-        if (vertSave < minSave) break;
+        if (vertSave < minSave) {
+            break;
+        }
 
         // Check whether we can glue
-        if (partitionedFlag[groupHead]) continue;
+        if (partitionedFlag[groupHead]) {
+            continue;
+        }
         bool shouldSkip = false;
         for (const VertexType &groupFoot : graph.children(groupHead)) {
             if (partitionedFlag[groupFoot]) {
@@ -358,7 +447,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
                 break;
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
 
         // Adding to partition
         std::vector<VertexType> part;
@@ -368,8 +459,8 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
             part.emplace_back(groupFoot);
         }
 
-        expansionMapOutput.emplace_back( std::move(part) );
-        counter += static_cast<vertex_idx_t<Graph_t_in>>( graph.out_degree(groupHead) );
+        expansionMapOutput.emplace_back(std::move(part));
+        counter += static_cast<vertex_idx_t<Graph_t_in>>(graph.out_degree(groupHead));
         if (counter > maxCorseningNum) {
             minSave = vertSave;
         }
@@ -380,30 +471,36 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allChildrenContraction
     }
 
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedFlag[vert]) continue;
+        if (partitionedFlag[vert]) {
+            continue;
+        }
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
 
     return counter;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexPoset = getBotPosetMap(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexPoset = getBotPosetMap(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     auto cmp = [](const std::pair<long, VertexType> &lhs, const std::pair<long, VertexType> &rhs) {
-        return (lhs.first > rhs.first)
-                || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
+        return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
     };
     std::set<std::pair<long, VertexType>, decltype(cmp)> vertPriority(cmp);
 
     for (const VertexType &groupFoot : graph.vertices()) {
-        if (graph.in_degree(groupFoot) < 2) continue;
+        if (graph.in_degree(groupFoot) < 2) {
+            continue;
+        }
 
         bool shouldSkip = false;
         if constexpr (has_typed_vertices_v<Graph_t_in>) {
@@ -414,19 +511,25 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
                 }
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
         for (const VertexType &groupHead : graph.parents(groupFoot)) {
             if (vertexPoset[groupFoot] != vertexPoset[groupHead] + 1) {
                 shouldSkip = true;
                 break;
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
         v_workw_t<Graph_t_in> combined_weight = graph.vertex_work_weight(groupFoot);
         for (const VertexType &groupHead : graph.parents(groupFoot)) {
             combined_weight += graph.vertex_work_weight(groupHead);
         }
-        if (combined_weight > params.maxWeight) continue;
+        if (combined_weight > params.maxWeight) {
+            continue;
+        }
 
         v_workw_t<Graph_t_in> maxPath = topDist[groupFoot] + botDist[groupFoot] - graph.vertex_work_weight(groupFoot);
         for (const VertexType &par : graph.parents(groupFoot)) {
@@ -441,7 +544,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
         }
         for (const VertexType &groupHead : graph.parents(groupFoot)) {
             for (const VertexType &chld : graph.children(groupHead)) {
-                if (chld == groupFoot) continue;
+                if (chld == groupFoot) {
+                    continue;
+                }
                 maxChildDist = std::max(maxChildDist, botDist[chld] + commCost);
             }
         }
@@ -465,7 +570,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
 
     std::vector<bool> partitionedFlag(graph.num_vertices(), false);
 
-    vertex_idx_t<Graph_t_in> maxCorseningNum = graph.num_vertices() - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
+    vertex_idx_t<Graph_t_in> maxCorseningNum
+        = graph.num_vertices()
+          - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
 
     vertex_idx_t<Graph_t_in> counter = 0;
     long minSave = std::numeric_limits<long>::lowest();
@@ -474,10 +581,14 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
         const VertexType &groupFoot = prioIter->second;
 
         // Iterations halt
-        if (vertSave < minSave) break;
+        if (vertSave < minSave) {
+            break;
+        }
 
         // Check whether we can glue
-        if (partitionedFlag[groupFoot]) continue;
+        if (partitionedFlag[groupFoot]) {
+            continue;
+        }
         bool shouldSkip = false;
         for (const VertexType &groupHead : graph.parents(groupFoot)) {
             if (partitionedFlag[groupHead]) {
@@ -485,7 +596,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
                 break;
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
 
         // Adding to partition
         std::vector<VertexType> part;
@@ -495,8 +608,8 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
             part.emplace_back(groupHead);
         }
 
-        expansionMapOutput.emplace_back( std::move(part) );
-        counter += static_cast<vertex_idx_t<Graph_t_in>>( graph.in_degree(groupFoot) );
+        expansionMapOutput.emplace_back(std::move(part));
+        counter += static_cast<vertex_idx_t<Graph_t_in>>(graph.in_degree(groupFoot));
         if (counter > maxCorseningNum) {
             minSave = vertSave;
         }
@@ -507,89 +620,62 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::allParentsContraction(
     }
 
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedFlag[vert]) continue;
+        if (partitionedFlag[vert]) {
+            continue;
+        }
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
 
     return counter;
 }
 
-
-
-
-
-
-
-
-
-
-
-
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(const Graph_t_in &dag_in, vertex_idx_t<Graph_t_in> &diff) {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(
+    const Graph_t_in &dag_in, vertex_idx_t<Graph_t_in> &diff) {
     std::vector<std::vector<vertex_idx_t<Graph_t_in>>> expansionMap;
 
     // std::cout << "Mode: " << static_cast<int>(params.mode) << "\n";
-    switch (params.mode)
-    {
-        case SarkarParams::Mode::LINES:
-            {
-                diff = singleContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+    switch (params.mode) {
+        case SarkarParams::Mode::LINES: {
+            diff = singleContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
-        case SarkarParams::Mode::FAN_IN_FULL:
-            {
-                diff = allParentsContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+        case SarkarParams::Mode::FAN_IN_FULL: {
+            diff = allParentsContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
-        case SarkarParams::Mode::FAN_IN_PARTIAL:
-            {
-                diff = someParentsContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+        case SarkarParams::Mode::FAN_IN_PARTIAL: {
+            diff = someParentsContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
-        case SarkarParams::Mode::FAN_OUT_FULL:
-            {
-                diff = allChildrenContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+        case SarkarParams::Mode::FAN_OUT_FULL: {
+            diff = allChildrenContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
-        case SarkarParams::Mode::FAN_OUT_PARTIAL:
-            {
-                diff = someChildrenContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+        case SarkarParams::Mode::FAN_OUT_PARTIAL: {
+            diff = someChildrenContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
-        case SarkarParams::Mode::LEVEL_EVEN:
-            {
-                diff = levelContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+        case SarkarParams::Mode::LEVEL_EVEN: {
+            diff = levelContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
-        case SarkarParams::Mode::LEVEL_ODD:
-            {
-                diff = levelContraction(params.commCost, dag_in, expansionMap);
-            }
-            break;
+        case SarkarParams::Mode::LEVEL_ODD: {
+            diff = levelContraction(params.commCost, dag_in, expansionMap);
+        } break;
 
         case SarkarParams::Mode::FAN_IN_BUFFER:
         case SarkarParams::Mode::FAN_OUT_BUFFER:
-        case SarkarParams::Mode::HOMOGENEOUS_BUFFER:
-            {
-                diff = homogeneous_buffer_merge(params.commCost, dag_in, expansionMap);
-            }
-            break;
-
-        default:
-            {
-                #ifdef __cpp_lib_unreachable
-                    std::unreachable();
-                #endif
-                assert(false);
-            }
-            break;
+        case SarkarParams::Mode::HOMOGENEOUS_BUFFER: {
+            diff = homogeneous_buffer_merge(params.commCost, dag_in, expansionMap);
+        } break;
+
+        default: {
+#ifdef __cpp_lib_unreachable
+            std::unreachable();
+#endif
+            assert(false);
+        } break;
     }
 
     // std::cout << " Diff: " << diff << '\n';
@@ -597,34 +683,38 @@ std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_ou
     return expansionMap;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(const Graph_t_in &dag_in) {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(
+    const Graph_t_in &dag_in) {
     vertex_idx_t<Graph_t_in> dummy;
     return generate_vertex_expansion_map(dag_in, dummy);
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContraction(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     auto cmp = [](const std::pair<long, std::vector<VertexType>> &lhs, const std::pair<long, std::vector<VertexType>> &rhs) {
-        return (lhs.first > rhs.first)
-                || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
+        return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
     };
     std::set<std::pair<long, std::vector<VertexType>>, decltype(cmp)> vertPriority(cmp);
 
     for (const VertexType &groupHead : graph.vertices()) {
-        if (graph.out_degree(groupHead) < 2) continue;
+        if (graph.out_degree(groupHead) < 2) {
+            continue;
+        }
 
         auto cmp_chld = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) {
-            return (topDist[lhs] < topDist[rhs])
-                    || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] > botDist[rhs]))
-                    || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] == botDist[rhs]) && (lhs < rhs));
+            return (topDist[lhs] < topDist[rhs]) || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] > botDist[rhs]))
+                   || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] == botDist[rhs]) && (lhs < rhs));
         };
         std::set<VertexType, decltype(cmp_chld)> childrenPriority(cmp_chld);
         for (const VertexType &chld : graph.children(groupHead)) {
@@ -632,9 +722,13 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
                 childrenPriority.emplace(chld);
             }
         }
-        if (childrenPriority.size() < 2) continue;
+        if (childrenPriority.size() < 2) {
+            continue;
+        }
 
-        std::vector< std::pair< typename std::set<VertexType, decltype(cmp_chld)>::const_iterator, typename std::set<VertexType, decltype(cmp_chld)>::const_iterator > > admissble_children_groups;
+        std::vector<std::pair<typename std::set<VertexType, decltype(cmp_chld)>::const_iterator,
+                              typename std::set<VertexType, decltype(cmp_chld)>::const_iterator>>
+            admissble_children_groups;
         for (auto chld_iter_start = childrenPriority.cbegin(); chld_iter_start != childrenPriority.cend();) {
             if constexpr (has_typed_vertices_v<Graph_t_in>) {
                 if (graph.vertex_type(groupHead) != graph.vertex_type(*chld_iter_start)) {
@@ -646,7 +740,8 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
             const v_workw_t<Graph_t_in> t_dist = topDist[*chld_iter_start];
             const v_workw_t<Graph_t_in> b_dist = botDist[*chld_iter_start];
             auto chld_iter_end = chld_iter_start;
-            while (chld_iter_end != childrenPriority.cend() && t_dist == topDist[*chld_iter_end] && b_dist == botDist[*chld_iter_end]) {
+            while (chld_iter_end != childrenPriority.cend() && t_dist == topDist[*chld_iter_end]
+                   && b_dist == botDist[*chld_iter_end]) {
                 if constexpr (has_typed_vertices_v<Graph_t_in>) {
                     if (graph.vertex_type(groupHead) != graph.vertex_type(*chld_iter_end)) {
                         break;
@@ -674,7 +769,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
                 contractionChildrenSet.emplace(*it);
                 added_weight += graph.vertex_work_weight(*it);
             }
-            if (added_weight > params.maxWeight) break;
+            if (added_weight > params.maxWeight) {
+                break;
+            }
 
             v_workw_t<Graph_t_in> maxPath = 0;
             for (const VertexType &vert : contractionEnsemble) {
@@ -686,7 +783,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
 
             for (const VertexType &vert : contractionEnsemble) {
                 for (const VertexType &par : graph.parents(vert)) {
-                    if (par == groupHead) continue;
+                    if (par == groupHead) {
+                        continue;
+                    }
                     maxParentDist = std::max(maxParentDist, topDist[par] + commCost);
                 }
             }
@@ -718,7 +817,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
     std::vector<bool> partitionedFlag(graph.num_vertices(), false);
     std::vector<bool> partitionedHeadFlag(graph.num_vertices(), false);
 
-    vertex_idx_t<Graph_t_in> maxCorseningNum = graph.num_vertices() - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
+    vertex_idx_t<Graph_t_in> maxCorseningNum
+        = graph.num_vertices()
+          - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
 
     vertex_idx_t<Graph_t_in> counter = 0;
     long minSave = std::numeric_limits<long>::lowest();
@@ -728,7 +829,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
         const std::vector<VertexType> &contractionEnsemble = prioIter->second;
 
         // Iterations halt
-        if (vertSave < minSave) break;
+        if (vertSave < minSave) {
+            break;
+        }
 
         // Check whether we can glue
         bool shouldSkip = false;
@@ -738,21 +841,26 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
                 break;
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
 
         for (const VertexType &chld : graph.children(groupHead)) {
-            if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), chld) == contractionEnsemble.cend()) && (vertexPoset[chld] == vertexPoset[groupHead] + 1)) {
+            if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), chld) == contractionEnsemble.cend())
+                && (vertexPoset[chld] == vertexPoset[groupHead] + 1)) {
                 if ((partitionedFlag[chld]) && (!partitionedHeadFlag[chld])) {
                     shouldSkip = true;
                     break;
                 }
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
 
         // Adding to partition
         expansionMapOutput.emplace_back(contractionEnsemble);
-        counter += static_cast<vertex_idx_t<Graph_t_in>>( contractionEnsemble.size() ) - 1;
+        counter += static_cast<vertex_idx_t<Graph_t_in>>(contractionEnsemble.size()) - 1;
         if (counter > maxCorseningNum) {
             minSave = vertSave;
         }
@@ -763,35 +871,40 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someChildrenContractio
     }
 
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedFlag[vert]) continue;
+        if (partitionedFlag[vert]) {
+            continue;
+        }
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
 
     return counter;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexPoset = getBotPosetMap(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexPoset = getBotPosetMap(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     auto cmp = [](const std::pair<long, std::vector<VertexType>> &lhs, const std::pair<long, std::vector<VertexType>> &rhs) {
-        return (lhs.first > rhs.first)
-                || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
+        return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
     };
     std::set<std::pair<long, std::vector<VertexType>>, decltype(cmp)> vertPriority(cmp);
 
     for (const VertexType &groupFoot : graph.vertices()) {
-        if (graph.in_degree(groupFoot) < 2) continue;
+        if (graph.in_degree(groupFoot) < 2) {
+            continue;
+        }
 
         auto cmp_par = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) {
-            return (botDist[lhs] < botDist[rhs])
-                    || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] > topDist[rhs]))
-                    || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] == topDist[rhs]) && (lhs < rhs));
+            return (botDist[lhs] < botDist[rhs]) || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] > topDist[rhs]))
+                   || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] == topDist[rhs]) && (lhs < rhs));
         };
         std::set<VertexType, decltype(cmp_par)> parentsPriority(cmp_par);
         for (const VertexType &par : graph.parents(groupFoot)) {
@@ -799,9 +912,13 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction
                 parentsPriority.emplace(par);
             }
         }
-        if (parentsPriority.size() < 2) continue;
+        if (parentsPriority.size() < 2) {
+            continue;
+        }
 
-        std::vector< std::pair< typename std::set<VertexType, decltype(cmp_par)>::const_iterator, typename std::set<VertexType, decltype(cmp_par)>::const_iterator > > admissble_parent_groups;
+        std::vector<std::pair<typename std::set<VertexType, decltype(cmp_par)>::const_iterator,
+                              typename std::set<VertexType, decltype(cmp_par)>::const_iterator>>
+            admissble_parent_groups;
         for (auto par_iter_start = parentsPriority.cbegin(); par_iter_start != parentsPriority.cend();) {
             if constexpr (has_typed_vertices_v<Graph_t_in>) {
                 if (graph.vertex_type(groupFoot) != graph.vertex_type(*par_iter_start)) {
@@ -841,19 +958,23 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction
                 contractionParentsSet.emplace(*it);
                 added_weight += graph.vertex_work_weight(*it);
             }
-            if (added_weight > params.maxWeight) break;
+            if (added_weight > params.maxWeight) {
+                break;
+            }
 
             v_workw_t<Graph_t_in> maxPath = 0;
             for (const VertexType &vert : contractionEnsemble) {
                 maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.vertex_work_weight(vert));
             }
-            
+
             v_workw_t<Graph_t_in> maxParentDist = 0;
-            v_workw_t<Graph_t_in> maxChildDist = 0;       
+            v_workw_t<Graph_t_in> maxChildDist = 0;
 
             for (const VertexType &vert : contractionEnsemble) {
                 for (const VertexType &chld : graph.children(vert)) {
-                    if (chld == groupFoot) continue;
+                    if (chld == groupFoot) {
+                        continue;
+                    }
                     maxChildDist = std::max(maxChildDist, botDist[chld] + commCost);
                 }
             }
@@ -885,7 +1006,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction
     std::vector<bool> partitionedFlag(graph.num_vertices(), false);
     std::vector<bool> partitionedFootFlag(graph.num_vertices(), false);
 
-    vertex_idx_t<Graph_t_in> maxCorseningNum = graph.num_vertices() - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
+    vertex_idx_t<Graph_t_in> maxCorseningNum
+        = graph.num_vertices()
+          - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
 
     vertex_idx_t<Graph_t_in> counter = 0;
     long minSave = std::numeric_limits<long>::lowest();
@@ -895,7 +1018,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction
         const std::vector<VertexType> &contractionEnsemble = prioIter->second;
 
         // Iterations halt
-        if (vertSave < minSave) break;
+        if (vertSave < minSave) {
+            break;
+        }
 
         // Check whether we can glue
         bool shouldSkip = false;
@@ -905,21 +1030,26 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction
                 break;
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
 
         for (const VertexType &par : graph.parents(groupFoot)) {
-            if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), par) == contractionEnsemble.cend()) && (vertexPoset[par] + 1 == vertexPoset[groupFoot])) {
+            if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), par) == contractionEnsemble.cend())
+                && (vertexPoset[par] + 1 == vertexPoset[groupFoot])) {
                 if ((partitionedFlag[par]) && (!partitionedFootFlag[par])) {
                     shouldSkip = true;
                     break;
                 }
             }
         }
-        if (shouldSkip) continue;
+        if (shouldSkip) {
+            continue;
+        }
 
         // Adding to partition
         expansionMapOutput.emplace_back(contractionEnsemble);
-        counter += static_cast<vertex_idx_t<Graph_t_in>>( contractionEnsemble.size() ) - 1;
+        counter += static_cast<vertex_idx_t<Graph_t_in>>(contractionEnsemble.size()) - 1;
         if (counter > maxCorseningNum) {
             minSave = vertSave;
         }
@@ -930,43 +1060,48 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::someParentsContraction
     }
 
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedFlag[vert]) continue;
+        if (partitionedFlag[vert]) {
+            continue;
+        }
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
 
     return counter;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexPoset = params.useTopPoset ? get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph) : getBotPosetMap(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexPoset
+        = params.useTopPoset ? get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph) : getBotPosetMap(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     auto cmp = [](const std::pair<long, std::vector<VertexType>> &lhs, const std::pair<long, std::vector<VertexType>> &rhs) {
-        return (lhs.first > rhs.first)
-                || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
+        return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
     };
     std::set<std::pair<long, std::vector<VertexType>>, decltype(cmp)> vertPriority(cmp);
 
     const vertex_idx_t<Graph_t_in> minLevel = *std::min_element(vertexPoset.cbegin(), vertexPoset.cend());
     const vertex_idx_t<Graph_t_in> maxLevel = *std::max_element(vertexPoset.cbegin(), vertexPoset.cend());
 
-    const vertex_idx_t<Graph_t_in> parity = params.mode == SarkarParams::Mode::LEVEL_EVEN? 0 : 1;
+    const vertex_idx_t<Graph_t_in> parity = params.mode == SarkarParams::Mode::LEVEL_EVEN ? 0 : 1;
 
     std::vector<std::vector<vertex_idx_t<Graph_t_in>>> levels(maxLevel - minLevel + 1);
     for (const VertexType &vert : graph.vertices()) {
-        levels[ vertexPoset[vert] - minLevel ].emplace_back(vert);
+        levels[vertexPoset[vert] - minLevel].emplace_back(vert);
     }
 
     for (vertex_idx_t<Graph_t_in> headLevel = minLevel + parity; headLevel < maxLevel; headLevel += 2) {
         const vertex_idx_t<Graph_t_in> footLevel = headLevel + 1;
-        
-        const std::vector<vertex_idx_t<Graph_t_in>> &headVertices = levels[ headLevel - minLevel ];
-        const std::vector<vertex_idx_t<Graph_t_in>> &footVertices = levels[ footLevel - minLevel ];
+
+        const std::vector<vertex_idx_t<Graph_t_in>> &headVertices = levels[headLevel - minLevel];
+        const std::vector<vertex_idx_t<Graph_t_in>> &footVertices = levels[footLevel - minLevel];
 
         Union_Find_Universe<VertexType, std::size_t, v_workw_t<Graph_t_in>, v_memw_t<Graph_t_in>> uf;
         for (const VertexType &vert : headVertices) {
@@ -978,10 +1113,14 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
 
         for (const VertexType &srcVert : headVertices) {
             for (const VertexType &tgtVert : graph.children(srcVert)) {
-                if (vertexPoset[tgtVert] != footLevel) continue;
-                
+                if (vertexPoset[tgtVert] != footLevel) {
+                    continue;
+                }
+
                 if constexpr (has_typed_vertices_v<Graph_t_in>) {
-                    if (graph.vertex_type(srcVert) != graph.vertex_type(tgtVert)) continue;
+                    if (graph.vertex_type(srcVert) != graph.vertex_type(tgtVert)) {
+                        continue;
+                    }
                 }
 
                 uf.join_by_name(srcVert, tgtVert);
@@ -990,8 +1129,12 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
 
         std::vector<std::vector<VertexType>> components = uf.get_connected_components();
         for (std::vector<VertexType> &comp : components) {
-            if (comp.size() < 2) continue;
-            if (uf.get_weight_of_component_by_name(comp.at(0)) > params.maxWeight) continue; 
+            if (comp.size() < 2) {
+                continue;
+            }
+            if (uf.get_weight_of_component_by_name(comp.at(0)) > params.maxWeight) {
+                continue;
+            }
 
             std::sort(comp.begin(), comp.end());
 
@@ -1003,7 +1146,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
             v_workw_t<Graph_t_in> maxParentDist = 0;
             for (const VertexType &vert : comp) {
                 for (const VertexType &par : graph.parents(vert)) {
-                    if (std::binary_search(comp.cbegin(), comp.cend(), par)) continue;
+                    if (std::binary_search(comp.cbegin(), comp.cend(), par)) {
+                        continue;
+                    }
 
                     maxParentDist = std::max(maxParentDist, topDist[par] + commCost);
                 }
@@ -1012,31 +1157,32 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
             v_workw_t<Graph_t_in> maxChildDist = 0;
             for (const VertexType &vert : comp) {
                 for (const VertexType &chld : graph.children(vert)) {
-                    if (std::binary_search(comp.cbegin(), comp.cend(), chld)) continue;
+                    if (std::binary_search(comp.cbegin(), comp.cend(), chld)) {
+                        continue;
+                    }
 
                     maxChildDist = std::max(maxChildDist, botDist[chld] + commCost);
                 }
             }
 
-
             v_workw_t<Graph_t_in> newMaxPath = maxParentDist + maxChildDist;
             for (const VertexType &vert : comp) {
                 newMaxPath += graph.vertex_work_weight(vert);
             }
 
             long savings = static_cast<long>(maxPath) - static_cast<long>(newMaxPath);
-    
+
             if (savings + static_cast<long>(params.leniency * static_cast<double>(maxPath)) >= 0) {
                 vertPriority.emplace(savings, comp);
             }
-
         }
     }
 
     std::vector<bool> partitionedFlag(graph.num_vertices(), false);
 
-    vertex_idx_t<Graph_t_in> maxCorseningNum = graph.num_vertices() - static_cast< vertex_idx_t<Graph_t_in> >(static_cast<double>(graph.num_vertices()) * params.geomDecay);
-
+    vertex_idx_t<Graph_t_in> maxCorseningNum
+        = graph.num_vertices()
+          - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(graph.num_vertices()) * params.geomDecay);
 
     vertex_idx_t<Graph_t_in> counter = 0;
     long minSave = std::numeric_limits<long>::lowest();
@@ -1045,13 +1191,15 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
         const std::vector<VertexType> &comp = prioIter->second;
 
         // Iterations halt
-        if (compSave < minSave) break;
+        if (compSave < minSave) {
+            break;
+        }
 
         // Check whether we can glue
         bool shouldSkipHead = false;
         bool shouldSkipFoot = false;
         for (const VertexType &vert : comp) {
-            if (((vertexPoset[vert] - minLevel - parity) % 2) == 0) {   // head vertex
+            if (((vertexPoset[vert] - minLevel - parity) % 2) == 0) {    // head vertex
                 for (const VertexType &chld : graph.children(vert)) {
                     if ((vertexPoset[chld] == vertexPoset[vert] + 1) && partitionedFlag[chld]) {
                         shouldSkipHead = true;
@@ -1066,11 +1214,13 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
             }
         }
 
-        if (shouldSkipHead && shouldSkipFoot) continue;
+        if (shouldSkipHead && shouldSkipFoot) {
+            continue;
+        }
 
         // Adding to partition
         expansionMapOutput.emplace_back(comp);
-        counter += static_cast<vertex_idx_t<Graph_t_in>>( comp.size() - 1 );
+        counter += static_cast<vertex_idx_t<Graph_t_in>>(comp.size() - 1);
         if (counter > maxCorseningNum) {
             minSave = compSave;
         }
@@ -1082,16 +1232,20 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::levelContraction(v_wor
 
     expansionMapOutput.reserve(graph.num_vertices() - counter);
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedFlag[vert]) continue;
-        
+        if (partitionedFlag[vert]) {
+            continue;
+        }
+
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
 
     return counter;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::computeNodeHashes(const Graph_t_in &graph, const std::vector< vertex_idx_t<Graph_t_in> > &vertexPoset, const std::vector< v_workw_t<Graph_t_in> > &dist) const {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::computeNodeHashes(const Graph_t_in &graph,
+                                                                            const std::vector<vertex_idx_t<Graph_t_in>> &vertexPoset,
+                                                                            const std::vector<v_workw_t<Graph_t_in>> &dist) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
 
     std::vector<std::size_t> hashes(graph.num_vertices());
@@ -1108,14 +1262,18 @@ std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::computeNodeHashes(cons
     return hashes;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const std::size_t number,
+                                                                           const std::size_t minSize,
+                                                                           const std::size_t maxSize) const {
     assert(minSize <= maxSize);
     assert(number > 0);
 
     std::size_t bestDiv = 1U;
     for (std::size_t div : divisorsList(number)) {
-        if (div > maxSize) continue;
+        if (div > maxSize) {
+            continue;
+        }
 
         if (div < minSize && bestDiv < div) {
             bestDiv = div;
@@ -1131,12 +1289,12 @@ std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const
 
     std::size_t bestScore = 0U;
     std::size_t bestBins = number / minSize;
-    for (std::size_t bins = std::max( number / maxSize, static_cast<std::size_t>(2U)); bins <= number / minSize; ++bins) {
+    for (std::size_t bins = std::max(number / maxSize, static_cast<std::size_t>(2U)); bins <= number / minSize; ++bins) {
         if (number % bins == 0U && number != bins) {
             return std::vector<std::size_t>(bins, number / bins);
         }
 
-        std::size_t score = std::min( divisorsList(number / bins).size(), divisorsList((number / bins) + 1).size() );
+        std::size_t score = std::min(divisorsList(number / bins).size(), divisorsList((number / bins) + 1).size());
         if (score >= bestScore) {
             bestScore = score;
             bestBins = bins;
@@ -1145,7 +1303,7 @@ std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const
 
     std::size_t remainder = number % bestBins;
     std::size_t size = number / bestBins;
-    
+
     std::vector<std::size_t> groups;
     for (std::size_t i = 0U; i < bestBins; ++i) {
         if (remainder != 0U) {
@@ -1159,15 +1317,18 @@ std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const
     return groups;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
+template <typename Graph_t_in, typename Graph_t_out>
+vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_merge(
+    v_workw_t<Graph_t_in> commCost,
+    const Graph_t_in &graph,
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
     using VertexType = vertex_idx_t<Graph_t_in>;
     assert(expansionMapOutput.size() == 0);
 
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexTopPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
-    const std::vector< vertex_idx_t<Graph_t_in> > vertexBotPoset = getBotPosetMap(graph);
-    const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
-    const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexTopPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
+    const std::vector<vertex_idx_t<Graph_t_in>> vertexBotPoset = getBotPosetMap(graph);
+    const std::vector<v_workw_t<Graph_t_in>> topDist = getTopDistance(commCost, graph);
+    const std::vector<v_workw_t<Graph_t_in>> botDist = getBotDistance(commCost, graph);
 
     std::vector<std::size_t> hashValuesCombined(graph.num_vertices(), 1729U);
 
@@ -1177,7 +1338,7 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
         for (const VertexType &par : graph.vertices()) {
             for (const VertexType &chld : graph.children(par)) {
                 hash_combine(hashValuesWithParents[chld], hashValues[par]);
-            }        
+            }
         }
         for (const VertexType &vert : graph.vertices()) {
             hash_combine(hashValuesCombined[vert], hashValuesWithParents[vert]);
@@ -1189,7 +1350,7 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
         for (const VertexType &chld : graph.vertices()) {
             for (const VertexType &par : graph.parents(chld)) {
                 hash_combine(hashValuesWithChildren[par], hashValues[chld]);
-            }        
+            }
         }
         for (const VertexType &vert : graph.vertices()) {
             hash_combine(hashValuesCombined[vert], hashValuesWithChildren[vert]);
@@ -1198,12 +1359,16 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
 
     std::unordered_map<std::size_t, std::set<VertexType>> orbits;
     for (const VertexType &vert : graph.vertices()) {
-        if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) continue;
+        if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) {
+            continue;
+        }
 
         const std::size_t hash = hashValuesCombined[vert];
-        auto found_iter = orbits.find(hash); 
+        auto found_iter = orbits.find(hash);
         if (found_iter == orbits.end()) {
-            orbits.emplace(std::piecewise_construct, std::forward_as_tuple(hash), std::forward_as_tuple(std::initializer_list< vertex_idx_t<Graph_t_in> >{vert}));
+            orbits.emplace(std::piecewise_construct,
+                           std::forward_as_tuple(hash),
+                           std::forward_as_tuple(std::initializer_list<vertex_idx_t<Graph_t_in>>{vert}));
         } else {
             found_iter->second.emplace(vert);
         }
@@ -1211,13 +1376,19 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
 
     vertex_idx_t<Graph_t_in> counter = 0;
     std::vector<bool> partitionedFlag(graph.num_vertices(), false);
-    
+
     for (const VertexType &vert : graph.vertices()) {
-        if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) continue;
-        if (partitionedFlag[vert]) continue;
+        if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) {
+            continue;
+        }
+        if (partitionedFlag[vert]) {
+            continue;
+        }
 
         const std::set<VertexType> &orb = orbits.at(hashValuesCombined[vert]);
-        if (orb.size() <= 1U) continue;
+        if (orb.size() <= 1U) {
+            continue;
+        }
 
         std::set<VertexType> parents;
         if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) {
@@ -1235,13 +1406,25 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
 
         std::set<VertexType> secureOrb;
         for (const VertexType &vertCandidate : orb) {
-            if (vertexTopPoset[vertCandidate] != vertexTopPoset[vert]) continue;
-            if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) continue;
-            if (graph.vertex_work_weight(vertCandidate) != graph.vertex_work_weight(vert)) continue;
-            if (topDist[vertCandidate] != topDist[vert]) continue;
-            if (botDist[vertCandidate] != botDist[vert]) continue;
+            if (vertexTopPoset[vertCandidate] != vertexTopPoset[vert]) {
+                continue;
+            }
+            if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) {
+                continue;
+            }
+            if (graph.vertex_work_weight(vertCandidate) != graph.vertex_work_weight(vert)) {
+                continue;
+            }
+            if (topDist[vertCandidate] != topDist[vert]) {
+                continue;
+            }
+            if (botDist[vertCandidate] != botDist[vert]) {
+                continue;
+            }
             if constexpr (has_typed_vertices_v<Graph_t_in>) {
-                if (graph.vertex_type(vertCandidate) != graph.vertex_type(vert)) continue;
+                if (graph.vertex_type(vertCandidate) != graph.vertex_type(vert)) {
+                    continue;
+                }
             }
 
             if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) {
@@ -1249,7 +1432,9 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
                 for (const VertexType &par : graph.parents(vertCandidate)) {
                     candidateParents.emplace(par);
                 }
-                if (candidateParents != parents) continue;
+                if (candidateParents != parents) {
+                    continue;
+                }
             }
 
             if (params.mode == SarkarParams::Mode::FAN_IN_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) {
@@ -1257,18 +1442,27 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
                 for (const VertexType &chld : graph.children(vertCandidate)) {
                     candidateChildren.emplace(chld);
                 }
-                if (candidateChildren != children) continue;
+                if (candidateChildren != children) {
+                    continue;
+                }
             }
 
             secureOrb.emplace(vertCandidate);
         }
-        if (secureOrb.size() <= 1U) continue;
+        if (secureOrb.size() <= 1U) {
+            continue;
+        }
 
-        const v_workw_t<Graph_t_in> desiredVerticesInGroup = graph.vertex_work_weight(vert) == 0 ? std::numeric_limits<v_workw_t<Graph_t_in>>::lowest() : params.smallWeightThreshold / graph.vertex_work_weight(vert);
-        const v_workw_t<Graph_t_in> maxVerticesInGroup = graph.vertex_work_weight(vert) == 0 ? std::numeric_limits<v_workw_t<Graph_t_in>>::max() : params.maxWeight / graph.vertex_work_weight(vert);
+        const v_workw_t<Graph_t_in> desiredVerticesInGroup = graph.vertex_work_weight(vert) == 0
+                                                                 ? std::numeric_limits<v_workw_t<Graph_t_in>>::lowest()
+                                                                 : params.smallWeightThreshold / graph.vertex_work_weight(vert);
+        const v_workw_t<Graph_t_in> maxVerticesInGroup = graph.vertex_work_weight(vert) == 0
+                                                             ? std::numeric_limits<v_workw_t<Graph_t_in>>::max()
+                                                             : params.maxWeight / graph.vertex_work_weight(vert);
 
         const std::size_t minDesiredSize = desiredVerticesInGroup < 2 ? 2U : static_cast<std::size_t>(desiredVerticesInGroup);
-        const std::size_t maxDesiredSize = std::max(minDesiredSize, std::min(minDesiredSize * 2U, static_cast<std::size_t>(maxVerticesInGroup)));
+        const std::size_t maxDesiredSize
+            = std::max(minDesiredSize, std::min(minDesiredSize * 2U, static_cast<std::size_t>(maxVerticesInGroup)));
 
         std::vector<std::size_t> groups = homogeneousMerge(secureOrb.size(), minDesiredSize, maxDesiredSize);
 
@@ -1279,7 +1473,7 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
                 cluster.emplace_back(*secureOrbIter);
                 ++secureOrbIter;
             }
-            expansionMapOutput.emplace_back( std::move(cluster) );
+            expansionMapOutput.emplace_back(std::move(cluster));
             counter += static_cast<VertexType>(groupSize) - 1;
         }
 
@@ -1289,11 +1483,13 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_mer
     }
 
     for (const VertexType &vert : graph.vertices()) {
-        if (partitionedFlag[vert]) continue;
+        if (partitionedFlag[vert]) {
+            continue;
+        }
         expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
     }
 
     return counter;
 }
 
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/coarser/Sarkar/SarkarMul.hpp b/include/osp/coarser/Sarkar/SarkarMul.hpp
index 4eb91732..a89bd1bf 100644
--- a/include/osp/coarser/Sarkar/SarkarMul.hpp
+++ b/include/osp/coarser/Sarkar/SarkarMul.hpp
@@ -28,58 +28,63 @@ namespace SarkarParams {
 
 enum class BufferMergeMode { OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL };
 
-template<typename commCostType>
+template <typename commCostType>
 struct MulParameters {
     std::size_t seed{42U};
     double geomDecay{0.875};
     double leniency{0.0};
-    std::vector< commCostType > commCostVec{ std::initializer_list<commCostType>{} };
-    commCostType maxWeight{ std::numeric_limits<commCostType>::max() };
-    commCostType smallWeightThreshold{ std::numeric_limits<commCostType>::lowest() };
+    std::vector<commCostType> commCostVec{std::initializer_list<commCostType>{}};
+    commCostType maxWeight{std::numeric_limits<commCostType>::max()};
+    commCostType smallWeightThreshold{std::numeric_limits<commCostType>::lowest()};
     unsigned max_num_iteration_without_changes{3U};
     BufferMergeMode buffer_merge_mode{BufferMergeMode::OFF};
 };
-} // end namespace SarkarParams
 
-template<typename Graph_t, typename Graph_t_coarse>
+}    // end namespace SarkarParams
+
+template <typename Graph_t, typename Graph_t_coarse>
 class SarkarMul : public MultilevelCoarser<Graph_t, Graph_t_coarse> {
-    private:
-        bool first_coarsen{true};
-        Thue_Morse_Sequence thue_coin{42U};
-        Biased_Random balanced_random{42U};
-
-        // Multilevel coarser parameters
-        SarkarParams::MulParameters< v_workw_t<Graph_t> > ml_params;
-        // Coarser parameters
-        SarkarParams::Parameters< v_workw_t<Graph_t> > params;
-        // Initial coarser
-        Sarkar<Graph_t, Graph_t_coarse> coarser_initial;
-        // Subsequent coarser
-        Sarkar<Graph_t_coarse, Graph_t_coarse> coarser_secondary;
-
-        void setSeed();
-        void initParams();
-        void updateParams();
-        
-        RETURN_STATUS run_single_contraction_mode(vertex_idx_t<Graph_t> &diff_vertices);
-        RETURN_STATUS run_buffer_merges();
-        RETURN_STATUS run_contractions(v_workw_t<Graph_t> commCost);
-        RETURN_STATUS run_contractions() override;
-        
-    public:
-        void setParameters(SarkarParams::MulParameters< v_workw_t<Graph_t> > ml_params_) { ml_params = std::move(ml_params_); setSeed(); initParams(); };
-        
-        std::string getCoarserName() const { return "Sarkar"; };
+  private:
+    bool first_coarsen{true};
+    Thue_Morse_Sequence thue_coin{42U};
+    Biased_Random balanced_random{42U};
+
+    // Multilevel coarser parameters
+    SarkarParams::MulParameters<v_workw_t<Graph_t>> ml_params;
+    // Coarser parameters
+    SarkarParams::Parameters<v_workw_t<Graph_t>> params;
+    // Initial coarser
+    Sarkar<Graph_t, Graph_t_coarse> coarser_initial;
+    // Subsequent coarser
+    Sarkar<Graph_t_coarse, Graph_t_coarse> coarser_secondary;
+
+    void setSeed();
+    void initParams();
+    void updateParams();
+
+    RETURN_STATUS run_single_contraction_mode(vertex_idx_t<Graph_t> &diff_vertices);
+    RETURN_STATUS run_buffer_merges();
+    RETURN_STATUS run_contractions(v_workw_t<Graph_t> commCost);
+    RETURN_STATUS run_contractions() override;
+
+  public:
+    void setParameters(SarkarParams::MulParameters<v_workw_t<Graph_t>> ml_params_) {
+        ml_params = std::move(ml_params_);
+        setSeed();
+        initParams();
+    };
+
+    std::string getCoarserName() const { return "Sarkar"; };
 };
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void SarkarMul<Graph_t, Graph_t_coarse>::setSeed() {
     constexpr std::size_t seedReduction = 4096U;
     thue_coin = Thue_Morse_Sequence(ml_params.seed % seedReduction);
     balanced_random = Biased_Random(ml_params.seed);
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void SarkarMul<Graph_t, Graph_t_coarse>::initParams() {
     first_coarsen = true;
 
@@ -91,25 +96,25 @@ void SarkarMul<Graph_t, Graph_t_coarse>::initParams() {
     if (ml_params.commCostVec.empty()) {
         v_workw_t<Graph_t> syncCosts = 128;
         syncCosts = std::max(syncCosts, static_cast<v_workw_t<Graph_t>>(1));
-        
+
         while (syncCosts >= static_cast<v_workw_t<Graph_t>>(1)) {
-            ml_params.commCostVec.emplace_back( syncCosts );
+            ml_params.commCostVec.emplace_back(syncCosts);
             syncCosts /= 2;
         }
     }
 
     std::sort(ml_params.commCostVec.begin(), ml_params.commCostVec.end());
-    
+
     updateParams();
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void SarkarMul<Graph_t, Graph_t_coarse>::updateParams() {
     coarser_initial.setParameters(params);
     coarser_secondary.setParameters(params);
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_single_contraction_mode(vertex_idx_t<Graph_t> &diff_vertices) {
     RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS;
 
@@ -125,32 +130,35 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_single_contraction_mode(ve
     bool coarsen_success;
 
     if (first_coarsen) {
-        coarsen_success = coarser_initial.coarsenDag(*(MultilevelCoarser<Graph_t, Graph_t_coarse>::getOriginalGraph()), coarsened_dag, contraction_map);
+        coarsen_success = coarser_initial.coarsenDag(
+            *(MultilevelCoarser<Graph_t, Graph_t_coarse>::getOriginalGraph()), coarsened_dag, contraction_map);
         first_coarsen = false;
     } else {
-        coarsen_success = coarser_secondary.coarsenDag(*(MultilevelCoarser<Graph_t, Graph_t_coarse>::dag_history.back()), coarsened_dag, contraction_map);
+        coarsen_success = coarser_secondary.coarsenDag(
+            *(MultilevelCoarser<Graph_t, Graph_t_coarse>::dag_history.back()), coarsened_dag, contraction_map);
     }
-    
+
     if (!coarsen_success) {
         status = RETURN_STATUS::ERROR;
     }
 
-    status = std::max(status, MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::move(contraction_map), std::move(coarsened_dag)));
-    
+    status = std::max(
+        status, MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::move(contraction_map), std::move(coarsened_dag)));
+
     vertex_idx_t<Graph_t> new_num_vertices = MultilevelCoarser<Graph_t, Graph_t_coarse>::dag_history.back()->num_vertices();
     diff_vertices = current_num_vertices - new_num_vertices;
 
     return status;
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions(v_workw_t<Graph_t> commCost) {
     RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS;
     vertex_idx_t<Graph_t> diff = 0;
-    
+
     params.commCost = commCost;
     updateParams();
-    
+
     unsigned outer_no_change = 0;
     while (outer_no_change < ml_params.max_num_iteration_without_changes) {
         unsigned inner_no_change = 0;
@@ -207,7 +215,7 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions(v_workw_t<Gra
 
         // Levels
         while (inner_no_change < ml_params.max_num_iteration_without_changes) {
-            params.mode = thue_coin.get_flip()? SarkarParams::Mode::LEVEL_EVEN : SarkarParams::Mode::LEVEL_ODD;
+            params.mode = thue_coin.get_flip() ? SarkarParams::Mode::LEVEL_EVEN : SarkarParams::Mode::LEVEL_ODD;
             params.useTopPoset = balanced_random.get_flip();
             updateParams();
 
@@ -221,8 +229,6 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions(v_workw_t<Gra
             }
         }
 
-
-
         if (outer_change) {
             outer_no_change = 0;
         } else {
@@ -233,15 +239,15 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions(v_workw_t<Gra
     return status;
 }
 
-
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_buffer_merges() {
     RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS;
 
     unsigned no_change = 0;
-    while (no_change < ml_params.max_num_iteration_without_changes) {        
+    while (no_change < ml_params.max_num_iteration_without_changes) {
         vertex_idx_t<Graph_t> diff = 0;
-        if ((ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::HOMOGENEOUS) || (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FULL && diff == 0)) {
+        if ((ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::HOMOGENEOUS)
+            || (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FULL && diff == 0)) {
             params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER;
             updateParams();
             status = std::max(status, run_single_contraction_mode(diff));
@@ -271,7 +277,7 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_buffer_merges() {
 
         if (diff > 0) {
             no_change = 0;
-            status = std::max(status, run_contractions( ml_params.commCostVec.back() ));        
+            status = std::max(status, run_contractions(ml_params.commCostVec.back()));
         } else {
             no_change++;
         }
@@ -280,13 +286,12 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_buffer_merges() {
     return status;
 }
 
-
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions() {
     initParams();
 
     RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS;
-    
+
     for (const v_workw_t<Graph_t> commCost : ml_params.commCostVec) {
         status = std::max(status, run_contractions(commCost));
     }
@@ -298,10 +303,4 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions() {
     return status;
 }
 
-
-
-
-
-
-
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/coarser/SquashA/SquashA.hpp b/include/osp/coarser/SquashA/SquashA.hpp
index ac5910df..f41b3c5c 100644
--- a/include/osp/coarser/SquashA/SquashA.hpp
+++ b/include/osp/coarser/SquashA/SquashA.hpp
@@ -31,7 +31,9 @@ limitations under the License.
 namespace osp {
 
 namespace SquashAParams {
+
 enum class Mode { EDGE_WEIGHT, TRIANGLES };
+
 struct Parameters {
     double geom_decay_num_nodes{17.0 / 16.0};
     double poisson_par{0.0};
@@ -44,27 +46,28 @@ struct Parameters {
     bool use_structured_poset{false};
     bool use_top_poset{true};
 };
-} // end namespace SquashAParams
 
-template<typename Graph_t_in, typename Graph_t_out>
+}    // end namespace SquashAParams
+
+template <typename Graph_t_in, typename Graph_t_out>
 class SquashA : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
   private:
     SquashAParams::Parameters params;
 
     std::vector<int> generate_poset_in_map(const Graph_t_in &dag_in);
 
-    template<typename T, typename CMP>
-    std::vector<std::vector<vertex_idx_t<Graph_t_in>>>
-    gen_exp_map_from_contractable_edges(const std::multiset<std::pair<edge_desc_t<Graph_t_in>, T>, CMP> &edge_weights,
-                                        const std::vector<int> &poset_int_mapping, const Graph_t_in &dag_in) {
+    template <typename T, typename CMP>
+    std::vector<std::vector<vertex_idx_t<Graph_t_in>>> gen_exp_map_from_contractable_edges(
+        const std::multiset<std::pair<edge_desc_t<Graph_t_in>, T>, CMP> &edge_weights,
+        const std::vector<int> &poset_int_mapping,
+        const Graph_t_in &dag_in) {
         static_assert(std::is_arithmetic_v<T>, "T must be of arithmetic type!");
 
         auto lower_third_it = edge_weights.begin();
         std::advance(lower_third_it, edge_weights.size() / 3);
-        T lower_third_wt = std::max(lower_third_it->second, static_cast<T>(1)); // Could be 0
+        T lower_third_wt = std::max(lower_third_it->second, static_cast<T>(1));    // Could be 0
 
-        Union_Find_Universe<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_in>, v_workw_t<Graph_t_in>,
-                            v_memw_t<Graph_t_in>>
+        Union_Find_Universe<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_in>, v_workw_t<Graph_t_in>, v_memw_t<Graph_t_in>>
             connected_components;
         for (const auto &vert : dag_in.vertices()) {
             connected_components.add_object(vert, dag_in.vertex_work_weight(vert), dag_in.vertex_mem_weight(vert));
@@ -73,30 +76,32 @@ class SquashA : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
         std::vector<bool> merged_nodes(dag_in.num_vertices(), false);
 
         vertex_idx_t<Graph_t_in> num_nodes_decrease = 0;
-        vertex_idx_t<Graph_t_in> num_nodes_aim =
-            dag_in.num_vertices() - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(dag_in.num_vertices()) /
-                                                                          params.geom_decay_num_nodes);
+        vertex_idx_t<Graph_t_in> num_nodes_aim
+            = dag_in.num_vertices()
+              - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(dag_in.num_vertices()) / params.geom_decay_num_nodes);
 
         double temperature = 1;
         unsigned temperature_increase_iteration = 0;
-        while (num_nodes_decrease < num_nodes_aim &&
-               temperature_increase_iteration <= params.number_of_temperature_increases) {
+        while (num_nodes_decrease < num_nodes_aim && temperature_increase_iteration <= params.number_of_temperature_increases) {
             for (const auto &wt_edge : edge_weights) {
                 const auto &edge_d = wt_edge.first;
                 const vertex_idx_t<Graph_t_in> edge_source = source(edge_d, dag_in);
                 const vertex_idx_t<Graph_t_in> edge_target = target(edge_d, dag_in);
 
                 // Previously merged
-                if (merged_nodes[edge_source])
+                if (merged_nodes[edge_source]) {
                     continue;
-                if (merged_nodes[edge_target])
+                }
+                if (merged_nodes[edge_target]) {
                     continue;
+                }
 
                 // weight check
-                if (connected_components.get_weight_of_component_by_name(edge_source) +
-                        connected_components.get_weight_of_component_by_name(edge_target) >
-                    static_cast<double>(lower_third_wt) * temperature)
+                if (connected_components.get_weight_of_component_by_name(edge_source)
+                        + connected_components.get_weight_of_component_by_name(edge_target)
+                    > static_cast<double>(lower_third_wt) * temperature) {
                     continue;
+                }
 
                 // no loops criteria check
                 bool check_failed = false;
@@ -105,56 +110,72 @@ class SquashA : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                 // Checks over all affected edges
                 // In edges first
                 for (const auto &node : dag_in.parents(edge_source)) {
-                    if (node == edge_target)
+                    if (node == edge_target) {
                         continue;
-                    if (!merged_nodes[node])
+                    }
+                    if (!merged_nodes[node]) {
                         continue;
-                    if (poset_int_mapping[edge_source] >= poset_int_mapping[node] + 2)
+                    }
+                    if (poset_int_mapping[edge_source] >= poset_int_mapping[node] + 2) {
                         continue;
+                    }
                     check_failed = true;
                     break;
                 }
-                if (check_failed)
+                if (check_failed) {
                     continue;
+                }
                 // Out edges first
                 for (const auto &node : dag_in.children(edge_source)) {
-                    if (node == edge_target)
+                    if (node == edge_target) {
                         continue;
-                    if (!merged_nodes[node])
+                    }
+                    if (!merged_nodes[node]) {
                         continue;
-                    if (poset_int_mapping[node] >= poset_int_mapping[edge_source] + 2)
+                    }
+                    if (poset_int_mapping[node] >= poset_int_mapping[edge_source] + 2) {
                         continue;
+                    }
                     check_failed = true;
                     break;
                 }
-                if (check_failed)
+                if (check_failed) {
                     continue;
+                }
                 // In edges second
                 for (const auto &node : dag_in.parents(edge_target)) {
-                    if (node == edge_source)
+                    if (node == edge_source) {
                         continue;
-                    if (!merged_nodes[node])
+                    }
+                    if (!merged_nodes[node]) {
                         continue;
-                    if (poset_int_mapping[edge_target] >= poset_int_mapping[node] + 2)
+                    }
+                    if (poset_int_mapping[edge_target] >= poset_int_mapping[node] + 2) {
                         continue;
+                    }
                     check_failed = true;
                     break;
                 }
-                if (check_failed)
+                if (check_failed) {
                     continue;
+                }
                 // Out edges second
                 for (const auto &node : dag_in.children(edge_target)) {
-                    if (node == edge_source)
+                    if (node == edge_source) {
                         continue;
-                    if (!merged_nodes[node])
+                    }
+                    if (!merged_nodes[node]) {
                         continue;
-                    if (poset_int_mapping[node] >= poset_int_mapping[edge_target] + 2)
+                    }
+                    if (poset_int_mapping[node] >= poset_int_mapping[edge_target] + 2) {
                         continue;
+                    }
                     check_failed = true;
                     break;
                 }
-                if (check_failed)
+                if (check_failed) {
                     continue;
+                }
 
                 // merging
                 connected_components.join_by_name(edge_source, edge_target);
@@ -170,9 +191,10 @@ class SquashA : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
         // Getting components to contract and adding graph contraction
         std::vector<std::vector<vertex_idx_t<Graph_t_in>>> partition_vec;
 
-        vertex_idx_t<Graph_t_in> min_node_decrease =
-            dag_in.num_vertices() - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(dag_in.num_vertices()) /
-                                                                          std::pow(params.geom_decay_num_nodes, 0.25));
+        vertex_idx_t<Graph_t_in> min_node_decrease
+            = dag_in.num_vertices()
+              - static_cast<vertex_idx_t<Graph_t_in>>(static_cast<double>(dag_in.num_vertices())
+                                                      / std::pow(params.geom_decay_num_nodes, 0.25));
         if (num_nodes_decrease > 0 && num_nodes_decrease >= min_node_decrease) {
             partition_vec = connected_components.get_connected_components();
 
@@ -189,8 +211,7 @@ class SquashA : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
     }
 
   public:
-    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>>
-    generate_vertex_expansion_map(const Graph_t_in &dag_in) override;
+    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override;
 
     SquashA(SquashAParams::Parameters params_ = SquashAParams::Parameters()) : params(params_) {};
 
@@ -201,12 +222,13 @@ class SquashA : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
     virtual ~SquashA() override = default;
 
     inline SquashAParams::Parameters &getParams() { return params; }
+
     inline void setParams(SquashAParams::Parameters params_) { params = params_; }
 
     std::string getCoarserName() const override { return "SquashA"; }
 };
 
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 std::vector<int> SquashA<Graph_t_in, Graph_t_out>::generate_poset_in_map(const Graph_t_in &dag_in) {
     std::vector<int> poset_int_mapping;
     if (!params.use_structured_poset) {
@@ -225,11 +247,10 @@ std::vector<int> SquashA<Graph_t_in, Graph_t_out>::generate_poset_in_map(const G
     return poset_int_mapping;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<std::vector<vertex_idx_t<Graph_t_in>>>
-SquashA<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(const Graph_t_in &dag_in) {
-    static_assert(is_directed_graph_edge_desc_v<Graph_t_in>,
-                  "Graph_t_in must satisfy the directed_graph_edge_desc concept");
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<std::vector<vertex_idx_t<Graph_t_in>>> SquashA<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(
+    const Graph_t_in &dag_in) {
+    static_assert(is_directed_graph_edge_desc_v<Graph_t_in>, "Graph_t_in must satisfy the directed_graph_edge_desc concept");
     static_assert(is_computational_dag_edge_desc_v<Graph_t_in>,
                   "Graph_t_in must satisfy the is_computational_dag_edge_desc concept");
     // static_assert(has_hashable_edge_desc_v<Graph_t_in>, "Graph_t_in must have hashable edge descriptors");
@@ -238,17 +259,14 @@ SquashA<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(const Graph_t_in
 
     if constexpr (has_edge_weights_v<Graph_t_in>) {
         if (params.mode == SquashAParams::Mode::EDGE_WEIGHT) {
-            auto edge_w_cmp = [](const std::pair<edge_desc_t<Graph_t_in>, e_commw_t<Graph_t_in>> &lhs,
-                                const std::pair<edge_desc_t<Graph_t_in>, e_commw_t<Graph_t_in>> &rhs) {
-                return lhs.second < rhs.second;
-            };
-            std::multiset<std::pair<edge_desc_t<Graph_t_in>, e_commw_t<Graph_t_in>>, decltype(edge_w_cmp)> edge_weights(
-                edge_w_cmp);
+            auto edge_w_cmp
+                = [](const std::pair<edge_desc_t<Graph_t_in>, e_commw_t<Graph_t_in>> &lhs,
+                     const std::pair<edge_desc_t<Graph_t_in>, e_commw_t<Graph_t_in>> &rhs) { return lhs.second < rhs.second; };
+            std::multiset<std::pair<edge_desc_t<Graph_t_in>, e_commw_t<Graph_t_in>>, decltype(edge_w_cmp)> edge_weights(edge_w_cmp);
             {
-                std::vector<edge_desc_t<Graph_t_in>> contractable_edges =
-                    get_contractable_edges_from_poset_int_map<Graph_t_in>(poset_int_mapping, dag_in);
+                std::vector<edge_desc_t<Graph_t_in>> contractable_edges
+                    = get_contractable_edges_from_poset_int_map<Graph_t_in>(poset_int_mapping, dag_in);
                 for (const auto &edge : contractable_edges) {
-
                     if constexpr (has_edge_weights_v<Graph_t_in>) {
                         edge_weights.emplace(edge, dag_in.edge_comm_weight(edge));
                     } else {
@@ -259,28 +277,23 @@ SquashA<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(const Graph_t_in
 
             return gen_exp_map_from_contractable_edges<e_commw_t<Graph_t_in>, decltype(edge_w_cmp)>(
                 edge_weights, poset_int_mapping, dag_in);
-
         }
-    }    
+    }
     if (params.mode == SquashAParams::Mode::TRIANGLES) {
         auto edge_w_cmp = [](const std::pair<edge_desc_t<Graph_t_in>, std::size_t> &lhs,
-                             const std::pair<edge_desc_t<Graph_t_in>, std::size_t> &rhs) {
-            return lhs.second < rhs.second;
-        };
+                             const std::pair<edge_desc_t<Graph_t_in>, std::size_t> &rhs) { return lhs.second < rhs.second; };
         std::multiset<std::pair<edge_desc_t<Graph_t_in>, std::size_t>, decltype(edge_w_cmp)> edge_weights(edge_w_cmp);
         {
-            std::vector<edge_desc_t<Graph_t_in>> contractable_edges =
-                get_contractable_edges_from_poset_int_map<Graph_t_in>(poset_int_mapping, dag_in);
+            std::vector<edge_desc_t<Graph_t_in>> contractable_edges
+                = get_contractable_edges_from_poset_int_map<Graph_t_in>(poset_int_mapping, dag_in);
             for (const auto &edge : contractable_edges) {
-                std::size_t num_common_triangles =
-                    num_common_parents(dag_in, source(edge, dag_in), target(edge, dag_in));
+                std::size_t num_common_triangles = num_common_parents(dag_in, source(edge, dag_in), target(edge, dag_in));
                 num_common_triangles += num_common_children(dag_in, source(edge, dag_in), target(edge, dag_in));
                 edge_weights.emplace(edge, num_common_triangles);
             }
         }
 
-        return gen_exp_map_from_contractable_edges<std::size_t, decltype(edge_w_cmp)>(edge_weights, poset_int_mapping,
-                                                                                      dag_in);
+        return gen_exp_map_from_contractable_edges<std::size_t, decltype(edge_w_cmp)>(edge_weights, poset_int_mapping, dag_in);
 
     } else {
         throw std::runtime_error("Edge sorting mode not recognised.");
@@ -289,4 +302,4 @@ SquashA<Graph_t_in, Graph_t_out>::generate_vertex_expansion_map(const Graph_t_in
     return {};
 }
 
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/coarser/SquashA/SquashAMul.hpp b/include/osp/coarser/SquashA/SquashAMul.hpp
index e4019566..f238d576 100644
--- a/include/osp/coarser/SquashA/SquashAMul.hpp
+++ b/include/osp/coarser/SquashA/SquashAMul.hpp
@@ -24,32 +24,33 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 class SquashAMul : public MultilevelCoarser<Graph_t, Graph_t_coarse> {
-    private:
-        vertex_idx_t<Graph_t> min_nodes{ 1 };
-        Thue_Morse_Sequence thue_coin{};
-        Biased_Random balanced_random{};
-        
-        // Coarser Params
-        SquashAParams::Parameters params;
-        // Initial coarser
-        SquashA<Graph_t, Graph_t_coarse> coarser_initial;
-        // Subsequent coarser
-        SquashA<Graph_t_coarse, Graph_t_coarse> coarser_secondary;
-
-        void updateParams();
-        
-        RETURN_STATUS run_contractions() override;
-        
-    public:
-        void setParams(SquashAParams::Parameters params_) { params = params_; };
-        void setMinimumNumberVertices(vertex_idx_t<Graph_t> num) { min_nodes = num; };
-        
-        std::string getCoarserName() const { return "SquashA"; };
+  private:
+    vertex_idx_t<Graph_t> min_nodes{1};
+    Thue_Morse_Sequence thue_coin{};
+    Biased_Random balanced_random{};
+
+    // Coarser Params
+    SquashAParams::Parameters params;
+    // Initial coarser
+    SquashA<Graph_t, Graph_t_coarse> coarser_initial;
+    // Subsequent coarser
+    SquashA<Graph_t_coarse, Graph_t_coarse> coarser_secondary;
+
+    void updateParams();
+
+    RETURN_STATUS run_contractions() override;
+
+  public:
+    void setParams(SquashAParams::Parameters params_) { params = params_; };
+
+    void setMinimumNumberVertices(vertex_idx_t<Graph_t> num) { min_nodes = num; };
+
+    std::string getCoarserName() const { return "SquashA"; };
 };
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 void SquashAMul<Graph_t, Graph_t_coarse>::updateParams() {
     params.use_structured_poset = thue_coin.get_flip();
     params.use_top_poset = balanced_random.get_flip();
@@ -58,17 +59,17 @@ void SquashAMul<Graph_t, Graph_t_coarse>::updateParams() {
     coarser_secondary.setParams(params);
 }
 
-template<typename Graph_t, typename Graph_t_coarse>
+template <typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS SquashAMul<Graph_t, Graph_t_coarse>::run_contractions() {
     RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS;
 
-    Biased_Random_with_side_bias coin( params.edge_sort_ratio );
+    Biased_Random_with_side_bias coin(params.edge_sort_ratio);
 
     bool first_coarsen = true;
     unsigned no_change_in_a_row = 0;
     vertex_idx_t<Graph_t> current_num_vertices = MultilevelCoarser<Graph_t, Graph_t_coarse>::getOriginalGraph()->num_vertices();
 
-    while( no_change_in_a_row < params.num_rep_without_node_decrease && current_num_vertices > min_nodes ) {
+    while (no_change_in_a_row < params.num_rep_without_node_decrease && current_num_vertices > min_nodes) {
         updateParams();
 
         Graph_t_coarse coarsened_dag;
@@ -76,18 +77,22 @@ RETURN_STATUS SquashAMul<Graph_t, Graph_t_coarse>::run_contractions() {
         bool coarsen_success;
 
         if (first_coarsen) {
-            coarsen_success = coarser_initial.coarsenDag(*(MultilevelCoarser<Graph_t, Graph_t_coarse>::getOriginalGraph()), coarsened_dag, contraction_map);
+            coarsen_success = coarser_initial.coarsenDag(
+                *(MultilevelCoarser<Graph_t, Graph_t_coarse>::getOriginalGraph()), coarsened_dag, contraction_map);
             first_coarsen = false;
         } else {
-            coarsen_success = coarser_secondary.coarsenDag(*(MultilevelCoarser<Graph_t, Graph_t_coarse>::dag_history.back()), coarsened_dag, contraction_map);
+            coarsen_success = coarser_secondary.coarsenDag(
+                *(MultilevelCoarser<Graph_t, Graph_t_coarse>::dag_history.back()), coarsened_dag, contraction_map);
         }
-        
+
         if (!coarsen_success) {
             status = RETURN_STATUS::ERROR;
         }
 
-        status = std::max(status, MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::move(contraction_map), std::move(coarsened_dag)));
-        
+        status = std::max(
+            status,
+            MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::move(contraction_map), std::move(coarsened_dag)));
+
         vertex_idx_t<Graph_t> new_num_vertices = MultilevelCoarser<Graph_t, Graph_t_coarse>::dag_history.back()->num_vertices();
 
         if (new_num_vertices == current_num_vertices) {
@@ -101,9 +106,4 @@ RETURN_STATUS SquashAMul<Graph_t, Graph_t_coarse>::run_contractions() {
     return status;
 }
 
-
-
-
-
-
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/coarser/StepByStep/StepByStepCoarser.hpp b/include/osp/coarser/StepByStep/StepByStepCoarser.hpp
index ba2dbdf1..4f655d62 100644
--- a/include/osp/coarser/StepByStep/StepByStepCoarser.hpp
+++ b/include/osp/coarser/StepByStep/StepByStepCoarser.hpp
@@ -18,45 +18,41 @@ limitations under the License.
 
 #pragma once
 
+#include "osp/coarser/Coarser.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
-#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/computational_dag_construction_util.hpp"
+#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
-#include "osp/coarser/Coarser.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class StepByStepCoarser : public CoarserGenContractionMap<Graph_t, Graph_t> {
-
     using vertex_idx = vertex_idx_t<Graph_t>;
 
-    using vertex_type_t_or_default   = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
-    using edge_commw_t_or_default   = std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
-    
-    using boost_graph_t = boost_graph<v_workw_t<Graph_t>, v_commw_t<Graph_t>, v_memw_t<Graph_t>, vertex_type_t_or_default, edge_commw_t_or_default >;
+    using vertex_type_t_or_default
+        = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
+    using edge_commw_t_or_default = std::conditional_t<has_edge_weights_v<Graph_t>, e_commw_t<Graph_t>, v_commw_t<Graph_t>>;
+
+    using boost_graph_t
+        = boost_graph<v_workw_t<Graph_t>, v_commw_t<Graph_t>, v_memw_t<Graph_t>, vertex_type_t_or_default, edge_commw_t_or_default>;
 
   public:
-    enum COARSENING_STRATEGY
-    {
-        EDGE_BY_EDGE,
-        BOTTOM_LEVEL_CLUSTERS
-    };
+    enum COARSENING_STRATEGY { EDGE_BY_EDGE, BOTTOM_LEVEL_CLUSTERS };
 
-    enum PROBLEM_TYPE
-    {
-        SCHEDULING,
-        PEBBLING
-    };
+    enum PROBLEM_TYPE { SCHEDULING, PEBBLING };
 
-    struct EdgeToContract{
+    struct EdgeToContract {
         std::pair<vertex_idx, vertex_idx> edge;
         v_workw_t<Graph_t> work_weight;
         v_commw_t<Graph_t> comm_weight;
 
-        EdgeToContract(const vertex_idx source, const vertex_idx target, const v_workw_t<Graph_t> work_weight_, const v_commw_t<Graph_t> comm_weight_)
+        EdgeToContract(const vertex_idx source,
+                       const vertex_idx target,
+                       const v_workw_t<Graph_t> work_weight_,
+                       const v_commw_t<Graph_t> comm_weight_)
             : edge(source, target), work_weight(work_weight_), comm_weight(comm_weight_) {}
 
         bool operator<(const EdgeToContract &other) const {
@@ -65,8 +61,7 @@ class StepByStepCoarser : public CoarserGenContractionMap<Graph_t, Graph_t> {
     };
 
   private:
-
-    std::vector<std::pair<vertex_idx, vertex_idx> > contractionHistory;
+    std::vector<std::pair<vertex_idx, vertex_idx>> contractionHistory;
 
     COARSENING_STRATEGY coarsening_strategy = COARSENING_STRATEGY::EDGE_BY_EDGE;
     PROBLEM_TYPE problem_type = PROBLEM_TYPE::SCHEDULING;
@@ -78,12 +73,12 @@ class StepByStepCoarser : public CoarserGenContractionMap<Graph_t, Graph_t> {
 
     std::vector<std::set<vertex_idx>> contains;
 
-    std::map<std::pair<vertex_idx, vertex_idx>, v_commw_t<Graph_t> > edgeWeights;
-    std::map<std::pair<vertex_idx, vertex_idx>, v_commw_t<Graph_t> > contractable;
+    std::map<std::pair<vertex_idx, vertex_idx>, v_commw_t<Graph_t>> edgeWeights;
+    std::map<std::pair<vertex_idx, vertex_idx>, v_commw_t<Graph_t>> contractable;
     std::vector<bool> node_valid;
     std::vector<vertex_idx> top_order_idx;
 
-    v_memw_t<Graph_t> fast_mem_capacity = std::numeric_limits<v_memw_t<Graph_t>>::max(); // for pebbling
+    v_memw_t<Graph_t> fast_mem_capacity = std::numeric_limits<v_memw_t<Graph_t>>::max();    // for pebbling
 
     // Utility functions for coarsening in general
     void ContractSingleEdge(std::pair<vertex_idx, vertex_idx> edge);
@@ -95,48 +90,44 @@ class StepByStepCoarser : public CoarserGenContractionMap<Graph_t, Graph_t> {
     std::set<vertex_idx> getContractableParents(vertex_idx node) const;
     void updateDistantEdgeContractibility(std::pair<vertex_idx, vertex_idx> edge);
 
-    std::pair<vertex_idx, vertex_idx> PickEdgeToContract(const std::vector<EdgeToContract>& candidates) const;
+    std::pair<vertex_idx, vertex_idx> PickEdgeToContract(const std::vector<EdgeToContract> &candidates) const;
     std::vector<EdgeToContract> CreateEdgeCandidateList() const;
 
     // Utility functions for cluster coarsening
-    std::vector<std::pair<vertex_idx, vertex_idx> > ClusterCoarsen() const;
+    std::vector<std::pair<vertex_idx, vertex_idx>> ClusterCoarsen() const;
     std::vector<unsigned> ComputeFilteredTopLevel() const;
 
     // Utility functions for coarsening in a pebbling problem
-    bool IncontractableForPebbling(const std::pair<vertex_idx, vertex_idx>&) const;
+    bool IncontractableForPebbling(const std::pair<vertex_idx, vertex_idx> &) const;
     void MergeSourcesInPebbling();
 
     // Utility for contracting into final format
     void SetIdVector(std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) const;
-    static std::vector<vertex_idx> GetFilteredTopOrderIdx(const Graph_t& G, const std::vector<bool>& is_valid);
-
+    static std::vector<vertex_idx> GetFilteredTopOrderIdx(const Graph_t &G, const std::vector<bool> &is_valid);
 
   public:
     virtual ~StepByStepCoarser() = default;
 
     virtual std::string getCoarserName() const override { return "StepByStepCoarsening"; }
 
-
-
     // DAG coarsening
     virtual std::vector<vertex_idx_t<Graph_t>> generate_vertex_contraction_map(const Graph_t &dag_in) override;
 
-
-
     // Coarsening for pebbling problems - leaves source nodes intact, considers memory bound
-    void coarsenForPebbling(const Graph_t& dag_in, Graph_t &coarsened_dag,
-                           std::vector<vertex_idx_t<Graph_t>> &new_vertex_id);
+    void coarsenForPebbling(const Graph_t &dag_in, Graph_t &coarsened_dag, std::vector<vertex_idx_t<Graph_t>> &new_vertex_id);
+
+    void setCoarseningStrategy(COARSENING_STRATEGY strategy_) { coarsening_strategy = strategy_; }
 
+    void setTargetNumberOfNodes(const unsigned nr_nodes_) { target_nr_of_nodes = nr_nodes_; }
 
+    void setFastMemCapacity(const v_memw_t<Graph_t> capacity_) { fast_mem_capacity = capacity_; }
 
-    void setCoarseningStrategy(COARSENING_STRATEGY strategy_){ coarsening_strategy = strategy_;}
-    void setTargetNumberOfNodes(const unsigned nr_nodes_){ target_nr_of_nodes = nr_nodes_;}
-    void setFastMemCapacity(const v_memw_t<Graph_t> capacity_){ fast_mem_capacity = capacity_;}
+    std::vector<std::pair<vertex_idx, vertex_idx>> getContractionHistory() const { return contractionHistory; }
 
-    std::vector<std::pair<vertex_idx, vertex_idx> > getContractionHistory() const {return contractionHistory;}
     std::vector<vertex_idx> GetIntermediateIDs(vertex_idx until_which_step) const;
     Graph_t Contract(const std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) const;
-    const Graph_t& getOriginalDag() const {return G_full;}
+
+    const Graph_t &getOriginalDag() const { return G_full; }
 };
 
 // template<typename Graph_t>
@@ -144,14 +135,12 @@ class StepByStepCoarser : public CoarserGenContractionMap<Graph_t, Graph_t> {
 //                         std::vector<std::vector<vertex_idx_t<Graph_t>>> &old_vertex_ids,
 //                         std::vector<vertex_idx_t<Graph_t>> &new_vertex_id)
 
-template<typename Graph_t>
-std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::generate_vertex_contraction_map(const Graph_t &dag_in)
-{
+template <typename Graph_t>
+std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::generate_vertex_contraction_map(const Graph_t &dag_in) {
     const unsigned N = static_cast<unsigned>(dag_in.num_vertices());
 
     G_full = dag_in;
-    for(vertex_idx node = G_coarse.num_vertices(); node > 0;)
-    {
+    for (vertex_idx node = G_coarse.num_vertices(); node > 0;) {
         --node;
         G_coarse.remove_vertex(node);
     }
@@ -161,8 +150,9 @@ std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::generate_vertex_c
     contractionHistory.clear();
 
     // target nr of nodes must be reasonable
-    if(target_nr_of_nodes == 0 || target_nr_of_nodes > N)
-        target_nr_of_nodes = std::max(N/2, 1U);
+    if (target_nr_of_nodes == 0 || target_nr_of_nodes > N) {
+        target_nr_of_nodes = std::max(N / 2, 1U);
+    }
 
     // list of original node indices contained in each contracted node
     contains.clear();
@@ -171,36 +161,36 @@ std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::generate_vertex_c
     node_valid.clear();
     node_valid.resize(N, true);
 
-    for (vertex_idx node = 0; node < N; ++node)
+    for (vertex_idx node = 0; node < N; ++node) {
         contains[node].insert(node);
+    }
 
-    //used for original, slow coarsening
+    // used for original, slow coarsening
     edgeWeights.clear();
     contractable.clear();
-    
-    if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE)
-    {
+
+    if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) {
         // Init edge weights
-        for (vertex_idx node = 0; node < N; ++node)
-            for (vertex_idx succ: G_full.children(node))
+        for (vertex_idx node = 0; node < N; ++node) {
+            for (vertex_idx succ : G_full.children(node)) {
                 edgeWeights[std::make_pair(node, succ)] = G_full.vertex_comm_weight(node);
+            }
+        }
 
         // get original contractable edges
         InitializeContractableEdges();
     }
 
-    for (unsigned NrOfNodes = N; NrOfNodes > target_nr_of_nodes; ) {
+    for (unsigned NrOfNodes = N; NrOfNodes > target_nr_of_nodes;) {
         // Single contraction step
 
         std::vector<std::pair<vertex_idx, vertex_idx>> edgesToContract;
 
         // choose edges to contract in this step
-        if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE)
-        {
+        if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) {
             std::vector<EdgeToContract> candidates = CreateEdgeCandidateList();
-            if(candidates.empty())
-            {
-                std::cout<<"Error: no more edges to contract"<<std::endl;
+            if (candidates.empty()) {
+                std::cout << "Error: no more edges to contract" << std::endl;
                 break;
             }
             std::pair<vertex_idx, vertex_idx> chosenEdge = PickEdgeToContract(candidates);
@@ -208,60 +198,62 @@ std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::generate_vertex_c
 
             // Update far-away edges that become uncontractable now
             updateDistantEdgeContractibility(chosenEdge);
-        }
-        else
+        } else {
             edgesToContract = ClusterCoarsen();
+        }
 
-        if(edgesToContract.empty())
+        if (edgesToContract.empty()) {
             break;
-        
+        }
+
         // contract these edges
-        for(const std::pair<vertex_idx, vertex_idx>& edge : edgesToContract)
-        {
-            if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE)
-            {
-                //Update contractable edges - edge.b
-                for(vertex_idx pred : G_coarse.parents(edge.second))
+        for (const std::pair<vertex_idx, vertex_idx> &edge : edgesToContract) {
+            if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) {
+                // Update contractable edges - edge.second
+                for (vertex_idx pred : G_coarse.parents(edge.second)) {
                     contractable.erase(std::make_pair(pred, edge.second));
-                
-                for(vertex_idx succ : G_coarse.children(edge.second))
+                }
+
+                for (vertex_idx succ : G_coarse.children(edge.second)) {
                     contractable.erase(std::make_pair(edge.second, succ));
+                }
             }
 
             ContractSingleEdge(edge);
             node_valid[edge.second] = false;
 
-            if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE)
-            {
+            if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) {
                 ComputeFilteredTopOrderIdx();
 
-                //Update contractable edges - edge.a
+                // Update contractable edges - edge.first
                 std::set<vertex_idx> contractableParents = getContractableParents(edge.first);
-                for (vertex_idx pred : G_coarse.parents(edge.first))
-                {
-                    if(contractableParents.find(pred) != contractableParents.end())
+                for (vertex_idx pred : G_coarse.parents(edge.first)) {
+                    if (contractableParents.find(pred) != contractableParents.end()) {
                         contractable[std::make_pair(pred, edge.first)] = edgeWeights[std::make_pair(pred, edge.first)];
-                    else
+                    } else {
                         contractable.erase(std::make_pair(pred, edge.first));
+                    }
                 }
-                
+
                 std::set<vertex_idx> contractableChildren = getContractableChildren(edge.first);
-                for (vertex_idx succ : G_coarse.children(edge.first))
-                {
-                    if(contractableChildren.find(succ) != contractableChildren.end())
+                for (vertex_idx succ : G_coarse.children(edge.first)) {
+                    if (contractableChildren.find(succ) != contractableChildren.end()) {
                         contractable[std::make_pair(edge.first, succ)] = edgeWeights[std::make_pair(edge.first, succ)];
-                    else
+                    } else {
                         contractable.erase(std::make_pair(edge.first, succ));
+                    }
                 }
             }
             --NrOfNodes;
-            if(NrOfNodes == target_nr_of_nodes)
+            if (NrOfNodes == target_nr_of_nodes) {
                 break;
+            }
         }
     }
 
-    if(problem_type == PROBLEM_TYPE::PEBBLING)
+    if (problem_type == PROBLEM_TYPE::PEBBLING) {
         MergeSourcesInPebbling();
+    }
 
     std::vector<vertex_idx_t<Graph_t>> new_vertex_id;
     SetIdVector(new_vertex_id);
@@ -269,15 +261,14 @@ std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::generate_vertex_c
     return new_vertex_id;
 }
 
-template<typename Graph_t>
-void StepByStepCoarser<Graph_t>::ContractSingleEdge(std::pair<vertex_idx, vertex_idx> edge)
-{
+template <typename Graph_t>
+void StepByStepCoarser<Graph_t>::ContractSingleEdge(std::pair<vertex_idx, vertex_idx> edge) {
     G_coarse.set_vertex_work_weight(edge.first, G_coarse.vertex_work_weight(edge.first) + G_coarse.vertex_work_weight(edge.second));
     G_coarse.set_vertex_work_weight(edge.second, 0);
 
     G_coarse.set_vertex_comm_weight(edge.first, G_coarse.vertex_comm_weight(edge.first) + G_coarse.vertex_comm_weight(edge.second));
     G_coarse.set_vertex_comm_weight(edge.second, 0);
-    
+
     G_coarse.set_vertex_mem_weight(edge.first, G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second));
     G_coarse.set_vertex_mem_weight(edge.second, 0);
 
@@ -285,24 +276,27 @@ void StepByStepCoarser<Graph_t>::ContractSingleEdge(std::pair<vertex_idx, vertex
 
     // process incoming edges
     std::set<vertex_idx> parents_of_source;
-    for(vertex_idx pred : G_coarse.parents(edge.first))
+    for (vertex_idx pred : G_coarse.parents(edge.first)) {
         parents_of_source.insert(pred);
+    }
 
-    for(vertex_idx pred : G_coarse.parents(edge.second))
-    {
-        if(pred == edge.first)
+    for (vertex_idx pred : G_coarse.parents(edge.second)) {
+        if (pred == edge.first) {
             continue;
-        if(parents_of_source.find(pred) != parents_of_source.end()) // combine edges
+        }
+        if (parents_of_source.find(pred) != parents_of_source.end())    // combine edges
         {
             edgeWeights[std::make_pair(pred, edge.first)] = 0;
-            for (vertex_idx node: contains[pred])
-                for (vertex_idx succ: G_coarse.children(node))
-                    if (succ == edge.first || succ == edge.second)
+            for (vertex_idx node : contains[pred]) {
+                for (vertex_idx succ : G_coarse.children(node)) {
+                    if (succ == edge.first || succ == edge.second) {
                         edgeWeights[std::make_pair(pred, edge.first)] += G_full.vertex_comm_weight(node);
-            
+                    }
+                }
+            }
+
             edgeWeights.erase(std::make_pair(pred, edge.second));
-        }
-        else // add incoming edge
+        } else    // add incoming edge
         {
             G_coarse.add_edge(pred, edge.first);
             edgeWeights[std::make_pair(pred, edge.first)] = edgeWeights[std::make_pair(pred, edge.second)];
@@ -311,17 +305,16 @@ void StepByStepCoarser<Graph_t>::ContractSingleEdge(std::pair<vertex_idx, vertex
 
     // process outgoing edges
     std::set<vertex_idx> children_of_source;
-    for(vertex_idx succ : G_coarse.children(edge.first))
+    for (vertex_idx succ : G_coarse.children(edge.first)) {
         children_of_source.insert(succ);
+    }
 
-    for(vertex_idx succ : G_coarse.children(edge.second))
-    {
-        if(children_of_source.find(succ) != children_of_source.end()) // combine edges
+    for (vertex_idx succ : G_coarse.children(edge.second)) {
+        if (children_of_source.find(succ) != children_of_source.end())    // combine edges
         {
-            edgeWeights[std::make_pair(edge.first, succ)] += edgeWeights[std::make_pair(edge.second, succ)]; 
+            edgeWeights[std::make_pair(edge.first, succ)] += edgeWeights[std::make_pair(edge.second, succ)];
             edgeWeights.erase(std::make_pair(edge.second, succ));
-        }
-        else // add outgoing edge
+        } else    // add outgoing edge
         {
             G_coarse.add_edge(edge.first, succ);
             edgeWeights[std::make_pair(edge.first, succ)] = edgeWeights[std::make_pair(edge.second, succ)];
@@ -330,30 +323,31 @@ void StepByStepCoarser<Graph_t>::ContractSingleEdge(std::pair<vertex_idx, vertex
 
     G_coarse.clear_vertex(edge.second);
 
-    for (vertex_idx node: contains[edge.second])
+    for (vertex_idx node : contains[edge.second]) {
         contains[edge.first].insert(node);
+    }
 
     contains[edge.second].clear();
 }
 
-template<typename Graph_t>
-bool StepByStepCoarser<Graph_t>::isContractable(std::pair<vertex_idx, vertex_idx> edge) const
-{
-    
+template <typename Graph_t>
+bool StepByStepCoarser<Graph_t>::isContractable(std::pair<vertex_idx, vertex_idx> edge) const {
     std::deque<vertex_idx> Queue;
     std::set<vertex_idx> visited;
-    for (vertex_idx succ : G_coarse.children(edge.first))
+    for (vertex_idx succ : G_coarse.children(edge.first)) {
         if (node_valid[succ] && top_order_idx[succ] < top_order_idx[edge.second]) {
             Queue.push_back(succ);
             visited.insert(succ);
         }
+    }
 
     while (!Queue.empty()) {
         const vertex_idx node = Queue.front();
         Queue.pop_front();
         for (vertex_idx succ : G_coarse.children(node)) {
-            if (succ == edge.second)
+            if (succ == edge.second) {
                 return false;
+            }
 
             if (node_valid[succ] && top_order_idx[succ] < top_order_idx[edge.second] && visited.count(succ) == 0) {
                 Queue.push_back(succ);
@@ -364,21 +358,21 @@ bool StepByStepCoarser<Graph_t>::isContractable(std::pair<vertex_idx, vertex_idx
     return true;
 }
 
-template<typename Graph_t>
-std::set<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::getContractableChildren(const vertex_idx node) const
-{
+template <typename Graph_t>
+std::set<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::getContractableChildren(const vertex_idx node) const {
     std::deque<vertex_idx> Queue;
     std::set<vertex_idx> visited;
     std::set<vertex_idx> succ_contractable;
     vertex_idx topOrderMax = top_order_idx[node];
 
-    for (vertex_idx succ : G_coarse.children(node))
-    {
-        if(node_valid[succ])
+    for (vertex_idx succ : G_coarse.children(node)) {
+        if (node_valid[succ]) {
             succ_contractable.insert(succ);
-        
-        if(top_order_idx[succ] > topOrderMax)
+        }
+
+        if (top_order_idx[succ] > topOrderMax) {
             topOrderMax = top_order_idx[succ];
+        }
 
         if (node_valid[succ]) {
             Queue.push_back(succ);
@@ -390,7 +384,6 @@ std::set<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::getContractableChil
         const vertex_idx node_local = Queue.front();
         Queue.pop_front();
         for (vertex_idx succ : G_coarse.children(node_local)) {
-            
             succ_contractable.erase(succ);
 
             if (node_valid[succ] && top_order_idx[succ] < topOrderMax && visited.count(succ) == 0) {
@@ -403,21 +396,21 @@ std::set<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::getContractableChil
     return succ_contractable;
 }
 
-template<typename Graph_t>
-std::set<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::getContractableParents(const vertex_idx node) const
-{
+template <typename Graph_t>
+std::set<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::getContractableParents(const vertex_idx node) const {
     std::deque<vertex_idx> Queue;
     std::set<vertex_idx> visited;
     std::set<vertex_idx> pred_contractable;
     vertex_idx topOrderMin = top_order_idx[node];
 
-    for (vertex_idx pred : G_coarse.parents(node))
-    {
-        if(node_valid[pred])
+    for (vertex_idx pred : G_coarse.parents(node)) {
+        if (node_valid[pred]) {
             pred_contractable.insert(pred);
-        
-        if(top_order_idx[pred] < topOrderMin)
+        }
+
+        if (top_order_idx[pred] < topOrderMin) {
             topOrderMin = top_order_idx[pred];
+        }
 
         if (node_valid[pred]) {
             Queue.push_back(pred);
@@ -429,7 +422,6 @@ std::set<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::getContractablePare
         const vertex_idx node_local = Queue.front();
         Queue.pop_front();
         for (vertex_idx pred : G_coarse.parents(node_local)) {
-            
             pred_contractable.erase(pred);
 
             if (node_valid[pred] && top_order_idx[pred] > topOrderMin && visited.count(pred) == 0) {
@@ -442,108 +434,116 @@ std::set<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::getContractablePare
     return pred_contractable;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void StepByStepCoarser<Graph_t>::InitializeContractableEdges() {
-
     ComputeFilteredTopOrderIdx();
 
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-    {
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
         std::set<vertex_idx> succ_contractable = getContractableChildren(node);
-        for(vertex_idx succ : succ_contractable)          
+        for (vertex_idx succ : succ_contractable) {
             contractable[std::make_pair(node, succ)] = G_full.vertex_comm_weight(node);
+        }
     }
 }
 
-template<typename Graph_t>
-void StepByStepCoarser<Graph_t>::updateDistantEdgeContractibility(std::pair<vertex_idx, vertex_idx> edge)
-{
-
+template <typename Graph_t>
+void StepByStepCoarser<Graph_t>::updateDistantEdgeContractibility(std::pair<vertex_idx, vertex_idx> edge) {
     std::unordered_set<vertex_idx> ancestors, descendant;
     std::deque<vertex_idx> Queue;
-    for (vertex_idx succ : G_coarse.children(edge.first))
+    for (vertex_idx succ : G_coarse.children(edge.first)) {
         if (succ != edge.second) {
             Queue.push_back(succ);
             descendant.insert(succ);
         }
+    }
     while (!Queue.empty()) {
         const vertex_idx node = Queue.front();
         Queue.pop_front();
-        for (vertex_idx succ : G_coarse.children(node))
+        for (vertex_idx succ : G_coarse.children(node)) {
             if (descendant.count(succ) == 0) {
                 Queue.push_back(succ);
                 descendant.insert(succ);
             }
+        }
     }
 
-    for (vertex_idx pred : G_coarse.parents(edge.second))
+    for (vertex_idx pred : G_coarse.parents(edge.second)) {
         if (pred != edge.first) {
             Queue.push_back(pred);
             ancestors.insert(pred);
         }
+    }
     while (!Queue.empty()) {
         const vertex_idx node = Queue.front();
         Queue.pop_front();
-        for (vertex_idx pred : G_coarse.parents(node))
+        for (vertex_idx pred : G_coarse.parents(node)) {
             if (ancestors.count(pred) == 0) {
                 Queue.push_back(pred);
                 ancestors.insert(pred);
             }
+        }
     }
 
-    for (const vertex_idx node : ancestors)
-        for (const vertex_idx succ : G_coarse.children(node))
-            if (descendant.count(succ) > 0)
+    for (const vertex_idx node : ancestors) {
+        for (const vertex_idx succ : G_coarse.children(node)) {
+            if (descendant.count(succ) > 0) {
                 contractable.erase(std::make_pair(node, succ));
+            }
+        }
+    }
 }
 
-template<typename Graph_t>
-std::vector<typename StepByStepCoarser<Graph_t>::EdgeToContract> StepByStepCoarser<Graph_t>::CreateEdgeCandidateList() const
-{
+template <typename Graph_t>
+std::vector<typename StepByStepCoarser<Graph_t>::EdgeToContract> StepByStepCoarser<Graph_t>::CreateEdgeCandidateList() const {
     std::vector<EdgeToContract> candidates;
 
-    for (auto it = contractable.cbegin(); it != contractable.cend(); ++it)
-    {
-        if(problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(it->first))
+    for (auto it = contractable.cbegin(); it != contractable.cend(); ++it) {
+        if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(it->first)) {
             continue;
+        }
 
-        candidates.emplace_back(it->first.first, it->first.second, contains[it->first.first].size() + contains[it->first.second].size(), it->second);
+        candidates.emplace_back(
+            it->first.first, it->first.second, contains[it->first.first].size() + contains[it->first.second].size(), it->second);
     }
 
     std::sort(candidates.begin(), candidates.end());
     return candidates;
 }
 
-template<typename Graph_t>
-std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::PickEdgeToContract(const std::vector<EdgeToContract>& candidates) const
-{
+template <typename Graph_t>
+std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::PickEdgeToContract(
+    const std::vector<EdgeToContract> &candidates) const {
     size_t limit = (candidates.size() + 2) / 3;
     v_workw_t<Graph_t> limitCardinality = candidates[limit].work_weight;
-    while (limit < candidates.size() - 1 && candidates[limit + 1].work_weight == limitCardinality)
+    while (limit < candidates.size() - 1 && candidates[limit + 1].work_weight == limitCardinality) {
         ++limit;
+    }
 
     // an edge case
-    if (candidates.size() == 1)
+    if (candidates.size() == 1) {
         limit = 0;
+    }
 
     EdgeToContract chosen = candidates[0];
     unsigned best = 0;
-    for (unsigned idx = 1; idx <= limit; ++idx)
-        if (candidates[idx].comm_weight > candidates[best].comm_weight)
+    for (unsigned idx = 1; idx <= limit; ++idx) {
+        if (candidates[idx].comm_weight > candidates[best].comm_weight) {
             best = idx;
+        }
+    }
 
     chosen = candidates[best];
     return chosen.edge;
 }
 
 /**
- * @brief Acyclic graph contractor based on (Herrmann, Julien, et al. "Acyclic partitioning of large directed acyclic graphs." 2017 17th IEEE/ACM international symposium on cluster, cloud and grid computing (CCGRID). IEEE, 2017.))
+ * @brief Acyclic graph contractor based on (Herrmann, Julien, et al. "Acyclic partitioning of large directed acyclic graphs."
+ * 2017 17th IEEE/ACM international symposium on cluster, cloud and grid computing (CCGRID). IEEE, 2017.)
  * @brief with minor changes and fixes
- * 
+ *
  */
-template<typename Graph_t>
-std::vector<std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t> > > StepByStepCoarser<Graph_t>::ClusterCoarsen() const
-{
+template <typename Graph_t>
+std::vector<std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t>>> StepByStepCoarser<Graph_t>::ClusterCoarsen() const {
     std::vector<bool> singleton(G_full.num_vertices(), true);
     std::vector<vertex_idx> leader(G_full.num_vertices());
     std::vector<unsigned> weight(G_full.num_vertices());
@@ -554,87 +554,98 @@ std::vector<std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t> > > StepBySte
     std::vector<unsigned> maxTopLevel(G_full.num_vertices());
     std::vector<vertex_idx> clusterNewID(G_full.num_vertices());
 
-    std::vector<std::pair<vertex_idx, vertex_idx> > contractionSteps;
+    std::vector<std::pair<vertex_idx, vertex_idx>> contractionSteps;
     std::vector<unsigned> topLevel = ComputeFilteredTopLevel();
-    for(vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-        if(node_valid[node])
-        {
-            leader[node]=node;
-            weight[node]=1 /*G_coarse.vertex_work_weight(node)*/;
-            nrBadNeighbors[node]=0;
-            leaderBadNeighbors[node]=UINT_MAX;
-            clusterNewID[node]=node;
-            minTopLevel[node]=topLevel[node];
-            maxTopLevel[node]=topLevel[node];
-        }
-
-    for(vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-    {
-        if(!node_valid[node] || !singleton[node])
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
+        if (node_valid[node]) {
+            leader[node] = node;
+            weight[node] = 1 /*G_coarse.vertex_work_weight(node)*/;
+            nrBadNeighbors[node] = 0;
+            leaderBadNeighbors[node] = UINT_MAX;
+            clusterNewID[node] = node;
+            minTopLevel[node] = topLevel[node];
+            maxTopLevel[node] = topLevel[node];
+        }
+    }
+
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
+        if (!node_valid[node] || !singleton[node]) {
             continue;
+        }
 
-        if(nrBadNeighbors[node] > 1)
+        if (nrBadNeighbors[node] > 1) {
             continue;
+        }
 
         std::vector<vertex_idx> validNeighbors;
-        for(vertex_idx pred: G_coarse.parents(node))
-        {
+        for (vertex_idx pred : G_coarse.parents(node)) {
             // direct check of condition 1
-            if(topLevel[node] < maxTopLevel[leader[pred]]-1 || topLevel[node] > minTopLevel[leader[pred]]+1)
+            if (topLevel[node] < maxTopLevel[leader[pred]] - 1 || topLevel[node] > minTopLevel[leader[pred]] + 1) {
                 continue;
+            }
             // indirect check of condition 2
-            if(nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[pred]))
+            if (nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[pred])) {
                 continue;
-            //check condition 2 for pred if it is a singleton
-            if(singleton[pred] && nrBadNeighbors[pred] > 0)
+            }
+            // check condition 2 for pred if it is a singleton
+            if (singleton[pred] && nrBadNeighbors[pred] > 0) {
                 continue;
+            }
 
             // check viability for pebbling
-            if(problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(pred, node)))
+            if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(pred, node))) {
                 continue;
+            }
 
             validNeighbors.push_back(pred);
         }
-        for(vertex_idx succ: G_coarse.children(node))
-        {
+        for (vertex_idx succ : G_coarse.children(node)) {
             // direct check of condition 1
-            if(topLevel[node] < maxTopLevel[leader[succ]]-1 || topLevel[node] > minTopLevel[leader[succ]]+1)
+            if (topLevel[node] < maxTopLevel[leader[succ]] - 1 || topLevel[node] > minTopLevel[leader[succ]] + 1) {
                 continue;
+            }
             // indirect check of condition 2
-            if(nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[succ]))
+            if (nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[succ])) {
                 continue;
-            //check condition 2 for pred if it is a singleton
-            if(singleton[succ] && nrBadNeighbors[succ] > 0)
+            }
+            // check condition 2 for succ if it is a singleton
+            if (singleton[succ] && nrBadNeighbors[succ] > 0) {
                 continue;
+            }
 
             // check viability for pebbling
-            if(problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(node, succ)))
+            if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(node, succ))) {
                 continue;
+            }
 
             validNeighbors.push_back(succ);
         }
 
         vertex_idx bestNeighbor = std::numeric_limits<vertex_idx>::max();
-        for(vertex_idx neigh : validNeighbors)
-            if(bestNeighbor == std::numeric_limits<vertex_idx>::max() || weight[leader[neigh]] < weight[leader[bestNeighbor]])
+        for (vertex_idx neigh : validNeighbors) {
+            if (bestNeighbor == std::numeric_limits<vertex_idx>::max() || weight[leader[neigh]] < weight[leader[bestNeighbor]]) {
                 bestNeighbor = neigh;
+            }
+        }
 
-        if(bestNeighbor == std::numeric_limits<vertex_idx>::max())
+        if (bestNeighbor == std::numeric_limits<vertex_idx>::max()) {
             continue;
+        }
 
         vertex_idx newLead = leader[bestNeighbor];
         leader[node] = newLead;
         weight[newLead] += weight[node];
 
         bool is_parent = false;
-        for(vertex_idx pred : G_coarse.parents(node))
-            if(pred == bestNeighbor)
+        for (vertex_idx pred : G_coarse.parents(node)) {
+            if (pred == bestNeighbor) {
                 is_parent = true;
+            }
+        }
 
-        if(is_parent)
+        if (is_parent) {
             contractionSteps.emplace_back(clusterNewID[newLead], node);
-        else
-        {
+        } else {
             contractionSteps.emplace_back(node, clusterNewID[newLead]);
             clusterNewID[newLead] = node;
         }
@@ -642,64 +653,59 @@ std::vector<std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t> > > StepBySte
         minTopLevel[newLead] = std::min(minTopLevel[newLead], topLevel[node]);
         maxTopLevel[newLead] = std::max(maxTopLevel[newLead], topLevel[node]);
 
-        for(vertex_idx pred: G_coarse.parents(node))
-        {
-            if(std::abs( static_cast<int>(topLevel[pred]) - static_cast<int>(maxTopLevel[newLead]) ) != 1 &&
-                std::abs( static_cast<int>(topLevel[pred]) - static_cast<int>(minTopLevel[newLead]) ) != 1)
+        for (vertex_idx pred : G_coarse.parents(node)) {
+            if (std::abs(static_cast<int>(topLevel[pred]) - static_cast<int>(maxTopLevel[newLead])) != 1
+                && std::abs(static_cast<int>(topLevel[pred]) - static_cast<int>(minTopLevel[newLead])) != 1) {
                 continue;
+            }
 
-            if(nrBadNeighbors[pred] == 0)
-            {
+            if (nrBadNeighbors[pred] == 0) {
                 ++nrBadNeighbors[pred];
                 leaderBadNeighbors[pred] = newLead;
-            }
-            else if(nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead)
+            } else if (nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead) {
                 ++nrBadNeighbors[pred];
+            }
         }
-        for(vertex_idx succ: G_coarse.children(node))
-        {
-            if(std::abs( static_cast<int>(topLevel[succ]) - static_cast<int>(maxTopLevel[newLead]) ) != 1 &&
-                std::abs( static_cast<int>(topLevel[succ]) - static_cast<int>(minTopLevel[newLead]) ) != 1)
+        for (vertex_idx succ : G_coarse.children(node)) {
+            if (std::abs(static_cast<int>(topLevel[succ]) - static_cast<int>(maxTopLevel[newLead])) != 1
+                && std::abs(static_cast<int>(topLevel[succ]) - static_cast<int>(minTopLevel[newLead])) != 1) {
                 continue;
+            }
 
-            if(nrBadNeighbors[succ]==0)
-            {
+            if (nrBadNeighbors[succ] == 0) {
                 ++nrBadNeighbors[succ];
                 leaderBadNeighbors[succ] = newLead;
-            }
-            else if(nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead)
+            } else if (nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead) {
                 ++nrBadNeighbors[succ];
+            }
         }
 
-        if(singleton[bestNeighbor])
-        {
-            for(vertex_idx pred: G_coarse.parents(bestNeighbor) )
-            {
-                if(std::abs( static_cast<int>(topLevel[pred]) - static_cast<int>(maxTopLevel[newLead]) ) != 1 &&
-                    std::abs( static_cast<int>(topLevel[pred]) - static_cast<int>(minTopLevel[newLead]) ) != 1)
+        if (singleton[bestNeighbor]) {
+            for (vertex_idx pred : G_coarse.parents(bestNeighbor)) {
+                if (std::abs(static_cast<int>(topLevel[pred]) - static_cast<int>(maxTopLevel[newLead])) != 1
+                    && std::abs(static_cast<int>(topLevel[pred]) - static_cast<int>(minTopLevel[newLead])) != 1) {
                     continue;
+                }
 
-                if(nrBadNeighbors[pred] == 0)
-                {
+                if (nrBadNeighbors[pred] == 0) {
                     ++nrBadNeighbors[pred];
                     leaderBadNeighbors[pred] = newLead;
-                }
-                else if(nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead)
+                } else if (nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead) {
                     ++nrBadNeighbors[pred];
+                }
             }
-            for(vertex_idx succ: G_coarse.children(bestNeighbor))
-            {
-                if(std::abs( static_cast<int>(topLevel[succ]) - static_cast<int>(maxTopLevel[newLead]) ) != 1 &&
-                    std::abs( static_cast<int>(topLevel[succ]) - static_cast<int>(minTopLevel[newLead]) ) != 1)
+            for (vertex_idx succ : G_coarse.children(bestNeighbor)) {
+                if (std::abs(static_cast<int>(topLevel[succ]) - static_cast<int>(maxTopLevel[newLead])) != 1
+                    && std::abs(static_cast<int>(topLevel[succ]) - static_cast<int>(minTopLevel[newLead])) != 1) {
                     continue;
+                }
 
-                if(nrBadNeighbors[succ]==0)
-                {
+                if (nrBadNeighbors[succ] == 0) {
                     ++nrBadNeighbors[succ];
                     leaderBadNeighbors[succ] = newLead;
-                }
-                else if(nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead)
+                } else if (nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead) {
                     ++nrBadNeighbors[succ];
+                }
             }
             singleton[bestNeighbor] = false;
         }
@@ -709,290 +715,312 @@ std::vector<std::pair<vertex_idx_t<Graph_t>, vertex_idx_t<Graph_t> > > StepBySte
     return contractionSteps;
 }
 
-template<typename Graph_t> 
-std::vector<unsigned> StepByStepCoarser<Graph_t>::ComputeFilteredTopLevel() const
-{
+template <typename Graph_t>
+std::vector<unsigned> StepByStepCoarser<Graph_t>::ComputeFilteredTopLevel() const {
     std::vector<unsigned> TopLevel(G_full.num_vertices());
     for (const vertex_idx node : top_sort_view(G_coarse)) {
-        if(!node_valid[node])
+        if (!node_valid[node]) {
             continue;
+        }
 
         TopLevel[node] = 0;
-        for (const vertex_idx pred: G_coarse.parents(node) )
+        for (const vertex_idx pred : G_coarse.parents(node)) {
             TopLevel[node] = std::max(TopLevel[node], TopLevel[pred] + 1);
-
+        }
     }
     return TopLevel;
 }
 
-template<typename Graph_t> 
+template <typename Graph_t>
 void StepByStepCoarser<Graph_t>::ComputeFilteredTopOrderIdx() {
     top_order_idx = GetFilteredTopOrderIdx(G_coarse, node_valid);
 }
 
-template<typename Graph_t> 
-std::vector<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::GetFilteredTopOrderIdx(const Graph_t& G, const std::vector<bool>& is_valid) {
+template <typename Graph_t>
+std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::GetFilteredTopOrderIdx(const Graph_t &G,
+                                                                                      const std::vector<bool> &is_valid) {
     std::vector<vertex_idx> top_order = GetFilteredTopOrder(is_valid, G);
     std::vector<vertex_idx> idx(G.num_vertices());
-    for (vertex_idx node = 0; node < top_order.size(); ++node)
+    for (vertex_idx node = 0; node < top_order.size(); ++node) {
         idx[top_order[node]] = node;
+    }
     return idx;
 }
 
-
-template<typename Graph_t> 
-void StepByStepCoarser<Graph_t>::coarsenForPebbling(const Graph_t& dag_in, Graph_t &coarsened_dag,
-                           std::vector<vertex_idx_t<Graph_t>> &new_vertex_id)
-{
-
+template <typename Graph_t>
+void StepByStepCoarser<Graph_t>::coarsenForPebbling(const Graph_t &dag_in,
+                                                    Graph_t &coarsened_dag,
+                                                    std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) {
     problem_type = PROBLEM_TYPE::PEBBLING;
     coarsening_strategy = COARSENING_STRATEGY::EDGE_BY_EDGE;
 
     unsigned nr_sources = 0;
-    for(vertex_idx node = 0; node < dag_in.num_vertices(); ++node)
-        if(dag_in.in_degree(node) == 0)
+    for (vertex_idx node = 0; node < dag_in.num_vertices(); ++node) {
+        if (dag_in.in_degree(node) == 0) {
             ++nr_sources;
+        }
+    }
 
     target_nr_of_nodes = std::max(target_nr_of_nodes, nr_sources + 1);
 
     CoarserGenContractionMap<Graph_t, Graph_t>::coarsenDag(dag_in, coarsened_dag, new_vertex_id);
 }
 
-template<typename Graph_t> 
-bool StepByStepCoarser<Graph_t>::IncontractableForPebbling(const std::pair<vertex_idx, vertex_idx>& edge) const
-{
-    if(G_coarse.in_degree(edge.first) == 0)
+template <typename Graph_t>
+bool StepByStepCoarser<Graph_t>::IncontractableForPebbling(const std::pair<vertex_idx, vertex_idx> &edge) const {
+    if (G_coarse.in_degree(edge.first) == 0) {
         return true;
+    }
 
     v_memw_t<Graph_t> sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second);
     std::set<vertex_idx> parents;
-    for(vertex_idx pred : G_coarse.parents(edge.first))
+    for (vertex_idx pred : G_coarse.parents(edge.first)) {
         parents.insert(pred);
-    for(vertex_idx pred : G_coarse.parents(edge.second))
-        if(pred != edge.first)
+    }
+    for (vertex_idx pred : G_coarse.parents(edge.second)) {
+        if (pred != edge.first) {
             parents.insert(pred);
-    for(vertex_idx node : parents)
+        }
+    }
+    for (vertex_idx node : parents) {
         sum_weight += G_coarse.vertex_mem_weight(node);
+    }
 
-    if(sum_weight > fast_mem_capacity)
+    if (sum_weight > fast_mem_capacity) {
         return true;
-    
+    }
+
     std::set<vertex_idx> children;
-    for(vertex_idx succ: G_coarse.children(edge.second))
+    for (vertex_idx succ : G_coarse.children(edge.second)) {
         children.insert(succ);
-    for(vertex_idx succ: G_coarse.children(edge.first))
-        if(succ != edge.second)
+    }
+    for (vertex_idx succ : G_coarse.children(edge.first)) {
+        if (succ != edge.second) {
             children.insert(succ);
+        }
+    }
 
-    for(vertex_idx child : children)
-    {
-        sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second) + G_coarse.vertex_mem_weight(child);
-        for(vertex_idx pred: G_coarse.parents(child))
-        {
-            if(pred != edge.first && pred != edge.second)
+    for (vertex_idx child : children) {
+        sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second)
+                     + G_coarse.vertex_mem_weight(child);
+        for (vertex_idx pred : G_coarse.parents(child)) {
+            if (pred != edge.first && pred != edge.second) {
                 sum_weight += G_coarse.vertex_mem_weight(pred);
+            }
         }
-        
-        if(sum_weight > fast_mem_capacity)
+
+        if (sum_weight > fast_mem_capacity) {
             return true;
+        }
     }
     return false;
 }
 
-template<typename Graph_t> 
-void StepByStepCoarser<Graph_t>::MergeSourcesInPebbling()
-{
+template <typename Graph_t>
+void StepByStepCoarser<Graph_t>::MergeSourcesInPebbling() {
     // initialize memory requirement sums to check viability later
-    std::vector<v_memw_t<Graph_t> > memory_sum(G_coarse.num_vertices(), 0);
+    std::vector<v_memw_t<Graph_t>> memory_sum(G_coarse.num_vertices(), 0);
     std::vector<vertex_idx> sources;
-    for(vertex_idx node = 0; node < G_coarse.num_vertices(); ++node)
-    {
-        if(!node_valid[node])
+    for (vertex_idx node = 0; node < G_coarse.num_vertices(); ++node) {
+        if (!node_valid[node]) {
             continue;
+        }
 
-        if(G_coarse.in_degree(node)>0)
-        {
+        if (G_coarse.in_degree(node) > 0) {
             memory_sum[node] = G_coarse.vertex_mem_weight(node);
-            for(vertex_idx pred: G_coarse.parents(node))
+            for (vertex_idx pred : G_coarse.parents(node)) {
                 memory_sum[node] += G_coarse.vertex_mem_weight(pred);
-        }
-        else 
+            }
+        } else {
             sources.push_back(node);
+        }
     }
-    
+
     std::set<vertex_idx> invalidated_sources;
     bool could_merge = true;
-    while(could_merge)
-    {
+    while (could_merge) {
         could_merge = false;
-        for(unsigned idx1 = 0; idx1 < sources.size(); ++idx1)
-        {
+        for (unsigned idx1 = 0; idx1 < sources.size(); ++idx1) {
             vertex_idx source_a = sources[idx1];
-            if(invalidated_sources.find(source_a) != invalidated_sources.end())
+            if (invalidated_sources.find(source_a) != invalidated_sources.end()) {
                 continue;
-            
-            for(unsigned idx2 = idx1 + 1; idx2 < sources.size(); ++idx2)
-            {
+            }
+
+            for (unsigned idx2 = idx1 + 1; idx2 < sources.size(); ++idx2) {
                 vertex_idx source_b = sources[idx2];
-                if(invalidated_sources.find(source_b) != invalidated_sources.end())
+                if (invalidated_sources.find(source_b) != invalidated_sources.end()) {
                     continue;
-                
+                }
+
                 // check if we can merge source_a and source_b
                 std::set<vertex_idx> a_children, b_children;
-                for(vertex_idx succ: G_coarse.children(source_a))
+                for (vertex_idx succ : G_coarse.children(source_a)) {
                     a_children.insert(succ);
-                for(vertex_idx succ: G_coarse.children(source_b))
+                }
+                for (vertex_idx succ : G_coarse.children(source_b)) {
                     b_children.insert(succ);
-                
+                }
+
                 std::set<vertex_idx> only_a, only_b, both;
-                for(vertex_idx succ: G_coarse.children(source_a))
-                {
-                    if(b_children.find(succ) == b_children.end())
+                for (vertex_idx succ : G_coarse.children(source_a)) {
+                    if (b_children.find(succ) == b_children.end()) {
                         only_a.insert(succ);
-                    else
+                    } else {
                         both.insert(succ);
+                    }
                 }
-                for(vertex_idx succ: G_coarse.children(source_b))
-                {
-                    if(a_children.find(succ) == a_children.end())
+                for (vertex_idx succ : G_coarse.children(source_b)) {
+                    if (a_children.find(succ) == a_children.end()) {
                         only_b.insert(succ);
+                    }
                 }
 
                 bool violates_constraint = false;
-                for(vertex_idx node : only_a)
-                    if(memory_sum[node] + G_coarse.vertex_mem_weight(source_b) > fast_mem_capacity)
+                for (vertex_idx node : only_a) {
+                    if (memory_sum[node] + G_coarse.vertex_mem_weight(source_b) > fast_mem_capacity) {
                         violates_constraint = true;
-                for(vertex_idx node : only_b)
-                    if(memory_sum[node] + G_coarse.vertex_mem_weight(source_a) > fast_mem_capacity)
+                    }
+                }
+                for (vertex_idx node : only_b) {
+                    if (memory_sum[node] + G_coarse.vertex_mem_weight(source_a) > fast_mem_capacity) {
                         violates_constraint = true;
+                    }
+                }
 
-                if(violates_constraint)
+                if (violates_constraint) {
                     continue;
+                }
 
                 // check if we want to merge source_a and source_b
-                double sim_diff = (only_a.size() + only_b.size() == 0) ? 0.0001 : static_cast<double>(only_a.size() + only_b.size());
+                double sim_diff = (only_a.size() + only_b.size() == 0) ? 0.0001
+                                                                       : static_cast<double>(only_a.size() + only_b.size());
                 double ratio = static_cast<double>(both.size()) / sim_diff;
-                
-                if(ratio > 2)
-                {
+
+                if (ratio > 2) {
                     ContractSingleEdge(std::make_pair(source_a, source_b));
                     invalidated_sources.insert(source_b);
                     could_merge = true;
 
-                    for(vertex_idx node : only_a)
+                    for (vertex_idx node : only_a) {
                         memory_sum[node] += G_coarse.vertex_mem_weight(source_b);
-                    for(vertex_idx node : only_b)
+                    }
+                    for (vertex_idx node : only_b) {
                         memory_sum[node] += G_coarse.vertex_mem_weight(source_a);
+                    }
                 }
             }
         }
     }
 }
 
-template<typename Graph_t> 
-Graph_t StepByStepCoarser<Graph_t>::Contract(const std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) const
-{
+template <typename Graph_t>
+Graph_t StepByStepCoarser<Graph_t>::Contract(const std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) const {
     Graph_t G_contracted;
     std::vector<bool> is_valid(G_full.num_vertices(), false);
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
         is_valid[new_vertex_id[node]] = true;
+    }
 
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-        if(is_valid[node])
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
+        if (is_valid[node]) {
             G_contracted.add_vertex(0, 0, 0, 0);
+        }
+    }
 
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-    {
-        G_contracted.set_vertex_work_weight(new_vertex_id[node],
-                G_contracted.vertex_work_weight(new_vertex_id[node]) + G_full.vertex_work_weight(node));
-        G_contracted.set_vertex_comm_weight(new_vertex_id[node],
-                G_contracted.vertex_comm_weight(new_vertex_id[node]) + G_full.vertex_comm_weight(node));
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
+        G_contracted.set_vertex_work_weight(
+            new_vertex_id[node], G_contracted.vertex_work_weight(new_vertex_id[node]) + G_full.vertex_work_weight(node));
+        G_contracted.set_vertex_comm_weight(
+            new_vertex_id[node], G_contracted.vertex_comm_weight(new_vertex_id[node]) + G_full.vertex_comm_weight(node));
         G_contracted.set_vertex_mem_weight(new_vertex_id[node],
-                G_contracted.vertex_mem_weight(new_vertex_id[node]) + G_full.vertex_mem_weight(node));
+                                           G_contracted.vertex_mem_weight(new_vertex_id[node]) + G_full.vertex_mem_weight(node));
         G_contracted.set_vertex_type(new_vertex_id[node], G_full.vertex_type(node));
     }
 
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-        for (const auto &out_edge : out_edges(node, G_full))
-        {
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
+        for (const auto &out_edge : out_edges(node, G_full)) {
             const vertex_idx succ = target(out_edge, G_full);
 
-            if (new_vertex_id[node] == new_vertex_id[succ])
+            if (new_vertex_id[node] == new_vertex_id[succ]) {
                 continue;
-            
-            if constexpr (has_edge_weights_v<Graph_t>) {
+            }
 
+            if constexpr (has_edge_weights_v<Graph_t>) {
                 const auto pair = edge_desc(new_vertex_id[node], new_vertex_id[succ], G_contracted);
 
                 if (pair.second) {
-                    G_contracted.set_edge_comm_weight(pair.first, G_contracted.edge_comm_weight(pair.first) +
-                                                                    G_full.edge_comm_weight(out_edge));
+                    G_contracted.set_edge_comm_weight(
+                        pair.first, G_contracted.edge_comm_weight(pair.first) + G_full.edge_comm_weight(out_edge));
                 } else {
                     G_contracted.add_edge(new_vertex_id[node], new_vertex_id[succ], G_full.edge_comm_weight(out_edge));
                 }
 
             } else {
-
                 if (not edge(new_vertex_id[node], new_vertex_id[succ], G_contracted)) {
                     G_contracted.add_edge(new_vertex_id[node], new_vertex_id[succ]);
                 }
             }
         }
-    
+    }
+
     return G_contracted;
 }
 
-template<typename Graph_t> 
-void StepByStepCoarser<Graph_t>::SetIdVector(std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) const
-{
+template <typename Graph_t>
+void StepByStepCoarser<Graph_t>::SetIdVector(std::vector<vertex_idx_t<Graph_t>> &new_vertex_id) const {
     new_vertex_id.clear();
     new_vertex_id.resize(G_full.num_vertices());
 
     new_vertex_id = GetIntermediateIDs(contractionHistory.size());
 }
 
-template<typename Graph_t>
-std::vector<vertex_idx_t<Graph_t> > StepByStepCoarser<Graph_t>::GetIntermediateIDs(vertex_idx until_which_step) const {
-    
+template <typename Graph_t>
+std::vector<vertex_idx_t<Graph_t>> StepByStepCoarser<Graph_t>::GetIntermediateIDs(vertex_idx until_which_step) const {
     std::vector<vertex_idx> target(G_full.num_vertices()), pointsTo(G_full.num_vertices(), std::numeric_limits<vertex_idx>::max());
 
-    for(vertex_idx iterate = 0; iterate < contractionHistory.size() && iterate < until_which_step; ++iterate)
-    {
-        const std::pair<vertex_idx, vertex_idx>& contractionStep = contractionHistory[iterate];
+    for (vertex_idx iterate = 0; iterate < contractionHistory.size() && iterate < until_which_step; ++iterate) {
+        const std::pair<vertex_idx, vertex_idx> &contractionStep = contractionHistory[iterate];
         pointsTo[contractionStep.second] = contractionStep.first;
     }
 
     for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
         target[node] = node;
-        while (pointsTo[target[node]] != std::numeric_limits<vertex_idx>::max())
+        while (pointsTo[target[node]] != std::numeric_limits<vertex_idx>::max()) {
             target[node] = pointsTo[target[node]];
+        }
     }
 
-    if (contractionHistory.empty() || until_which_step == 0) 
+    if (contractionHistory.empty() || until_which_step == 0) {
         return target;
+    }
 
     std::vector<bool> is_valid(G_full.num_vertices(), false);
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
         is_valid[target[node]] = true;
+    }
 
     std::vector<vertex_idx> new_id(G_full.num_vertices());
     vertex_idx current_index = 0;
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
-        if(is_valid[node])
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
+        if (is_valid[node]) {
             new_id[node] = current_index++;
+        }
+    }
 
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
         target[node] = new_id[target[node]];
+    }
 
     boost_graph_t temp_dag;
     temp_dag = Contract(target);
     std::vector<bool> all_valid(temp_dag.num_vertices(), true);
     std::vector<vertex_idx> top_idx = GetFilteredTopOrderIdx(temp_dag, all_valid);
 
-    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node)
+    for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) {
         target[node] = top_idx[target[node]];
+    }
 
     return target;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/coarser/coarser_util.hpp b/include/osp/coarser/coarser_util.hpp
index e3e7fa79..e2b1fa31 100644
--- a/include/osp/coarser/coarser_util.hpp
+++ b/include/osp/coarser/coarser_util.hpp
@@ -31,9 +31,10 @@ limitations under the License.
 #include "osp/concepts/specific_graph_impl.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 
-namespace osp { namespace coarser_util {
+namespace osp {
+namespace coarser_util {
 
-template<typename Graph_t_out>
+template <typename Graph_t_out>
 bool check_valid_contraction_map(const std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) {
     std::set<vertex_idx_t<Graph_t_out>> image(vertex_contraction_map.cbegin(), vertex_contraction_map.cend());
     const vertex_idx_t<Graph_t_out> image_size = static_cast<vertex_idx_t<Graph_t_out>>(image.size());
@@ -42,15 +43,13 @@ bool check_valid_contraction_map(const std::vector<vertex_idx_t<Graph_t_out>> &v
     });
 }
 
-template<typename T>
+template <typename T>
 struct acc_sum {
-
     T operator()(const T &a, const T &b) { return a + b; }
 };
 
-template<typename T>
+template <typename T>
 struct acc_max {
-
     T operator()(const T &a, const T &b) { return std::max(a, b); }
 };
 
@@ -63,13 +62,15 @@ struct acc_max {
  * @return A status code indicating the success or failure of the coarsening operation.
  */
 
-template <typename Graph_t_in, class Graph_t_out, typename v_work_acc_method, typename v_comm_acc_method,
-          typename v_mem_acc_method, typename e_comm_acc_method>
-bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
+template <typename Graph_t_in, class Graph_t_out, typename v_work_acc_method, typename v_comm_acc_method, typename v_mem_acc_method, typename e_comm_acc_method>
+bool construct_coarse_dag(const Graph_t_in &dag_in,
+                          Graph_t_out &coarsened_dag,
                           const std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) {
-    static_assert(is_directed_graph_v<Graph_t_in> && is_directed_graph_v<Graph_t_out>, "Graph types need to satisfy the is_directed_graph concept.");
+    static_assert(is_directed_graph_v<Graph_t_in> && is_directed_graph_v<Graph_t_out>,
+                  "Graph types need to satisfy the is_directed_graph concept.");
     static_assert(is_computational_dag_v<Graph_t_in>, "Graph_t_in must be a computational DAG");
-    static_assert(is_constructable_cdag_v<Graph_t_out> || is_direct_constructable_cdag_v<Graph_t_out>, "Graph_t_out must be a (direct) constructable computational DAG");
+    static_assert(is_constructable_cdag_v<Graph_t_out> || is_direct_constructable_cdag_v<Graph_t_out>,
+                  "Graph_t_out must be a (direct) constructable computational DAG");
 
     assert(check_valid_contraction_map<Graph_t_out>(vertex_contraction_map));
 
@@ -79,8 +80,8 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
     }
 
     if constexpr (is_direct_constructable_cdag_v<Graph_t_out>) {
-        const vertex_idx_t<Graph_t_out> num_vert_quotient =
-            (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
+        const vertex_idx_t<Graph_t_out> num_vert_quotient
+            = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
 
         std::set<std::pair<vertex_idx_t<Graph_t_out>, vertex_idx_t<Graph_t_out>>> quotient_edges;
 
@@ -96,9 +97,12 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
         coarsened_dag = Graph_t_out(num_vert_quotient, quotient_edges);
 
         if constexpr (has_vertex_weights_v<Graph_t_in> && is_modifiable_cdag_vertex_v<Graph_t_out>) {
-            static_assert(std::is_same_v<v_workw_t<Graph_t_in>, v_workw_t<Graph_t_out>>, "Work weight types of in-graph and out-graph must be the same.");
-            static_assert(std::is_same_v<v_commw_t<Graph_t_in>, v_commw_t<Graph_t_out>>, "Vertex communication types of in-graph and out-graph must be the same.");
-            static_assert(std::is_same_v<v_memw_t<Graph_t_in>, v_memw_t<Graph_t_out>>, "Memory weight types of in-graph and out-graph must be the same.");
+            static_assert(std::is_same_v<v_workw_t<Graph_t_in>, v_workw_t<Graph_t_out>>,
+                          "Work weight types of in-graph and out-graph must be the same.");
+            static_assert(std::is_same_v<v_commw_t<Graph_t_in>, v_commw_t<Graph_t_out>>,
+                          "Vertex communication types of in-graph and out-graph must be the same.");
+            static_assert(std::is_same_v<v_memw_t<Graph_t_in>, v_memw_t<Graph_t_out>>,
+                          "Memory weight types of in-graph and out-graph must be the same.");
 
             for (const vertex_idx_t<Graph_t_in> &vert : coarsened_dag.vertices()) {
                 coarsened_dag.set_vertex_work_weight(vert, 0);
@@ -110,17 +114,17 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
                 coarsened_dag.set_vertex_work_weight(
                     vertex_contraction_map[vert],
                     v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]),
-                                    dag_in.vertex_work_weight(vert)));
+                                        dag_in.vertex_work_weight(vert)));
 
                 coarsened_dag.set_vertex_comm_weight(
                     vertex_contraction_map[vert],
                     v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]),
-                                    dag_in.vertex_comm_weight(vert)));
+                                        dag_in.vertex_comm_weight(vert)));
 
                 coarsened_dag.set_vertex_mem_weight(
                     vertex_contraction_map[vert],
                     v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]),
-                                    dag_in.vertex_mem_weight(vert)));
+                                       dag_in.vertex_mem_weight(vert)));
             }
         }
 
@@ -139,7 +143,7 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
 
         if constexpr (has_edge_weights_v<Graph_t_in> && is_modifiable_cdag_comm_edge_v<Graph_t_out>) {
             static_assert(std::is_same_v<e_commw_t<Graph_t_in>, e_commw_t<Graph_t_out>>,
-                        "Edge weight type of in graph and out graph must be the same!");
+                          "Edge weight type of in graph and out graph must be the same!");
 
             for (const auto &edge : edges(coarsened_dag)) {
                 coarsened_dag.set_edge_comm_weight(edge, 0);
@@ -148,43 +152,42 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
             for (const auto &ori_edge : edges(dag_in)) {
                 vertex_idx_t<Graph_t_out> src = vertex_contraction_map[source(ori_edge, dag_in)];
                 vertex_idx_t<Graph_t_out> tgt = vertex_contraction_map[target(ori_edge, dag_in)];
-                
-                if (src == tgt) continue;
+
+                if (src == tgt) {
+                    continue;
+                }
 
                 const auto [cont_edge, found] = edge_desc(src, tgt, coarsened_dag);
                 assert(found && "The edge should already exist");
-                coarsened_dag.set_edge_comm_weight(cont_edge, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge)));
+                coarsened_dag.set_edge_comm_weight(
+                    cont_edge, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge)));
             }
-        }        
+        }
         return true;
     }
 
     if constexpr (is_constructable_cdag_v<Graph_t_out>) {
         coarsened_dag = Graph_t_out();
 
-        const vertex_idx_t<Graph_t_out> num_vert_quotient =
-            (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
+        const vertex_idx_t<Graph_t_out> num_vert_quotient
+            = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
 
         for (vertex_idx_t<Graph_t_out> vert = 0; vert < num_vert_quotient; ++vert) {
             coarsened_dag.add_vertex(0, 0, 0);
         }
 
         for (const vertex_idx_t<Graph_t_in> &vert : dag_in.vertices()) {
+            coarsened_dag.set_vertex_work_weight(vertex_contraction_map[vert],
+                                                 v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]),
+                                                                     dag_in.vertex_work_weight(vert)));
 
-            coarsened_dag.set_vertex_work_weight(
-                vertex_contraction_map[vert],
-                v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]),
-                                  dag_in.vertex_work_weight(vert)));
-
-            coarsened_dag.set_vertex_comm_weight(
-                vertex_contraction_map[vert],
-                v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]),
-                                  dag_in.vertex_comm_weight(vert)));
+            coarsened_dag.set_vertex_comm_weight(vertex_contraction_map[vert],
+                                                 v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]),
+                                                                     dag_in.vertex_comm_weight(vert)));
 
             coarsened_dag.set_vertex_mem_weight(
                 vertex_contraction_map[vert],
-                v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]),
-                                 dag_in.vertex_mem_weight(vert)));
+                v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]), dag_in.vertex_mem_weight(vert)));
         }
 
         if constexpr (has_typed_vertices_v<Graph_t_in> && is_constructable_cdag_typed_vertex_v<Graph_t_out>) {
@@ -211,15 +214,14 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
                                   "Edge weight type of in graph and out graph must be the same!");
 
                     edge_desc_t<Graph_t_in> ori_edge = edge_desc(vert, chld, dag_in).first;
-                    const auto pair =
-                        edge_desc(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag);
+                    const auto pair = edge_desc(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag);
                     if (pair.second) {
-                        coarsened_dag.set_edge_comm_weight(pair.first,
-                                                           e_comm_acc_method()(coarsened_dag.edge_comm_weight(pair.first),
-                                                                             dag_in.edge_comm_weight(ori_edge)));
+                        coarsened_dag.set_edge_comm_weight(
+                            pair.first,
+                            e_comm_acc_method()(coarsened_dag.edge_comm_weight(pair.first), dag_in.edge_comm_weight(ori_edge)));
                     } else {
-                        coarsened_dag.add_edge(vertex_contraction_map[vert], vertex_contraction_map[chld],
-                                               dag_in.edge_comm_weight(ori_edge));
+                        coarsened_dag.add_edge(
+                            vertex_contraction_map[vert], vertex_contraction_map[chld], dag_in.edge_comm_weight(ori_edge));
                     }
                 } else {
                     if (not edge(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag)) {
@@ -233,13 +235,15 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
     return false;
 }
 
-template<typename Graph_t_in, class Graph_t_out, typename v_work_acc_method = acc_sum<v_workw_t<Graph_t_in>>,
-         typename v_comm_acc_method = acc_sum<v_commw_t<Graph_t_in>>,
-         typename v_mem_acc_method = acc_sum<v_memw_t<Graph_t_in>>,
-         typename e_comm_acc_method = acc_sum<e_commw_t<Graph_t_in>>>
-bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
+template <typename Graph_t_in,
+          class Graph_t_out,
+          typename v_work_acc_method = acc_sum<v_workw_t<Graph_t_in>>,
+          typename v_comm_acc_method = acc_sum<v_commw_t<Graph_t_in>>,
+          typename v_mem_acc_method = acc_sum<v_memw_t<Graph_t_in>>,
+          typename e_comm_acc_method = acc_sum<e_commw_t<Graph_t_in>>>
+bool construct_coarse_dag(const Graph_t_in &dag_in,
+                          Graph_t_out &coarsened_dag,
                           std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) {
-
     if constexpr (is_Compact_Sparse_Graph_reorder_v<Graph_t_out>) {
         static_assert(is_directed_graph_v<Graph_t_in> && is_directed_graph_v<Graph_t_out>,
                       "Graph types need to satisfy the is_directed_graph concept.");
@@ -253,8 +257,8 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
             coarsened_dag = Graph_t_out();
             return true;
         }
-        const vertex_idx_t<Graph_t_out> num_vert_quotient =
-            (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
+        const vertex_idx_t<Graph_t_out> num_vert_quotient
+            = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
 
         std::set<std::pair<vertex_idx_t<Graph_t_out>, vertex_idx_t<Graph_t_out>>> quotient_edges;
 
@@ -269,16 +273,19 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
 
         coarsened_dag = Graph_t_out(num_vert_quotient, quotient_edges);
 
-        const auto& pushforward_map = coarsened_dag.get_pushforward_permutation();
+        const auto &pushforward_map = coarsened_dag.get_pushforward_permutation();
         std::vector<vertex_idx_t<Graph_t_out>> combined_expansion_map(dag_in.num_vertices());
         for (const auto &vert : dag_in.vertices()) {
             combined_expansion_map[vert] = pushforward_map[vertex_contraction_map[vert]];
         }
 
         if constexpr (has_vertex_weights_v<Graph_t_in> && is_modifiable_cdag_vertex_v<Graph_t_out>) {
-            static_assert(std::is_same_v<v_workw_t<Graph_t_in>, v_workw_t<Graph_t_out>>, "Work weight types of in-graph and out-graph must be the same.");
-            static_assert(std::is_same_v<v_commw_t<Graph_t_in>, v_commw_t<Graph_t_out>>, "Vertex communication types of in-graph and out-graph must be the same.");
-            static_assert(std::is_same_v<v_memw_t<Graph_t_in>, v_memw_t<Graph_t_out>>, "Memory weight types of in-graph and out-graph must be the same.");
+            static_assert(std::is_same_v<v_workw_t<Graph_t_in>, v_workw_t<Graph_t_out>>,
+                          "Work weight types of in-graph and out-graph must be the same.");
+            static_assert(std::is_same_v<v_commw_t<Graph_t_in>, v_commw_t<Graph_t_out>>,
+                          "Vertex communication types of in-graph and out-graph must be the same.");
+            static_assert(std::is_same_v<v_memw_t<Graph_t_in>, v_memw_t<Graph_t_out>>,
+                          "Memory weight types of in-graph and out-graph must be the same.");
 
             for (const vertex_idx_t<Graph_t_in> &vert : coarsened_dag.vertices()) {
                 coarsened_dag.set_vertex_work_weight(vert, 0);
@@ -290,23 +297,23 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
                 coarsened_dag.set_vertex_work_weight(
                     vertex_contraction_map[vert],
                     v_work_acc_method()(coarsened_dag.vertex_work_weight(combined_expansion_map[vert]),
-                                    dag_in.vertex_work_weight(vert)));
+                                        dag_in.vertex_work_weight(vert)));
 
                 coarsened_dag.set_vertex_comm_weight(
                     vertex_contraction_map[vert],
                     v_comm_acc_method()(coarsened_dag.vertex_comm_weight(combined_expansion_map[vert]),
-                                    dag_in.vertex_comm_weight(vert)));
+                                        dag_in.vertex_comm_weight(vert)));
 
                 coarsened_dag.set_vertex_mem_weight(
                     vertex_contraction_map[vert],
                     v_mem_acc_method()(coarsened_dag.vertex_mem_weight(combined_expansion_map[vert]),
-                                    dag_in.vertex_mem_weight(vert)));
+                                       dag_in.vertex_mem_weight(vert)));
             }
         }
 
         if constexpr (has_typed_vertices_v<Graph_t_in> && is_modifiable_cdag_typed_vertex_v<Graph_t_out>) {
             static_assert(std::is_same_v<v_type_t<Graph_t_in>, v_type_t<Graph_t_out>>,
-                            "Vertex type types of in graph and out graph must be the same!");
+                          "Vertex type types of in graph and out graph must be the same!");
 
             for (const vertex_idx_t<Graph_t_in> &vert : dag_in.vertices()) {
                 coarsened_dag.set_vertex_type(vertex_contraction_map[vert], dag_in.vertex_type(vert));
@@ -317,16 +324,17 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
             //                 && "Contracted vertices must be of the same type");
         }
 
-
         if constexpr (has_edge_weights_v<Graph_t_in> && has_edge_weights_v<Graph_t_out>) {
             static_assert(std::is_same_v<e_commw_t<Graph_t_in>, e_commw_t<Graph_t_out>>,
-                        "Edge weight type of in graph and out graph must be the same!");
-            
+                          "Edge weight type of in graph and out graph must be the same!");
+
             for (const auto &ori_edge : edges(dag_in)) {
                 vertex_idx_t<Graph_t_out> src = vertex_contraction_map[source(ori_edge, dag_in)];
                 vertex_idx_t<Graph_t_out> tgt = vertex_contraction_map[target(ori_edge, dag_in)];
-                
-                if (src == tgt) continue;
+
+                if (src == tgt) {
+                    continue;
+                }
 
                 coarsened_dag.set_edge_comm_weight(src, tgt, 0);
             }
@@ -334,27 +342,28 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag,
             for (const auto &ori_edge : edges(dag_in)) {
                 vertex_idx_t<Graph_t_out> src = vertex_contraction_map[source(ori_edge, dag_in)];
                 vertex_idx_t<Graph_t_out> tgt = vertex_contraction_map[target(ori_edge, dag_in)];
-                
-                if (src == tgt) continue;
+
+                if (src == tgt) {
+                    continue;
+                }
 
                 const auto cont_edge = coarsened_dag.edge(pushforward_map[src], pushforward_map[tgt]);
-                assert(source(cont_edge, coarsened_dag) == pushforward_map[src] && target(cont_edge, coarsened_dag) == pushforward_map[tgt]);
-                coarsened_dag.set_edge_comm_weight(src, tgt, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge)));
+                assert(source(cont_edge, coarsened_dag) == pushforward_map[src]
+                       && target(cont_edge, coarsened_dag) == pushforward_map[tgt]);
+                coarsened_dag.set_edge_comm_weight(
+                    src, tgt, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge)));
             }
         }
 
         std::swap(vertex_contraction_map, combined_expansion_map);
         return true;
     } else {
-        return construct_coarse_dag<Graph_t_in, Graph_t_out, v_work_acc_method, v_comm_acc_method, v_mem_acc_method,
-                                    e_comm_acc_method>(dag_in, coarsened_dag,
-                                                       static_cast<const std::vector<vertex_idx_t<Graph_t_out>> &>(
-                                                           vertex_contraction_map));
+        return construct_coarse_dag<Graph_t_in, Graph_t_out, v_work_acc_method, v_comm_acc_method, v_mem_acc_method, e_comm_acc_method>(
+            dag_in, coarsened_dag, static_cast<const std::vector<vertex_idx_t<Graph_t_out>> &>(vertex_contraction_map));
     }
 }
 
-
-template<typename Graph_t_in>
+template <typename Graph_t_in>
 bool check_valid_expansion_map(const std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &vertex_expansion_map) {
     std::size_t cntr = 0;
 
@@ -385,15 +394,15 @@ bool check_valid_expansion_map(const std::vector<std::vector<vertex_idx_t<Graph_
     return (cntr == preImage.size());
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<std::vector<vertex_idx_t<Graph_t_in>>>
-invert_vertex_contraction_map(const std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<std::vector<vertex_idx_t<Graph_t_in>>> invert_vertex_contraction_map(
+    const std::vector<vertex_idx_t<Graph_t_out>> &vertex_contraction_map) {
     assert(check_valid_contraction_map<Graph_t_out>(vertex_contraction_map));
 
-
-
-    vertex_idx_t<Graph_t_out> num_vert = vertex_contraction_map.size() == 0? 0 :
-        *std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()) + 1;
+    vertex_idx_t<Graph_t_out> num_vert
+        = vertex_contraction_map.size() == 0
+              ? 0
+              : *std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()) + 1;
 
     std::vector<std::vector<vertex_idx_t<Graph_t_in>>> expansion_map(num_vert);
 
@@ -404,9 +413,9 @@ invert_vertex_contraction_map(const std::vector<vertex_idx_t<Graph_t_out>> &vert
     return expansion_map;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<vertex_idx_t<Graph_t_out>>
-invert_vertex_expansion_map(const std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &vertex_expansion_map) {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<vertex_idx_t<Graph_t_out>> invert_vertex_expansion_map(
+    const std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &vertex_expansion_map) {
     assert(check_valid_expansion_map<Graph_t_in>(vertex_expansion_map));
 
     vertex_idx_t<Graph_t_in> num_vert = 0;
@@ -426,8 +435,7 @@ invert_vertex_expansion_map(const std::vector<std::vector<vertex_idx_t<Graph_t_i
     return vertex_contraction_map;
 }
 
-
-template<typename Graph_t_in>
+template <typename Graph_t_in>
 void reorder_expansion_map(const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &vertex_expansion_map) {
     assert(check_valid_expansion_map<Graph_t_in>(vertex_expansion_map));
 
@@ -452,7 +460,7 @@ void reorder_expansion_map(const Graph_t_in &graph, std::vector<std::vector<vert
     }
 
     auto cmp = [&vertex_expansion_map](const std::size_t &lhs, const std::size_t &rhs) {
-        return vertex_expansion_map[lhs] > vertex_expansion_map[rhs];                       // because priority queue is a max_priority queue
+        return vertex_expansion_map[lhs] > vertex_expansion_map[rhs];    // because priority queue is a max_priority queue
     };
 
     std::priority_queue<std::size_t, std::vector<std::size_t>, decltype(cmp)> ready(cmp);
@@ -477,7 +485,7 @@ void reorder_expansion_map(const Graph_t_in &graph, std::vector<std::vector<vert
                         ready.emplace(vertex_contraction_map.at(chld));
                     }
                 }
-            } 
+            }
         }
     }
     assert(topOrder.size() == vertex_expansion_map.size());
@@ -487,17 +495,11 @@ void reorder_expansion_map(const Graph_t_in &graph, std::vector<std::vector<vert
     return;
 }
 
-
-
-
-
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 bool pull_back_schedule(const BspSchedule<Graph_t_in> &schedule_in,
                         const std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &vertex_map,
                         BspSchedule<Graph_t_out> &schedule_out) {
-
     for (unsigned v = 0; v < vertex_map.size(); ++v) {
-
         const auto proc = schedule_in.assignedProcessor(v);
         const auto step = schedule_in.assignedSuperstep(v);
 
@@ -510,11 +512,10 @@ bool pull_back_schedule(const BspSchedule<Graph_t_in> &schedule_in,
     return true;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 bool pull_back_schedule(const BspSchedule<Graph_t_in> &schedule_in,
                         const std::vector<vertex_idx_t<Graph_t_out>> &reverse_vertex_map,
                         BspSchedule<Graph_t_out> &schedule_out) {
-
     for (unsigned idx = 0; idx < reverse_vertex_map.size(); ++idx) {
         const auto &v = reverse_vertex_map[idx];
 
@@ -525,17 +526,18 @@ bool pull_back_schedule(const BspSchedule<Graph_t_in> &schedule_in,
     return true;
 }
 
-template<typename IntegralType>
-std::vector<IntegralType> compose_vertex_contraction_map(const std::vector<IntegralType> &firstMap, const std::vector<IntegralType> &secondMap) {
+template <typename IntegralType>
+std::vector<IntegralType> compose_vertex_contraction_map(const std::vector<IntegralType> &firstMap,
+                                                         const std::vector<IntegralType> &secondMap) {
     static_assert(std::is_integral_v<IntegralType>);
     std::vector<IntegralType> composedMap(firstMap.size());
 
     for (std::size_t i = 0; i < composedMap.size(); ++i) {
-        composedMap[i] = secondMap[ firstMap[i] ];
+        composedMap[i] = secondMap[firstMap[i]];
     }
 
     return composedMap;
 }
 
-} // end namespace coarser_util
-} // end namespace osp
\ No newline at end of file
+}    // end namespace coarser_util
+}    // end namespace osp
diff --git a/include/osp/coarser/funnel/FunnelBfs.hpp b/include/osp/coarser/funnel/FunnelBfs.hpp
index da1c38be..07ba93d0 100644
--- a/include/osp/coarser/funnel/FunnelBfs.hpp
+++ b/include/osp/coarser/funnel/FunnelBfs.hpp
@@ -17,9 +17,10 @@ limitations under the License.
 */
 
 #pragma once
+#include <limits>
+
 #include "osp/coarser/Coarser.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp"
-#include <limits>
 
 namespace osp {
 
@@ -28,16 +29,14 @@ namespace osp {
  * (from outside the group)
  *
  */
-template<typename Graph_t_in, typename Graph_t_out, bool use_architecture_memory_contraints = false>
+template <typename Graph_t_in, typename Graph_t_out, bool use_architecture_memory_contraints = false>
 class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
-
   public:
     /**
      * @brief Parameters for Funnel coarsener
      *
      */
     struct FunnelBfs_parameters {
-
         bool funnel_incoming;
 
         bool use_approx_transitive_reduction;
@@ -49,23 +48,23 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
 
         FunnelBfs_parameters(v_workw_t<Graph_t_in> max_work_weight_ = std::numeric_limits<v_workw_t<Graph_t_in>>::max(),
                              v_memw_t<Graph_t_in> max_memory_weight_ = std::numeric_limits<v_memw_t<Graph_t_in>>::max(),
-                             unsigned max_depth_ = std::numeric_limits<unsigned>::max(), 
+                             unsigned max_depth_ = std::numeric_limits<unsigned>::max(),
                              bool funnel_incoming_ = true,
                              bool use_approx_transitive_reduction_ = true)
-            : funnel_incoming(funnel_incoming_), use_approx_transitive_reduction(use_approx_transitive_reduction_),
-              max_work_weight(max_work_weight_), max_memory_weight(max_memory_weight_), max_depth(max_depth_) {};
+            : funnel_incoming(funnel_incoming_),
+              use_approx_transitive_reduction(use_approx_transitive_reduction_),
+              max_work_weight(max_work_weight_),
+              max_memory_weight(max_memory_weight_),
+              max_depth(max_depth_) {};
 
         ~FunnelBfs_parameters() = default;
     };
 
-    FunnelBfs(FunnelBfs_parameters parameters_ = FunnelBfs_parameters())
-        : parameters(parameters_) {}
+    FunnelBfs(FunnelBfs_parameters parameters_ = FunnelBfs_parameters()) : parameters(parameters_) {}
 
     virtual ~FunnelBfs() = default;
 
-    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>>
-    generate_vertex_expansion_map(const Graph_t_in &graph) override {
-
+    virtual std::vector<std::vector<vertex_idx_t<Graph_t_in>>> generate_vertex_expansion_map(const Graph_t_in &graph) override {
         if constexpr (use_architecture_memory_contraints) {
             if (max_memory_per_vertex_type.size() < graph.num_vertex_types()) {
                 throw std::runtime_error("FunnelBfs: max_memory_per_vertex_type has insufficient size.");
@@ -93,23 +92,22 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
     std::vector<v_memw_t<Graph_t_in>> max_memory_per_vertex_type;
 
     void run_in_contraction(const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &partition) {
-
         using vertex_idx_t = vertex_idx_t<Graph_t_in>;
 
         const std::unordered_set<edge_desc_t<Graph_t_in>> edge_mask = parameters.use_approx_transitive_reduction
-                                                                       ? long_edges_in_triangles_parallel(graph)
-                                                                       : std::unordered_set<edge_desc_t<Graph_t_in>>();
+                                                                          ? long_edges_in_triangles_parallel(graph)
+                                                                          : std::unordered_set<edge_desc_t<Graph_t_in>>();
 
         std::vector<bool> visited(graph.num_vertices(), false);
 
         const std::vector<vertex_idx_t> top_order = GetTopOrder(graph);
 
         for (auto rev_top_it = top_order.rbegin(); rev_top_it != top_order.crend(); rev_top_it++) {
-
             const vertex_idx_t &bottom_node = *rev_top_it;
 
-            if (visited[bottom_node])
+            if (visited[bottom_node]) {
                 continue;
+            }
 
             v_workw_t<Graph_t_in> work_weight_of_group = 0;
             v_memw_t<Graph_t_in> memory_weight_of_group = 0;
@@ -123,7 +121,6 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
             unsigned depth_counter = 0;
 
             while ((not vertex_processing_fifo.empty()) || (not next_vertex_processing_fifo.empty())) {
-
                 if (vertex_processing_fifo.empty()) {
                     vertex_processing_fifo = next_vertex_processing_fifo;
                     next_vertex_processing_fifo.clear();
@@ -136,19 +133,23 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                 vertex_idx_t active_node = vertex_processing_fifo.front();
                 vertex_processing_fifo.pop_front();
 
-                if (graph.vertex_type(active_node) != graph.vertex_type(bottom_node))
+                if (graph.vertex_type(active_node) != graph.vertex_type(bottom_node)) {
                     continue;
+                }
 
-                if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight)
+                if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) {
                     continue;
+                }
 
-                if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight)
+                if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) {
                     continue;
+                }
 
                 if constexpr (use_architecture_memory_contraints) {
-                    if (memory_weight_of_group + graph.vertex_mem_weight(active_node) >
-                        max_memory_per_vertex_type[graph.vertex_type(bottom_node)])
+                    if (memory_weight_of_group + graph.vertex_mem_weight(active_node)
+                        > max_memory_per_vertex_type[graph.vertex_type(bottom_node)]) {
                         continue;
+                    }
                 }
 
                 group.emplace_back(active_node);
@@ -156,9 +157,9 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                 memory_weight_of_group += graph.vertex_mem_weight(active_node);
 
                 for (const auto &in_edge : in_edges(active_node, graph)) {
-
-                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend()))
+                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) {
                         continue;
+                    }
 
                     const vertex_idx_t &par = source(in_edge, graph);
 
@@ -166,14 +167,13 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                         children_not_in_group[par] -= 1;
 
                     } else {
-
                         if (parameters.use_approx_transitive_reduction) {
-
                             children_not_in_group[par] = 0;
 
                             for (const auto out_edge : out_edges(par, graph)) {
-                                if (edge_mask.find(out_edge) != edge_mask.cend())
+                                if (edge_mask.find(out_edge) != edge_mask.cend()) {
                                     continue;
+                                }
                                 children_not_in_group[par] += 1;
                             }
 
@@ -184,9 +184,9 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                     }
                 }
                 for (const auto &in_edge : in_edges(active_node, graph)) {
-
-                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend()))
+                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) {
                         continue;
+                    }
 
                     const vertex_idx_t &par = source(in_edge, graph);
                     if (children_not_in_group[par] == 0) {
@@ -204,19 +204,18 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
     }
 
     void run_out_contraction(const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &partition) {
-
         using vertex_idx_t = vertex_idx_t<Graph_t_in>;
 
         const std::unordered_set<edge_desc_t<Graph_t_in>> edge_mask = parameters.use_approx_transitive_reduction
-                                                                       ? long_edges_in_triangles_parallel(graph)
-                                                                       : std::unordered_set<edge_desc_t<Graph_t_in>>();
+                                                                          ? long_edges_in_triangles_parallel(graph)
+                                                                          : std::unordered_set<edge_desc_t<Graph_t_in>>();
 
         std::vector<bool> visited(graph.num_vertices(), false);
 
         for (const auto &top_node : top_sort_view(graph)) {
-
-            if (visited[top_node])
+            if (visited[top_node]) {
                 continue;
+            }
 
             v_workw_t<Graph_t_in> work_weight_of_group = 0;
             v_memw_t<Graph_t_in> memory_weight_of_group = 0;
@@ -230,7 +229,6 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
             unsigned depth_counter = 0;
 
             while ((not vertex_processing_fifo.empty()) || (not next_vertex_processing_fifo.empty())) {
-
                 if (vertex_processing_fifo.empty()) {
                     vertex_processing_fifo = next_vertex_processing_fifo;
                     next_vertex_processing_fifo.clear();
@@ -243,19 +241,23 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                 vertex_idx_t active_node = vertex_processing_fifo.front();
                 vertex_processing_fifo.pop_front();
 
-                if (graph.vertex_type(active_node) != graph.vertex_type(top_node))
+                if (graph.vertex_type(active_node) != graph.vertex_type(top_node)) {
                     continue;
+                }
 
-                if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight)
+                if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) {
                     continue;
+                }
 
-                if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight)
+                if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) {
                     continue;
+                }
 
                 if constexpr (use_architecture_memory_contraints) {
-                    if (memory_weight_of_group + graph.vertex_mem_weight(active_node) >
-                        max_memory_per_vertex_type[graph.vertex_type(top_node)])
+                    if (memory_weight_of_group + graph.vertex_mem_weight(active_node)
+                        > max_memory_per_vertex_type[graph.vertex_type(top_node)]) {
                         continue;
+                    }
                 }
 
                 group.emplace_back(active_node);
@@ -263,9 +265,9 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                 memory_weight_of_group += graph.vertex_mem_weight(active_node);
 
                 for (const auto &out_edge : out_edges(active_node, graph)) {
-
-                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend()))
+                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) {
                         continue;
+                    }
 
                     const vertex_idx_t &child = target(out_edge, graph);
 
@@ -273,14 +275,13 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                         parents_not_in_group[child] -= 1;
 
                     } else {
-
                         if (parameters.use_approx_transitive_reduction) {
-
                             parents_not_in_group[child] = 0;
 
                             for (const auto in_edge : in_edges(child, graph)) {
-                                if (edge_mask.find(in_edge) != edge_mask.cend())
+                                if (edge_mask.find(in_edge) != edge_mask.cend()) {
                                     continue;
+                                }
                                 parents_not_in_group[child] += 1;
                             }
 
@@ -291,9 +292,9 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
                     }
                 }
                 for (const auto &out_edge : out_edges(active_node, graph)) {
-
-                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend()))
+                    if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) {
                         continue;
+                    }
 
                     const vertex_idx_t &child = target(out_edge, graph);
                     if (parents_not_in_group[child] == 0) {
@@ -310,4 +311,5 @@ class FunnelBfs : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
         }
     }
 };
-} // namespace osp
\ No newline at end of file
+
+}    // namespace osp
diff --git a/include/osp/coarser/hdagg/hdagg_coarser.hpp b/include/osp/coarser/hdagg/hdagg_coarser.hpp
index 489ef5bb..7d206187 100644
--- a/include/osp/coarser/hdagg/hdagg_coarser.hpp
+++ b/include/osp/coarser/hdagg/hdagg_coarser.hpp
@@ -18,19 +18,17 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/coarser/Coarser.hpp"
+#include <limits>
 
+#include "osp/coarser/Coarser.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
-#include <limits>
 
 namespace osp {
 
-template<typename Graph_t_in, typename Graph_t_out>
+template <typename Graph_t_in, typename Graph_t_out>
 class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
-
-    static_assert(is_directed_graph_edge_desc_v<Graph_t_in>,
-                  "Graph_t_in must satisfy the directed_graph edge desc concept");
+    static_assert(is_directed_graph_edge_desc_v<Graph_t_in>, "Graph_t_in must satisfy the directed_graph edge desc concept");
     static_assert(has_hashable_edge_desc_v<Graph_t_in>, "Graph_t_in must satisfy the has_hashable_edge_desc concept");
     static_assert(has_typed_vertices_v<Graph_t_in>, "Graph_t_in must have typed vertices");
 
@@ -55,7 +53,6 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
     v_type_t<Graph_t_in> current_v_type = 0;
 
     void add_new_super_node(const Graph_t_in &dag_in, VertexType_in node) {
-
         v_memw_t<Graph_t_in> node_mem = dag_in.vertex_mem_weight(node);
 
         current_memory = node_mem;
@@ -72,7 +69,6 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
     virtual std::string getCoarserName() const override { return "hdagg_coarser"; };
 
     virtual std::vector<vertex_idx_t<Graph_t_out>> generate_vertex_contraction_map(const Graph_t_in &dag_in) override {
-
         std::vector<bool> visited(dag_in.num_vertices(), false);
         std::vector<VertexType_out> reverse_vertex_map(dag_in.num_vertices());
 
@@ -94,21 +90,20 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
             add_new_super_node(dag_in, vertex_map[part_ind][vert_ind]);
 
             while (vert_ind < part_size) {
-
                 const VertexType_in vert = vertex_map[part_ind][vert_ind];
                 reverse_vertex_map[vert] = current_super_node_idx;
                 bool indegree_one = true;
 
                 for (const auto &in_edge : in_edges(vert, dag_in)) {
-
-                    if (edge_mask.find(in_edge) != edge_mast_end)
+                    if (edge_mask.find(in_edge) != edge_mast_end) {
                         continue;
+                    }
 
                     unsigned count = 0;
                     for (const auto &out_edge : out_edges(source(in_edge, dag_in), dag_in)) {
-
-                        if (edge_mask.find(out_edge) != edge_mast_end)
+                        if (edge_mask.find(out_edge) != edge_mast_end) {
                             continue;
+                        }
 
                         count++;
                         if (count > 1) {
@@ -124,22 +119,21 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
 
                 if (indegree_one) {
                     for (const auto &in_edge : in_edges(vert, dag_in)) {
-
-                        if (edge_mask.find(in_edge) != edge_mast_end)
+                        if (edge_mask.find(in_edge) != edge_mast_end) {
                             continue;
+                        }
 
                         const auto &edge_source = source(in_edge, dag_in);
 
                         v_memw_t<Graph_t_in> node_mem = dag_in.vertex_mem_weight(edge_source);
 
-                        if (((current_memory + node_mem > memory_threshold) ||
-                             (current_work + dag_in.vertex_work_weight(edge_source) > work_threshold) ||
-                             (vertex_map[part_ind].size() >= super_node_size_threshold) ||
-                             (current_communication + dag_in.vertex_comm_weight(edge_source) >
-                              communication_threshold)) ||
+                        if (((current_memory + node_mem > memory_threshold)
+                             || (current_work + dag_in.vertex_work_weight(edge_source) > work_threshold)
+                             || (vertex_map[part_ind].size() >= super_node_size_threshold)
+                             || (current_communication + dag_in.vertex_comm_weight(edge_source) > communication_threshold))
+                            ||
                             // or node type changes
                             (current_v_type != dag_in.vertex_type(edge_source))) {
-
                             if (!visited[edge_source]) {
                                 vertex_map.push_back(std::vector<VertexType_in>({edge_source}));
                                 partition_size++;
@@ -147,7 +141,6 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
                             }
 
                         } else {
-
                             current_memory += node_mem;
                             current_work += dag_in.vertex_work_weight(edge_source);
                             current_communication += dag_in.vertex_comm_weight(edge_source);
@@ -158,9 +151,9 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
                     }
                 } else {
                     for (const auto &in_edge : in_edges(vert, dag_in)) {
-
-                        if (edge_mask.find(in_edge) != edge_mast_end)
+                        if (edge_mask.find(in_edge) != edge_mast_end) {
                             continue;
+                        }
 
                         const auto &edge_source = source(in_edge, dag_in);
 
@@ -181,13 +174,16 @@ class hdagg_coarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_out> {
     }
 
     inline void set_work_threshold(v_workw_t<Graph_t_in> work_threshold_) { work_threshold = work_threshold_; }
+
     inline void set_memory_threshold(v_memw_t<Graph_t_in> memory_threshold_) { memory_threshold = memory_threshold_; }
+
     inline void set_communication_threshold(v_commw_t<Graph_t_in> communication_threshold_) {
         communication_threshold = communication_threshold_;
     }
+
     inline void set_super_node_size_threshold(std::size_t super_node_size_threshold_) {
         super_node_size_threshold = super_node_size_threshold_;
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/coarser/top_order/top_order_coarser.hpp b/include/osp/coarser/top_order/top_order_coarser.hpp
index bad2c46e..deaf476f 100644
--- a/include/osp/coarser/top_order/top_order_coarser.hpp
+++ b/include/osp/coarser/top_order/top_order_coarser.hpp
@@ -26,10 +26,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t_in, typename Graph_t_out,
-         std::vector<vertex_idx_t<Graph_t_in>> (*top_sort_func)(const Graph_t_in &)>
+template <typename Graph_t_in, typename Graph_t_out, std::vector<vertex_idx_t<Graph_t_in>> (*top_sort_func)(const Graph_t_in &)>
 class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
-
   private:
     using VertexType = vertex_idx_t<Graph_t_in>;
 
@@ -46,42 +44,36 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
     v_workw_t<Graph_t_in> current_work = 0;
     v_commw_t<Graph_t_in> current_communication = 0;
     VertexType current_super_node_idx = 0;
-    
-
-    void finish_super_node_add_edges(const Graph_t_in &dag_in, Graph_t_out &dag_out,
-                                     const std::vector<VertexType> &nodes, std::vector<vertex_idx_t<Graph_t_out>> &reverse_vertex_map) {
 
+    void finish_super_node_add_edges(const Graph_t_in &dag_in,
+                                     Graph_t_out &dag_out,
+                                     const std::vector<VertexType> &nodes,
+                                     std::vector<vertex_idx_t<Graph_t_out>> &reverse_vertex_map) {
         dag_out.set_vertex_mem_weight(current_super_node_idx, current_memory);
         dag_out.set_vertex_work_weight(current_super_node_idx, current_work);
         dag_out.set_vertex_comm_weight(current_super_node_idx, current_communication);
 
         for (const auto &node : nodes) {
-
             if constexpr (has_edge_weights_v<Graph_t_in> && has_edge_weights_v<Graph_t_out>) {
-
                 for (const auto &in_edge : in_edges(node, dag_in)) {
-
                     const VertexType parent_rev = reverse_vertex_map[source(in_edge, dag_in)];
                     if (parent_rev != current_super_node_idx && parent_rev != std::numeric_limits<VertexType>::max()) {
-
                         auto pair = edge_desc(parent_rev, current_super_node_idx, dag_out);
                         if (pair.second) {
-                            dag_out.set_edge_comm_weight(pair.first, dag_out.edge_comm_weight(pair.first) +
-                                                                         dag_in.edge_comm_weight(in_edge));
+                            dag_out.set_edge_comm_weight(pair.first,
+                                                         dag_out.edge_comm_weight(pair.first) + dag_in.edge_comm_weight(in_edge));
                         } else {
                             dag_out.add_edge(parent_rev, current_super_node_idx, dag_in.edge_comm_weight(in_edge));
                         }
                     }
                 }
             } else {
-
                 for (const auto &parent : dag_in.parents(node)) {
-
                     const VertexType parent_rev = reverse_vertex_map[parent];
                     if (parent_rev != current_super_node_idx && parent_rev != std::numeric_limits<VertexType>::max()) {
-
-                        if (not edge(parent_rev, current_super_node_idx, dag_out))
+                        if (not edge(parent_rev, current_super_node_idx, dag_out)) {
                             dag_out.add_edge(parent_rev, current_super_node_idx);
+                        }
                     }
                 }
             }
@@ -89,7 +81,6 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
     }
 
     void add_new_super_node(const Graph_t_in &dag_in, Graph_t_out &dag_out, VertexType node) {
-
         // int node_mem = dag_in.nodeMemoryWeight(node);
 
         // if (memory_constraint_type == LOCAL_INC_EDGES_2) {
@@ -103,11 +94,9 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
         current_work = dag_in.vertex_work_weight(node);
         current_communication = dag_in.vertex_comm_weight(node);
 
-        if constexpr (is_computational_dag_typed_vertices_v<Graph_t_in> &&
-                      is_computational_dag_typed_vertices_v<Graph_t_out>) {
-
-            current_super_node_idx =
-                dag_out.add_vertex(current_work, current_communication, current_memory, dag_in.vertex_type(node));
+        if constexpr (is_computational_dag_typed_vertices_v<Graph_t_in> && is_computational_dag_typed_vertices_v<Graph_t_out>) {
+            current_super_node_idx
+                = dag_out.add_vertex(current_work, current_communication, current_memory, dag_in.vertex_type(node));
         } else {
             current_super_node_idx = dag_out.add_vertex(current_work, current_communication, current_memory);
         }
@@ -118,8 +107,11 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
     virtual ~top_order_coarser() = default;
 
     inline void set_degree_threshold(unsigned degree_threshold_) { degree_threshold = degree_threshold_; }
+
     inline void set_work_threshold(v_workw_t<Graph_t_in> work_threshold_) { work_threshold = work_threshold_; }
+
     inline void set_memory_threshold(v_memw_t<Graph_t_in> memory_threshold_) { memory_threshold = memory_threshold_; }
+
     inline void set_communication_threshold(v_commw_t<Graph_t_in> communication_threshold_) {
         communication_threshold = communication_threshold_;
     }
@@ -127,7 +119,7 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
     inline void set_super_node_size_threshold(VertexType super_node_size_threshold_) {
         super_node_size_threshold = super_node_size_threshold_;
     }
-    
+
     inline void set_node_dist_threshold(unsigned node_dist_threshold_) { node_dist_threshold = node_dist_threshold_; }
 
     // inline void set_memory_constraint_type(MEMORY_CONSTRAINT_TYPE memory_constraint_type_) { memory_constraint_type =
@@ -135,9 +127,9 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
 
     virtual std::string getCoarserName() const override { return "top_order_coarser"; };
 
-    virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &dag_out,
+    virtual bool coarsenDag(const Graph_t_in &dag_in,
+                            Graph_t_out &dag_out,
                             std::vector<vertex_idx_t<Graph_t_out>> &reverse_vertex_map) override {
-
         assert(dag_out.num_vertices() == 0);
         if (dag_in.num_vertices() == 0) {
             reverse_vertex_map = std::vector<vertex_idx_t<Graph_t_out>>();
@@ -157,7 +149,6 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
         reverse_vertex_map[top_ordering[0]] = current_super_node_idx;
 
         for (size_t i = 1; i < top_ordering.size(); i++) {
-
             const auto v = top_ordering[i];
 
             // int node_mem = dag_in.vertex_mem_weight(v);
@@ -172,31 +163,27 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
             const unsigned dist = source_node_dist[v] - source_node_dist[top_ordering[i - 1]];
 
             // start new super node if thresholds are exceeded
-            if (((current_memory + dag_in.vertex_mem_weight(v) > memory_threshold) ||
-                 (current_work + dag_in.vertex_work_weight(v) > work_threshold) ||
-                 (vertex_map.back().size() >= super_node_size_threshold) ||
-                 (current_communication + dag_in.vertex_comm_weight(v) > communication_threshold)) ||
-                (dist > node_dist_threshold) ||
+            if (((current_memory + dag_in.vertex_mem_weight(v) > memory_threshold)
+                 || (current_work + dag_in.vertex_work_weight(v) > work_threshold)
+                 || (vertex_map.back().size() >= super_node_size_threshold)
+                 || (current_communication + dag_in.vertex_comm_weight(v) > communication_threshold))
+                || (dist > node_dist_threshold) ||
                 // or prev node high out degree
                 (dag_in.out_degree(top_ordering[i - 1]) > degree_threshold)) {
-
-                finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(),reverse_vertex_map);
+                finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map);
                 vertex_map.push_back(std::vector<VertexType>({v}));
                 add_new_super_node(dag_in, dag_out, v);
 
-            } else { // grow current super node
-
-                if constexpr (is_computational_dag_typed_vertices_v<Graph_t_in> &&
-                              is_computational_dag_typed_vertices_v<Graph_t_out>) {
+            } else {    // grow current super node
 
+                if constexpr (is_computational_dag_typed_vertices_v<Graph_t_in>
+                              && is_computational_dag_typed_vertices_v<Graph_t_out>) {
                     if (dag_out.vertex_type(current_super_node_idx) != dag_in.vertex_type(v)) {
-
                         finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map);
                         vertex_map.push_back(std::vector<VertexType>({v}));
                         add_new_super_node(dag_in, dag_out, v);
 
                     } else {
-
                         current_memory += dag_in.vertex_mem_weight(v);
                         current_work += dag_in.vertex_work_weight(v);
                         current_communication += dag_in.vertex_comm_weight(v);
@@ -205,7 +192,6 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
                     }
 
                 } else {
-
                     current_memory += dag_in.vertex_mem_weight(v);
                     current_work += dag_in.vertex_work_weight(v);
                     current_communication += dag_in.vertex_comm_weight(v);
@@ -225,4 +211,4 @@ class top_order_coarser : public Coarser<Graph_t_in, Graph_t_out> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/concepts/computational_dag_concept.hpp b/include/osp/concepts/computational_dag_concept.hpp
index 546b6467..29ca517e 100644
--- a/include/osp/concepts/computational_dag_concept.hpp
+++ b/include/osp/concepts/computational_dag_concept.hpp
@@ -52,20 +52,19 @@ namespace osp {
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertex_weights : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct has_vertex_weights<T,
                           std::void_t<decltype(std::declval<T>().vertex_work_weight(std::declval<vertex_idx_t<T>>())),
                                       decltype(std::declval<T>().vertex_comm_weight(std::declval<vertex_idx_t<T>>())),
                                       decltype(std::declval<T>().vertex_mem_weight(std::declval<vertex_idx_t<T>>()))>>
-    : std::conjunction<
-          std::is_arithmetic<decltype(std::declval<T>().vertex_work_weight(std::declval<vertex_idx_t<T>>()))>,
-          std::is_arithmetic<decltype(std::declval<T>().vertex_comm_weight(std::declval<vertex_idx_t<T>>()))>,
-          std::is_arithmetic<decltype(std::declval<T>().vertex_mem_weight(std::declval<vertex_idx_t<T>>()))>> {};
+    : std::conjunction<std::is_arithmetic<decltype(std::declval<T>().vertex_work_weight(std::declval<vertex_idx_t<T>>()))>,
+                       std::is_arithmetic<decltype(std::declval<T>().vertex_comm_weight(std::declval<vertex_idx_t<T>>()))>,
+                       std::is_arithmetic<decltype(std::declval<T>().vertex_mem_weight(std::declval<vertex_idx_t<T>>()))>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_vertex_weights_v = has_vertex_weights<T>::value;
 
 /**
@@ -80,16 +79,17 @@ inline constexpr bool has_vertex_weights_v = has_vertex_weights<T>::value;
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_typed_vertices : std::false_type {};
 
-template<typename T>
-struct has_typed_vertices<T, std::void_t<decltype(std::declval<T>().vertex_type(std::declval<vertex_idx_t<T>>())),
-                                         decltype(std::declval<T>().num_vertex_types())>>
+template <typename T>
+struct has_typed_vertices<T,
+                          std::void_t<decltype(std::declval<T>().vertex_type(std::declval<vertex_idx_t<T>>())),
+                                      decltype(std::declval<T>().num_vertex_types())>>
     : std::conjunction<std::is_integral<decltype(std::declval<T>().vertex_type(std::declval<vertex_idx_t<T>>()))>,
                        std::is_integral<decltype(std::declval<T>().num_vertex_types())>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_typed_vertices_v = has_typed_vertices<T>::value;
 
 /**
@@ -101,18 +101,17 @@ inline constexpr bool has_typed_vertices_v = has_typed_vertices<T>::value;
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_edge_weights : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct has_edge_weights<T,
                         std::void_t<typename directed_graph_edge_desc_traits<T>::directed_edge_descriptor,
                                     decltype(std::declval<T>().edge_comm_weight(std::declval<edge_desc_t<T>>()))>>
-    : std::conjunction<
-          std::is_arithmetic<decltype(std::declval<T>().edge_comm_weight(std::declval<edge_desc_t<T>>()))>,
-          is_directed_graph_edge_desc<T>> {};
+    : std::conjunction<std::is_arithmetic<decltype(std::declval<T>().edge_comm_weight(std::declval<edge_desc_t<T>>()))>,
+                       is_directed_graph_edge_desc<T>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_edge_weights_v = has_edge_weights<T>::value;
 
 /**
@@ -124,13 +123,13 @@ inline constexpr bool has_edge_weights_v = has_edge_weights<T>::value;
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_computational_dag : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_computational_dag<T, std::void_t<>> : std::conjunction<is_directed_graph<T>, has_vertex_weights<T>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_computational_dag_v = is_computational_dag<T>::value;
 
 /**
@@ -140,14 +139,13 @@ inline constexpr bool is_computational_dag_v = is_computational_dag<T>::value;
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_computational_dag_typed_vertices : std::false_type {};
 
-template<typename T>
-struct is_computational_dag_typed_vertices<T, std::void_t<>>
-    : std::conjunction<is_computational_dag<T>, has_typed_vertices<T>> {};
+template <typename T>
+struct is_computational_dag_typed_vertices<T, std::void_t<>> : std::conjunction<is_computational_dag<T>, has_typed_vertices<T>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_computational_dag_typed_vertices_v = is_computational_dag_typed_vertices<T>::value;
 
 /**
@@ -158,14 +156,14 @@ inline constexpr bool is_computational_dag_typed_vertices_v = is_computational_d
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_computational_dag_edge_desc : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_computational_dag_edge_desc<T, std::void_t<>>
     : std::conjunction<is_directed_graph_edge_desc<T>, is_computational_dag<T>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_computational_dag_edge_desc_v = is_computational_dag_edge_desc<T>::value;
 
 /**
@@ -175,15 +173,14 @@ inline constexpr bool is_computational_dag_edge_desc_v = is_computational_dag_ed
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_computational_dag_typed_vertices_edge_desc : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_computational_dag_typed_vertices_edge_desc<T, std::void_t<>>
     : std::conjunction<is_directed_graph_edge_desc<T>, is_computational_dag_typed_vertices<T>> {};
 
-template<typename T>
-inline constexpr bool is_computational_dag_typed_vertices_edge_desc_v =
-    is_computational_dag_typed_vertices_edge_desc<T>::value;
+template <typename T>
+inline constexpr bool is_computational_dag_typed_vertices_edge_desc_v = is_computational_dag_typed_vertices_edge_desc<T>::value;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/concepts/constructable_computational_dag_concept.hpp b/include/osp/concepts/constructable_computational_dag_concept.hpp
index 26e5a4a9..0caa561e 100644
--- a/include/osp/concepts/constructable_computational_dag_concept.hpp
+++ b/include/osp/concepts/constructable_computational_dag_concept.hpp
@@ -48,14 +48,15 @@ namespace osp {
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_modifiable_cdag_vertex : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_modifiable_cdag_vertex<
-    T, std::void_t<decltype(std::declval<T>().set_vertex_work_weight(std::declval<vertex_idx_t<T>>(), std::declval<v_workw_t<T>>())),
-                   decltype(std::declval<T>().set_vertex_comm_weight(std::declval<vertex_idx_t<T>>(), std::declval<v_commw_t<T>>())),
-                   decltype(std::declval<T>().set_vertex_mem_weight(std::declval<vertex_idx_t<T>>(), std::declval<v_memw_t<T>>()))>>
+    T,
+    std::void_t<decltype(std::declval<T>().set_vertex_work_weight(std::declval<vertex_idx_t<T>>(), std::declval<v_workw_t<T>>())),
+                decltype(std::declval<T>().set_vertex_comm_weight(std::declval<vertex_idx_t<T>>(), std::declval<v_commw_t<T>>())),
+                decltype(std::declval<T>().set_vertex_mem_weight(std::declval<vertex_idx_t<T>>(), std::declval<v_memw_t<T>>()))>>
     : std::conjunction<is_computational_dag<T>,
                        std::is_default_constructible<T>,
                        std::is_copy_constructible<T>,
@@ -63,7 +64,7 @@ struct is_modifiable_cdag_vertex<
                        std::is_copy_assignable<T>,
                        std::is_move_assignable<T>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_modifiable_cdag_vertex_v = is_modifiable_cdag_vertex<T>::value;
 
 /**
@@ -75,16 +76,16 @@ inline constexpr bool is_modifiable_cdag_vertex_v = is_modifiable_cdag_vertex<T>
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_constructable_cdag_vertex : std::false_type {};
 
-template<typename T>
-struct is_constructable_cdag_vertex<
-    T, std::void_t<decltype(std::declval<T>().add_vertex(std::declval<v_workw_t<T>>(), std::declval<v_commw_t<T>>(), std::declval<v_memw_t<T>>()))>>
-    : std::conjunction<is_modifiable_cdag_vertex<T>,
-                       std::is_constructible<T, vertex_idx_t<T>>> {};
+template <typename T>
+struct is_constructable_cdag_vertex<T,
+                                    std::void_t<decltype(std::declval<T>().add_vertex(
+                                        std::declval<v_workw_t<T>>(), std::declval<v_commw_t<T>>(), std::declval<v_memw_t<T>>()))>>
+    : std::conjunction<is_modifiable_cdag_vertex<T>, std::is_constructible<T, vertex_idx_t<T>>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_constructable_cdag_vertex_v = is_constructable_cdag_vertex<T>::value;
 
 /**
@@ -95,16 +96,16 @@ inline constexpr bool is_constructable_cdag_vertex_v = is_constructable_cdag_ver
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_modifiable_cdag_typed_vertex : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_modifiable_cdag_typed_vertex<
-    T, std::void_t<decltype(std::declval<T>().set_vertex_type(std::declval<vertex_idx_t<T>>(), std::declval<v_type_t<T>>()))>>
-    : std::conjunction<is_modifiable_cdag_vertex<T>,
-                       is_computational_dag_typed_vertices<T>> {}; // for default node type
+    T,
+    std::void_t<decltype(std::declval<T>().set_vertex_type(std::declval<vertex_idx_t<T>>(), std::declval<v_type_t<T>>()))>>
+    : std::conjunction<is_modifiable_cdag_vertex<T>, is_computational_dag_typed_vertices<T>> {};    // for default node type
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_modifiable_cdag_typed_vertex_v = is_modifiable_cdag_typed_vertex<T>::value;
 
 /**
@@ -115,16 +116,17 @@ inline constexpr bool is_modifiable_cdag_typed_vertex_v = is_modifiable_cdag_typ
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_constructable_cdag_typed_vertex : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_constructable_cdag_typed_vertex<
-    T, std::void_t<decltype(std::declval<T>().add_vertex(std::declval<v_workw_t<T>>(), std::declval<v_commw_t<T>>(), std::declval<v_memw_t<T>>(), std::declval<v_type_t<T>>()))>>
-    : std::conjunction<is_constructable_cdag_vertex<T>,
-                       is_modifiable_cdag_typed_vertex<T>> {}; // for default node type
+    T,
+    std::void_t<decltype(std::declval<T>().add_vertex(
+        std::declval<v_workw_t<T>>(), std::declval<v_commw_t<T>>(), std::declval<v_memw_t<T>>(), std::declval<v_type_t<T>>()))>>
+    : std::conjunction<is_constructable_cdag_vertex<T>, is_modifiable_cdag_typed_vertex<T>> {};    // for default node type
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_constructable_cdag_typed_vertex_v = is_constructable_cdag_typed_vertex<T>::value;
 
 /**
@@ -135,15 +137,16 @@ inline constexpr bool is_constructable_cdag_typed_vertex_v = is_constructable_cd
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_constructable_cdag_edge : std::false_type {};
 
-template<typename T>
-struct is_constructable_cdag_edge<T, std::void_t<decltype(std::declval<T>().add_edge(std::declval<vertex_idx_t<T>>(),
-                                                                                     std::declval<vertex_idx_t<T>>()))>>
+template <typename T>
+struct is_constructable_cdag_edge<
+    T,
+    std::void_t<decltype(std::declval<T>().add_edge(std::declval<vertex_idx_t<T>>(), std::declval<vertex_idx_t<T>>()))>>
     : is_directed_graph<T> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_constructable_cdag_edge_v = is_constructable_cdag_edge<T>::value;
 
 /**
@@ -154,15 +157,16 @@ inline constexpr bool is_constructable_cdag_edge_v = is_constructable_cdag_edge<
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_modifiable_cdag_comm_edge : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_modifiable_cdag_comm_edge<
-    T, std::void_t<decltype(std::declval<T>().set_edge_comm_weight(std::declval<edge_desc_t<T>>(), std::declval<e_commw_t<T>>()))>>
-    : std::conjunction<is_computational_dag_edge_desc<T>> {}; // for default edge weight
+    T,
+    std::void_t<decltype(std::declval<T>().set_edge_comm_weight(std::declval<edge_desc_t<T>>(), std::declval<e_commw_t<T>>()))>>
+    : std::conjunction<is_computational_dag_edge_desc<T>> {};    // for default edge weight
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_modifiable_cdag_comm_edge_v = is_modifiable_cdag_comm_edge<T>::value;
 
 /**
@@ -173,17 +177,18 @@ inline constexpr bool is_modifiable_cdag_comm_edge_v = is_modifiable_cdag_comm_e
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_constructable_cdag_comm_edge : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_constructable_cdag_comm_edge<
-    T, std::void_t<decltype(std::declval<T>().add_edge(std::declval<vertex_idx_t<T>>(), std::declval<vertex_idx_t<T>>(), std::declval<e_commw_t<T>>()))>>
-    : std::conjunction<is_constructable_cdag_edge<T>,
-                       is_computational_dag_edge_desc<T>,
-                       is_modifiable_cdag_comm_edge<T>> {}; // for default edge weight
+    T,
+    std::void_t<decltype(std::declval<T>().add_edge(
+        std::declval<vertex_idx_t<T>>(), std::declval<vertex_idx_t<T>>(), std::declval<e_commw_t<T>>()))>>
+    : std::conjunction<is_constructable_cdag_edge<T>, is_computational_dag_edge_desc<T>, is_modifiable_cdag_comm_edge<T>> {
+};    // for default edge weight
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_constructable_cdag_comm_edge_v = is_constructable_cdag_comm_edge<T>::value;
 
 /**
@@ -193,20 +198,21 @@ inline constexpr bool is_constructable_cdag_comm_edge_v = is_constructable_cdag_
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_constructable_cdag : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_constructable_cdag<T, std::void_t<>>
     : std::conjunction<is_computational_dag<T>, is_constructable_cdag_vertex<T>, is_constructable_cdag_edge<T>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_constructable_cdag_v = is_constructable_cdag<T>::value;
 
 /**
  * @brief Helper trait to check if a graph can be directly constructed from a vertex count and a set of edges.
  */
-template<typename T>
-inline constexpr bool is_direct_constructable_cdag_v = std::is_constructible<T, vertex_idx_t<T>, std::set<std::pair<vertex_idx_t<T>, vertex_idx_t<T>>>>::value;
+template <typename T>
+inline constexpr bool is_direct_constructable_cdag_v
+    = std::is_constructible<T, vertex_idx_t<T>, std::set<std::pair<vertex_idx_t<T>, vertex_idx_t<T>>>>::value;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/concepts/directed_graph_concept.hpp b/include/osp/concepts/directed_graph_concept.hpp
index c6a470b3..0bd65d77 100644
--- a/include/osp/concepts/directed_graph_concept.hpp
+++ b/include/osp/concepts/directed_graph_concept.hpp
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include "graph_traits.hpp"
 #include "iterator_concepts.hpp"
+
 namespace osp {
 
 /**
@@ -50,29 +51,28 @@ namespace osp {
  *
  * @tparam T The graph type to check against the concept.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_directed_graph : std::false_type {};
 
-template<typename T>
-struct is_directed_graph<
-    T, std::void_t<typename directed_graph_traits<T>::vertex_idx,
-                   decltype(std::declval<T>().vertices()),
-                   decltype(std::declval<T>().num_vertices()),
-                   decltype(std::declval<T>().num_edges()),
-                   decltype(std::declval<T>().parents(std::declval<vertex_idx_t<T>>())),
-                   decltype(std::declval<T>().children(std::declval<vertex_idx_t<T>>())),
-                   decltype(std::declval<T>().in_degree(std::declval<vertex_idx_t<T>>())),
-                   decltype(std::declval<T>().out_degree(std::declval<vertex_idx_t<T>>()))>>
-    : std::conjunction<
-          is_forward_range_of<decltype(std::declval<T>().vertices()), vertex_idx_t<T>>,
-          std::is_integral<decltype(std::declval<T>().num_vertices())>,
-          std::is_integral<decltype(std::declval<T>().num_edges())>,
-          is_input_range_of<decltype(std::declval<T>().parents(std::declval<vertex_idx_t<T>>())), vertex_idx_t<T>>,
-          is_input_range_of<decltype(std::declval<T>().children(std::declval<vertex_idx_t<T>>())), vertex_idx_t<T>>,
-          std::is_integral<decltype(std::declval<T>().in_degree(std::declval<vertex_idx_t<T>>()))>,
-          std::is_integral<decltype(std::declval<T>().out_degree(std::declval<vertex_idx_t<T>>()))>> {};
+template <typename T>
+struct is_directed_graph<T,
+                         std::void_t<typename directed_graph_traits<T>::vertex_idx,
+                                     decltype(std::declval<T>().vertices()),
+                                     decltype(std::declval<T>().num_vertices()),
+                                     decltype(std::declval<T>().num_edges()),
+                                     decltype(std::declval<T>().parents(std::declval<vertex_idx_t<T>>())),
+                                     decltype(std::declval<T>().children(std::declval<vertex_idx_t<T>>())),
+                                     decltype(std::declval<T>().in_degree(std::declval<vertex_idx_t<T>>())),
+                                     decltype(std::declval<T>().out_degree(std::declval<vertex_idx_t<T>>()))>>
+    : std::conjunction<is_forward_range_of<decltype(std::declval<T>().vertices()), vertex_idx_t<T>>,
+                       std::is_integral<decltype(std::declval<T>().num_vertices())>,
+                       std::is_integral<decltype(std::declval<T>().num_edges())>,
+                       is_input_range_of<decltype(std::declval<T>().parents(std::declval<vertex_idx_t<T>>())), vertex_idx_t<T>>,
+                       is_input_range_of<decltype(std::declval<T>().children(std::declval<vertex_idx_t<T>>())), vertex_idx_t<T>>,
+                       std::is_integral<decltype(std::declval<T>().in_degree(std::declval<vertex_idx_t<T>>()))>,
+                       std::is_integral<decltype(std::declval<T>().out_degree(std::declval<vertex_idx_t<T>>()))>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_directed_graph_v = is_directed_graph<T>::value;
 
 /**
@@ -86,17 +86,28 @@ inline constexpr bool is_directed_graph_v = is_directed_graph<T>::value;
  * @tparam v_type The vertex type.
  * @tparam e_type The size type (usually integral).
  */
-template<typename T, typename v_type, typename e_type, typename = void>
+template <typename T, typename v_type, typename e_type, typename = void>
 struct is_edge_list_type : std::false_type {};
 
-template<typename T, typename v_type, typename e_type>
+template <typename T, typename v_type, typename e_type>
 struct is_edge_list_type<
-    T, v_type, e_type, std::void_t<decltype(std::declval<T>().begin()), decltype(std::declval<T>().end()), decltype(std::declval<T>().size()), typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type, decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().source), decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().target)>>
-    : std::conjunction<std::is_same<decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().source), v_type>,
-                       std::is_same<decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().target), v_type>,
-                       std::is_same<decltype(std::declval<T>().size()), e_type>> {};
+    T,
+    v_type,
+    e_type,
+    std::void_t<decltype(std::declval<T>().begin()),
+                decltype(std::declval<T>().end()),
+                decltype(std::declval<T>().size()),
+                typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type,
+                decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().source),
+                decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().target)>>
+    : std::conjunction<
+          std::is_same<decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().source),
+                       v_type>,
+          std::is_same<decltype(std::declval<typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>().target),
+                       v_type>,
+          std::is_same<decltype(std::declval<T>().size()), e_type>> {};
 
-template<typename T, typename v_type, typename e_type>
+template <typename T, typename v_type, typename e_type>
 inline constexpr bool is_edge_list_type_v = is_edge_list_type<T, v_type, e_type>::value;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/concepts/directed_graph_edge_desc_concept.hpp b/include/osp/concepts/directed_graph_edge_desc_concept.hpp
index dadd827b..d3a8227b 100644
--- a/include/osp/concepts/directed_graph_edge_desc_concept.hpp
+++ b/include/osp/concepts/directed_graph_edge_desc_concept.hpp
@@ -45,7 +45,7 @@ namespace osp {
  * @param edge The edge descriptor.
  * @return The source vertex index.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline vertex_idx_t<Graph_t> source(const directed_edge<Graph_t> &edge, const Graph_t &) {
     return edge.source;
 }
@@ -57,7 +57,7 @@ inline vertex_idx_t<Graph_t> source(const directed_edge<Graph_t> &edge, const Gr
  * @param edge The edge descriptor.
  * @return The target vertex index.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline vertex_idx_t<Graph_t> target(const directed_edge<Graph_t> &edge, const Graph_t &) {
     return edge.target;
 }
@@ -69,7 +69,7 @@ inline vertex_idx_t<Graph_t> target(const directed_edge<Graph_t> &edge, const Gr
  * @param graph The graph instance.
  * @return An `edge_view` allowing iteration over all edges.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline edge_view<Graph_t> edges(const Graph_t &graph) {
     return edge_view(graph);
 }
@@ -82,7 +82,7 @@ inline edge_view<Graph_t> edges(const Graph_t &graph) {
  * @param graph The graph instance.
  * @return An `out_edge_view` allowing iteration over outgoing edges from `u`.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline OutEdgeView<Graph_t> out_edges(vertex_idx_t<Graph_t> u, const Graph_t &graph) {
     return OutEdgeView<Graph_t>(graph, u);
 }
@@ -95,7 +95,7 @@ inline OutEdgeView<Graph_t> out_edges(vertex_idx_t<Graph_t> u, const Graph_t &gr
  * @param graph The graph instance.
  * @return An `in_edge_view` allowing iteration over incoming edges to `v`.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline InEdgeView<Graph_t> in_edges(vertex_idx_t<Graph_t> v, const Graph_t &graph) {
     return InEdgeView<Graph_t>(graph, v);
 }
@@ -112,10 +112,10 @@ inline InEdgeView<Graph_t> in_edges(vertex_idx_t<Graph_t> v, const Graph_t &grap
  *
  * @tparam T The graph type to check.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_directed_graph_edge_desc : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct is_directed_graph_edge_desc<T,
                                    std::void_t<typename directed_graph_edge_desc_traits<T>::directed_edge_descriptor,
                                                decltype(edges(std::declval<T>())),
@@ -123,16 +123,16 @@ struct is_directed_graph_edge_desc<T,
                                                decltype(in_edges(std::declval<vertex_idx_t<T>>(), std::declval<T>())),
                                                decltype(source(std::declval<edge_desc_t<T>>(), std::declval<T>())),
                                                decltype(target(std::declval<edge_desc_t<T>>(), std::declval<T>()))>>
-    : std::conjunction<
-          is_directed_graph<T>, std::is_default_constructible<edge_desc_t<T>>,
-          std::is_copy_constructible<edge_desc_t<T>>,
-          is_input_range_of<decltype(edges(std::declval<T>())), edge_desc_t<T>>,
-          is_input_range_of<decltype(out_edges(std::declval<vertex_idx_t<T>>(), std::declval<T>())), edge_desc_t<T>>,
-          is_input_range_of<decltype(in_edges(std::declval<vertex_idx_t<T>>(), std::declval<T>())), edge_desc_t<T>>,
-          std::is_same<decltype(source(std::declval<edge_desc_t<T>>(), std::declval<T>())), vertex_idx_t<T>>,
-          std::is_same<decltype(target(std::declval<edge_desc_t<T>>(), std::declval<T>())), vertex_idx_t<T>>> {};
-
-template<typename T>
+    : std::conjunction<is_directed_graph<T>,
+                       std::is_default_constructible<edge_desc_t<T>>,
+                       std::is_copy_constructible<edge_desc_t<T>>,
+                       is_input_range_of<decltype(edges(std::declval<T>())), edge_desc_t<T>>,
+                       is_input_range_of<decltype(out_edges(std::declval<vertex_idx_t<T>>(), std::declval<T>())), edge_desc_t<T>>,
+                       is_input_range_of<decltype(in_edges(std::declval<vertex_idx_t<T>>(), std::declval<T>())), edge_desc_t<T>>,
+                       std::is_same<decltype(source(std::declval<edge_desc_t<T>>(), std::declval<T>())), vertex_idx_t<T>>,
+                       std::is_same<decltype(target(std::declval<edge_desc_t<T>>(), std::declval<T>())), vertex_idx_t<T>>> {};
+
+template <typename T>
 inline constexpr bool is_directed_graph_edge_desc_v = is_directed_graph_edge_desc<T>::value;
 
 /**
@@ -142,18 +142,19 @@ inline constexpr bool is_directed_graph_edge_desc_v = is_directed_graph_edge_des
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_hashable_edge_desc : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct has_hashable_edge_desc<T,
                               std::void_t<decltype(std::hash<edge_desc_t<T>>{}(std::declval<edge_desc_t<T>>())),
                                           decltype(std::declval<edge_desc_t<T>>() == std::declval<edge_desc_t<T>>()),
                                           decltype(std::declval<edge_desc_t<T>>() != std::declval<edge_desc_t<T>>())>>
-    : std::conjunction<is_directed_graph_edge_desc<T>, std::is_default_constructible<edge_desc_t<T>>,
+    : std::conjunction<is_directed_graph_edge_desc<T>,
+                       std::is_default_constructible<edge_desc_t<T>>,
                        std::is_copy_constructible<edge_desc_t<T>>> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_hashable_edge_desc_v = has_hashable_edge_desc<T>::value;
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/concepts/graph_traits.hpp b/include/osp/concepts/graph_traits.hpp
index 48e980ac..05b86a56 100644
--- a/include/osp/concepts/graph_traits.hpp
+++ b/include/osp/concepts/graph_traits.hpp
@@ -38,39 +38,46 @@ namespace osp {
  * These structs inherit from `std::true_type` if the specified member type exists in `T`,
  * otherwise they inherit from `std::false_type`.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertex_idx_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_vertex_idx_tmember<T, std::void_t<typename T::vertex_idx>> : std::true_type {};
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_edge_desc_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_edge_desc_tmember<T, std::void_t<typename T::directed_edge_descriptor>> : std::true_type {};
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertex_work_weight_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_vertex_work_weight_tmember<T, std::void_t<typename T::vertex_work_weight_type>> : std::true_type {};
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertex_comm_weight_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_vertex_comm_weight_tmember<T, std::void_t<typename T::vertex_comm_weight_type>> : std::true_type {};
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertex_mem_weight_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_vertex_mem_weight_tmember<T, std::void_t<typename T::vertex_mem_weight_type>> : std::true_type {};
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertex_type_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_vertex_type_tmember<T, std::void_t<typename T::vertex_type_type>> : std::true_type {};
 
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_edge_comm_weight_tmember : std::false_type {};
-template<typename T>
+
+template <typename T>
 struct has_edge_comm_weight_tmember<T, std::void_t<typename T::edge_comm_weight_type>> : std::true_type {};
 
 /**
@@ -80,7 +87,7 @@ struct has_edge_comm_weight_tmember<T, std::void_t<typename T::edge_comm_weight_
  *
  * @tparam T The graph type.
  */
-template<typename T>
+template <typename T>
 struct directed_graph_traits {
     static_assert(has_vertex_idx_tmember<T>::value, "graph must have vertex_idx");
     using vertex_idx = typename T::vertex_idx;
@@ -89,7 +96,7 @@ struct directed_graph_traits {
 /**
  * @brief Alias to easily access the vertex index type of a graph.
  */
-template<typename T>
+template <typename T>
 using vertex_idx_t = typename directed_graph_traits<T>::vertex_idx;
 
 /**
@@ -100,14 +107,17 @@ using vertex_idx_t = typename directed_graph_traits<T>::vertex_idx;
  *
  * @tparam Graph_t The graph type.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct directed_edge {
     vertex_idx_t<Graph_t> source;
     vertex_idx_t<Graph_t> target;
 
     bool operator==(const directed_edge &other) const { return source == other.source && target == other.target; }
+
     bool operator!=(const directed_edge &other) const { return !(*this == other); }
+
     directed_edge() : source(0), target(0) {}
+
     directed_edge(const directed_edge &other) = default;
     directed_edge(directed_edge &&other) = default;
     directed_edge &operator=(const directed_edge &other) = default;
@@ -122,23 +132,23 @@ struct directed_edge {
  *
  * If the graph defines `directed_edge_descriptor`, it is extracted; otherwise, `directed_edge` is used as a default implementation.
  */
-template<typename T, bool has_edge>
+template <typename T, bool has_edge>
 struct directed_graph_edge_desc_traits_helper {
     using directed_edge_descriptor = directed_edge<T>;
 };
 
-template<typename T>
+template <typename T>
 struct directed_graph_edge_desc_traits_helper<T, true> {
     using directed_edge_descriptor = typename T::directed_edge_descriptor;
 };
 
-template<typename T>
+template <typename T>
 struct directed_graph_edge_desc_traits {
     using directed_edge_descriptor =
         typename directed_graph_edge_desc_traits_helper<T, has_edge_desc_tmember<T>::value>::directed_edge_descriptor;
 };
 
-template<typename T>
+template <typename T>
 using edge_desc_t = typename directed_graph_edge_desc_traits<T>::directed_edge_descriptor;
 
 /**
@@ -151,7 +161,7 @@ using edge_desc_t = typename directed_graph_edge_desc_traits<T>::directed_edge_d
  *
  * @tparam T The computational DAG type.
  */
-template<typename T>
+template <typename T>
 struct computational_dag_traits {
     static_assert(has_vertex_work_weight_tmember<T>::value, "cdag must have vertex work weight type");
     static_assert(has_vertex_comm_weight_tmember<T>::value, "cdag must have vertex comm weight type");
@@ -162,13 +172,13 @@ struct computational_dag_traits {
     using vertex_mem_weight_type = typename T::vertex_mem_weight_type;
 };
 
-template<typename T>
+template <typename T>
 using v_workw_t = typename computational_dag_traits<T>::vertex_work_weight_type;
 
-template<typename T>
+template <typename T>
 using v_commw_t = typename computational_dag_traits<T>::vertex_comm_weight_type;
 
-template<typename T>
+template <typename T>
 using v_memw_t = typename computational_dag_traits<T>::vertex_mem_weight_type;
 
 /**
@@ -176,17 +186,17 @@ using v_memw_t = typename computational_dag_traits<T>::vertex_mem_weight_type;
  *
  * If the DAG defines `vertex_type_type`, it is extracted; otherwise, `void` is used.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct computational_dag_typed_vertices_traits {
     using vertex_type_type = void;
 };
 
-template<typename T>
+template <typename T>
 struct computational_dag_typed_vertices_traits<T, std::void_t<typename T::vertex_type_type>> {
     using vertex_type_type = typename T::vertex_type_type;
 };
 
-template<typename T>
+template <typename T>
 using v_type_t = typename computational_dag_typed_vertices_traits<T>::vertex_type_type;
 
 /**
@@ -194,17 +204,17 @@ using v_type_t = typename computational_dag_typed_vertices_traits<T>::vertex_typ
  *
  * If the DAG defines `edge_comm_weight_type`, it is extracted; otherwise, `void` is used.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct computational_dag_edge_desc_traits {
     using edge_comm_weight_type = void;
 };
 
-template<typename T>
+template <typename T>
 struct computational_dag_edge_desc_traits<T, std::void_t<typename T::edge_comm_weight_type>> {
     using edge_comm_weight_type = typename T::edge_comm_weight_type;
 };
 
-template<typename T>
+template <typename T>
 using e_commw_t = typename computational_dag_edge_desc_traits<T>::edge_comm_weight_type;
 
 // -----------------------------------------------------------------------------
@@ -213,57 +223,59 @@ using e_commw_t = typename computational_dag_edge_desc_traits<T>::edge_comm_weig
 
 /**
  * @brief Check if a graph guarantees vertices are stored/iterated in topological order.
- * It allows a graph implementation to notify algorithms that vertices are stored/iterated in topological order which can be used to optimize the algorithm.
+ * It allows a graph implementation to notify algorithms that vertices are stored/iterated in topological order, which can be
+ * used to optimize the algorithm.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_vertices_in_top_order_trait : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct has_vertices_in_top_order_trait<T, std::void_t<decltype(T::vertices_in_top_order)>>
     : std::bool_constant<std::is_same_v<decltype(T::vertices_in_top_order), const bool> && T::vertices_in_top_order> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_vertices_in_top_order_v = has_vertices_in_top_order_trait<T>::value;
 
 /**
  * @brief Check if a graph guarantees children of a vertex are stored/iterated in vertex index order.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_children_in_vertex_order_trait : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct has_children_in_vertex_order_trait<T, std::void_t<decltype(T::children_in_vertex_order)>>
     : std::bool_constant<std::is_same_v<decltype(T::children_in_vertex_order), const bool> && T::children_in_vertex_order> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_children_in_vertex_order_v = has_children_in_vertex_order_trait<T>::value;
 
 /**
  * @brief Check if a graph guarantees parents of a vertex are stored/iterated in vertex index order.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct has_parents_in_vertex_order_trait : std::false_type {};
 
-template<typename T>
+template <typename T>
 struct has_parents_in_vertex_order_trait<T, std::void_t<decltype(T::parents_in_vertex_order)>>
     : std::bool_constant<std::is_same_v<decltype(T::parents_in_vertex_order), const bool> && T::parents_in_vertex_order> {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool has_parents_in_vertex_order_v = has_parents_in_vertex_order_trait<T>::value;
 
-} // namespace osp
+}    // namespace osp
 
 /**
  * @brief Specialization of std::hash for osp::directed_edge.
  *
- * This specialization provides a hash function for osp::directed_edge, which is used in hash-based containers like std::unordered_set and std::unordered_map.
+ * This specialization provides a hash function for osp::directed_edge, which is used in hash-based containers like
+ * std::unordered_set and std::unordered_map.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct std::hash<osp::directed_edge<Graph_t>> {
     std::size_t operator()(const osp::directed_edge<Graph_t> &p) const noexcept {
         // Combine hashes of source and target
         std::size_t h1 = std::hash<osp::vertex_idx_t<Graph_t>>{}(p.source);
         std::size_t h2 = std::hash<osp::vertex_idx_t<Graph_t>>{}(p.target);
-        return h1 ^ (h2 << 1); // Simple hash combining
+        return h1 ^ (h2 << 1);    // Simple hash combining
     }
-};
\ No newline at end of file
+};
diff --git a/include/osp/concepts/iterator_concepts.hpp b/include/osp/concepts/iterator_concepts.hpp
index 2bf9c4cb..bb827ae6 100644
--- a/include/osp/concepts/iterator_concepts.hpp
+++ b/include/osp/concepts/iterator_concepts.hpp
@@ -22,6 +22,7 @@ limitations under the License.
 #include <type_traits>
 
 namespace osp {
+
 /**
  * @file iterator_concepts.hpp
  * @brief C++17 compatible concept checks (type traits) for iterators and ranges.
@@ -42,20 +43,19 @@ namespace osp {
  *
  * @tparam T The type to check.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_forward_iterator : std::false_type {};
 
-template<typename T>
-struct is_forward_iterator<
-    T, std::void_t<typename std::iterator_traits<T>::difference_type,
-                   typename std::iterator_traits<T>::value_type,
-                   typename std::iterator_traits<T>::pointer,
-                   typename std::iterator_traits<T>::reference,
-                   typename std::iterator_traits<T>::iterator_category>>
-    : std::conjunction<
-          std::is_base_of<std::forward_iterator_tag, typename std::iterator_traits<T>::iterator_category>> {};
-
-template<typename T>
+template <typename T>
+struct is_forward_iterator<T,
+                           std::void_t<typename std::iterator_traits<T>::difference_type,
+                                       typename std::iterator_traits<T>::value_type,
+                                       typename std::iterator_traits<T>::pointer,
+                                       typename std::iterator_traits<T>::reference,
+                                       typename std::iterator_traits<T>::iterator_category>>
+    : std::conjunction<std::is_base_of<std::forward_iterator_tag, typename std::iterator_traits<T>::iterator_category>> {};
+
+template <typename T>
 inline constexpr bool is_forward_iterator_v = is_forward_iterator<T>::value;
 
 /**
@@ -69,19 +69,16 @@ inline constexpr bool is_forward_iterator_v = is_forward_iterator<T>::value;
  * @tparam T The range type to check.
  * @tparam ValueType The expected value type of the range.
  */
-template<typename T, typename ValueType, typename = void>
+template <typename T, typename ValueType, typename = void>
 struct is_forward_range_of : std::false_type {};
 
-template<typename T, typename ValueType>
-struct is_forward_range_of<
-    T, ValueType,
-    std::void_t<decltype(std::begin(std::declval<T>())),
-                decltype(std::end(std::declval<T>()))>>
-    : std::conjunction<
-          is_forward_iterator<decltype(std::begin(std::declval<T>()))>,
-          std::is_same<ValueType, typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>> {};
+template <typename T, typename ValueType>
+struct is_forward_range_of<T, ValueType, std::void_t<decltype(std::begin(std::declval<T>())), decltype(std::end(std::declval<T>()))>>
+    : std::conjunction<is_forward_iterator<decltype(std::begin(std::declval<T>()))>,
+                       std::is_same<ValueType, typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>> {
+};
 
-template<typename T, typename ValueType>
+template <typename T, typename ValueType>
 inline constexpr bool is_forward_range_of_v = is_forward_range_of<T, ValueType>::value;
 
 /**
@@ -95,17 +92,14 @@ inline constexpr bool is_forward_range_of_v = is_forward_range_of<T, ValueType>:
  * @tparam T The container type to check.
  * @tparam ValueType The expected value type of the container.
  */
-template<typename T, typename ValueType, typename = void>
+template <typename T, typename ValueType, typename = void>
 struct is_container_of : std::false_type {};
 
-template<typename T, typename ValueType>
-struct is_container_of<
-    T, ValueType,
-    std::void_t<decltype(std::size(std::declval<T>()))>>
-    : std::conjunction<
-          is_forward_range_of<T, ValueType>> {};
+template <typename T, typename ValueType>
+struct is_container_of<T, ValueType, std::void_t<decltype(std::size(std::declval<T>()))>>
+    : std::conjunction<is_forward_range_of<T, ValueType>> {};
 
-template<typename T, typename ValueType>
+template <typename T, typename ValueType>
 inline constexpr bool is_container_of_v = is_container_of<T, ValueType>::value;
 
 /**
@@ -119,20 +113,19 @@ inline constexpr bool is_container_of_v = is_container_of<T, ValueType>::value;
  *
  * @tparam T The type to check.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_input_iterator : std::false_type {};
 
-template<typename T>
-struct is_input_iterator<
-    T, std::void_t<typename std::iterator_traits<T>::difference_type,
-                   typename std::iterator_traits<T>::value_type,
-                   typename std::iterator_traits<T>::pointer,
-                   typename std::iterator_traits<T>::reference,
-                   typename std::iterator_traits<T>::iterator_category>>
-    : std::conjunction<
-          std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<T>::iterator_category>> {};
-
-template<typename T>
+template <typename T>
+struct is_input_iterator<T,
+                         std::void_t<typename std::iterator_traits<T>::difference_type,
+                                     typename std::iterator_traits<T>::value_type,
+                                     typename std::iterator_traits<T>::pointer,
+                                     typename std::iterator_traits<T>::reference,
+                                     typename std::iterator_traits<T>::iterator_category>>
+    : std::conjunction<std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<T>::iterator_category>> {};
+
+template <typename T>
 inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value;
 
 /**
@@ -146,19 +139,16 @@ inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value;
  * @tparam T The range type to check.
  * @tparam ValueType The expected value type of the range.
  */
-template<typename T, typename ValueType, typename = void>
+template <typename T, typename ValueType, typename = void>
 struct is_input_range_of : std::false_type {};
 
-template<typename T, typename ValueType>
-struct is_input_range_of<
-    T, ValueType,
-    std::void_t<decltype(std::begin(std::declval<T>())),
-                decltype(std::end(std::declval<T>()))>>
-    : std::conjunction<
-          is_input_iterator<decltype(std::begin(std::declval<T>()))>,
-          std::is_same<ValueType, typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>> {};
+template <typename T, typename ValueType>
+struct is_input_range_of<T, ValueType, std::void_t<decltype(std::begin(std::declval<T>())), decltype(std::end(std::declval<T>()))>>
+    : std::conjunction<is_input_iterator<decltype(std::begin(std::declval<T>()))>,
+                       std::is_same<ValueType, typename std::iterator_traits<decltype(std::begin(std::declval<T>()))>::value_type>> {
+};
 
-template<typename T, typename ValueType>
+template <typename T, typename ValueType>
 inline constexpr bool is_input_range_of_v = is_input_range_of<T, ValueType>::value;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/concepts/specific_graph_impl.hpp b/include/osp/concepts/specific_graph_impl.hpp
index 29c3f75c..810211c3 100644
--- a/include/osp/concepts/specific_graph_impl.hpp
+++ b/include/osp/concepts/specific_graph_impl.hpp
@@ -34,10 +34,10 @@ namespace osp {
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_Compact_Sparse_Graph : std::false_type {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_Compact_Sparse_Graph_v = is_Compact_Sparse_Graph<T>::value;
 
 /**
@@ -45,10 +45,10 @@ inline constexpr bool is_Compact_Sparse_Graph_v = is_Compact_Sparse_Graph<T>::va
  *
  * @tparam T The graph type.
  */
-template<typename T, typename = void>
+template <typename T, typename = void>
 struct is_Compact_Sparse_Graph_reorder : std::false_type {};
 
-template<typename T>
+template <typename T>
 inline constexpr bool is_Compact_Sparse_Graph_reorder_v = is_Compact_Sparse_Graph_reorder<T>::value;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp
index 69a3c80c..627ee33d 100644
--- a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp
+++ b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp
@@ -16,15 +16,16 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 #pragma once
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <numeric>
+
 #include "DagDivider.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/graph_algorithms/computational_dag_util.hpp"
 #include "osp/graph_algorithms/subgraph_algorithms.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-#include <numeric>
 
 namespace osp {
 
@@ -32,7 +33,7 @@ namespace osp {
  * @class AbstractWavefrontScheduler
  * @brief Base class for schedulers that operate on wavefronts of a DAG.
  */
-template<typename Graph_t, typename constr_graph_t>
+template <typename Graph_t, typename constr_graph_t>
 class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
   protected:
     IDagDivider<Graph_t> *divider;
@@ -44,11 +45,9 @@ class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
      * @param allocation A reference to the vector that will be filled with the processor allocation.
      * @return True if the scarcity case was hit (fewer processors than active components), false otherwise.
      */
-    bool distributeProcessors(
-        unsigned total_processors_of_type,
-        const std::vector<double> &work_weights,
-        std::vector<unsigned> &allocation) const {
-
+    bool distributeProcessors(unsigned total_processors_of_type,
+                              const std::vector<double> &work_weights,
+                              std::vector<unsigned> &allocation) const {
         allocation.assign(work_weights.size(), 0);
         double total_work = std::accumulate(work_weights.begin(), work_weights.end(), 0.0);
         if (total_work <= 1e-9 || total_processors_of_type == 0) {
@@ -86,7 +85,7 @@ class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
             for (unsigned i = 0; i < remaining_procs; ++i) {
                 allocation[sorted_work[i].second]++;
             }
-            return true; // Scarcity case was hit.
+            return true;    // Scarcity case was hit.
         }
 
         // --- Stage 2: Proportional Distribution of Remaining Processors ---
@@ -109,7 +108,7 @@ class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
                 for (size_t i = 0; i < active_indices.size(); ++i) {
                     double exact_share = (adjusted_work_weights[i] / adjusted_total_work) * remaining_procs;
                     unsigned additional_alloc = static_cast<unsigned>(std::floor(exact_share));
-                    allocation[active_indices[i]] += additional_alloc; // Add to the base allocation of 1
+                    allocation[active_indices[i]] += additional_alloc;    // Add to the base allocation of 1
                     remainders.push_back({exact_share - additional_alloc, active_indices[i]});
                     allocated_count += additional_alloc;
                 }
@@ -124,22 +123,21 @@ class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
                 }
             }
         }
-        return false; // Scarcity case was not hit.
+        return false;    // Scarcity case was not hit.
     }
 
-    BspArchitecture<constr_graph_t> createSubArchitecture(
-        const BspArchitecture<Graph_t> &original_arch,
-        const std::vector<unsigned> &sub_dag_proc_types) const {
-
+    BspArchitecture<constr_graph_t> createSubArchitecture(const BspArchitecture<Graph_t> &original_arch,
+                                                          const std::vector<unsigned> &sub_dag_proc_types) const {
         // The calculation is now inside the assert, so it only happens in debug builds.
-        assert(std::accumulate(sub_dag_proc_types.begin(), sub_dag_proc_types.end(), 0u) > 0 && "Attempted to create a sub-architecture with zero processors.");
+        assert(std::accumulate(sub_dag_proc_types.begin(), sub_dag_proc_types.end(), 0u) > 0
+               && "Attempted to create a sub-architecture with zero processors.");
 
         BspArchitecture<constr_graph_t> sub_architecture(original_arch);
         std::vector<v_memw_t<Graph_t>> sub_dag_processor_memory(original_arch.getProcessorTypeCount().size(),
                                                                 std::numeric_limits<v_memw_t<Graph_t>>::max());
         for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) {
-            sub_dag_processor_memory[original_arch.processorType(i)] =
-                std::min(original_arch.memoryBound(i), sub_dag_processor_memory[original_arch.processorType(i)]);
+            sub_dag_processor_memory[original_arch.processorType(i)]
+                = std::min(original_arch.memoryBound(i), sub_dag_processor_memory[original_arch.processorType(i)]);
         }
         sub_architecture.SetProcessorsConsequTypes(sub_dag_proc_types, sub_dag_processor_memory);
         return sub_architecture;
@@ -156,10 +154,10 @@ class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
             }
 
             if (sum_of_compatible_works_for_rep > total_rep_work + 1e-9) {
-                if constexpr (enable_debug_prints)
-                    std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep
-                              << ") exceeds total work (" << total_rep_work
-                              << ") for a sub-dag. Aborting." << std::endl;
+                if constexpr (enable_debug_prints) {
+                    std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep << ") exceeds total work ("
+                              << total_rep_work << ") for a sub-dag. Aborting." << std::endl;
+                }
                 return false;
             }
         }
@@ -167,8 +165,7 @@ class AbstractWavefrontScheduler : public Scheduler<Graph_t> {
     }
 
   public:
-    AbstractWavefrontScheduler(IDagDivider<Graph_t> &div, Scheduler<constr_graph_t> &sched)
-        : divider(&div), scheduler(&sched) {}
+    AbstractWavefrontScheduler(IDagDivider<Graph_t> &div, Scheduler<constr_graph_t> &sched) : divider(&div), scheduler(&sched) {}
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/dag_divider/ConnectedComponentDivider.hpp b/include/osp/dag_divider/ConnectedComponentDivider.hpp
index 5059d10a..7edc747f 100644
--- a/include/osp/dag_divider/ConnectedComponentDivider.hpp
+++ b/include/osp/dag_divider/ConnectedComponentDivider.hpp
@@ -27,13 +27,11 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t, typename Constr_Graph_t>
+template <typename Graph_t, typename Constr_Graph_t>
 class ConnectedComponentDivider : public IDagDivider<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "Graph must be a computational DAG");
     static_assert(is_computational_dag_v<Constr_Graph_t>, "Constr_Graph_t must be a computational DAG");
-    static_assert(is_constructable_cdag_v<Constr_Graph_t>,
-                  "Constr_Graph_t must satisfy the constructable_cdag_vertex concept");
+    static_assert(is_constructable_cdag_v<Constr_Graph_t>, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept");
     static_assert(std::is_same_v<vertex_idx_t<Graph_t>, vertex_idx_t<Constr_Graph_t>>,
                   "Graph_t and Constr_Graph_t must have the same vertex_idx types");
 
@@ -53,14 +51,19 @@ class ConnectedComponentDivider : public IDagDivider<Graph_t> {
 
   public:
     inline std::vector<Constr_Graph_t> &get_sub_dags() { return sub_dags; }
+
     inline const std::vector<Constr_Graph_t> &get_sub_dags() const { return sub_dags; }
+
     inline const std::vector<std::vector<vertex_idx>> &get_vertex_mapping() const { return vertex_mapping; }
+
     inline const std::vector<unsigned> &get_component() const { return component; }
+
     inline const std::vector<vertex_idx> &get_vertex_map() const { return vertex_map; }
 
     virtual std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> divide(const Graph_t &dag) override {
-        if (dag.num_vertices() == 0)
+        if (dag.num_vertices() == 0) {
             return {};
+        }
 
         bool has_more_than_one_connected_component = compute_connected_components(dag);
 
@@ -68,8 +71,9 @@ class ConnectedComponentDivider : public IDagDivider<Graph_t> {
 
         if (has_more_than_one_connected_component) {
             vertex_maps[0].resize(sub_dags.size());
-            for (unsigned i = 0; i < sub_dags.size(); ++i)
+            for (unsigned i = 0; i < sub_dags.size(); ++i) {
                 vertex_maps[0][i].resize(sub_dags[i].num_vertices());
+            }
 
             for (const auto &v : dag.vertices()) {
                 vertex_maps[0][component[v]][vertex_map[v]] = v;
@@ -93,13 +97,13 @@ class ConnectedComponentDivider : public IDagDivider<Graph_t> {
         return vertex_maps;
     }
 
-
     std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> compute_vertex_maps(const Graph_t &dag) {
         std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> vertex_maps(1);
 
         vertex_maps[0].resize(sub_dags.size());
-        for (unsigned i = 0; i < sub_dags.size(); ++i)
+        for (unsigned i = 0; i < sub_dags.size(); ++i) {
             vertex_maps[0][i].resize(sub_dags[i].num_vertices());
+        }
 
         for (const auto &v : dag.vertices()) {
             vertex_maps[0][component[v]][vertex_map[v]] = v;
@@ -168,8 +172,9 @@ class ConnectedComponentDivider : public IDagDivider<Graph_t> {
             vertex_idx local_idx = current_index_in_subdag[comp_id]++;
             vertex_map[v] = local_idx;
 
-            if (vertex_mapping[comp_id].empty())
+            if (vertex_mapping[comp_id].empty()) {
                 vertex_mapping[comp_id].resize(sub_dags[comp_id].num_vertices());
+            }
 
             vertex_mapping[comp_id][local_idx] = v;
         }
@@ -178,4 +183,4 @@ class ConnectedComponentDivider : public IDagDivider<Graph_t> {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/dag_divider/ConnectedComponentScheduler.hpp b/include/osp/dag_divider/ConnectedComponentScheduler.hpp
index a25a1ab6..7d6cdece 100644
--- a/include/osp/dag_divider/ConnectedComponentScheduler.hpp
+++ b/include/osp/dag_divider/ConnectedComponentScheduler.hpp
@@ -22,12 +22,10 @@ limitations under the License.
 #include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/graph_algorithms/computational_dag_util.hpp"
 
-
 namespace osp {
 
-template<typename Graph_t, typename Constr_Graph_t>
+template <typename Graph_t, typename Constr_Graph_t>
 class ConnectedComponentScheduler : public Scheduler<Graph_t> {
-
     Scheduler<Constr_Graph_t> *scheduler;
 
   public:
@@ -36,7 +34,6 @@ class ConnectedComponentScheduler : public Scheduler<Graph_t> {
     std::string getScheduleName() const override { return "SubDagScheduler"; }
 
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
-
         const auto &instance = schedule.getInstance();
 
         const Graph_t &dag = instance.getComputationalDag();
@@ -57,8 +54,10 @@ class ConnectedComponentScheduler : public Scheduler<Graph_t> {
             BspInstance<Constr_Graph_t> sub_instance(sub_dag, instance.getArchitecture());
             BspArchitecture<Constr_Graph_t> &sub_architecture = sub_instance.getArchitecture();
 
-            const double sub_dag_work_weight_percent = static_cast<double>(sub_dag_work_weight) / static_cast<double>(total_work_weight);
-            const unsigned sub_dag_processors = static_cast<unsigned>(sub_dag_work_weight_percent * sub_architecture.numberOfProcessors());
+            const double sub_dag_work_weight_percent
+                = static_cast<double>(sub_dag_work_weight) / static_cast<double>(total_work_weight);
+            const unsigned sub_dag_processors
+                = static_cast<unsigned>(sub_dag_work_weight_percent * sub_architecture.numberOfProcessors());
 
             sub_architecture.setNumberOfProcessors(sub_dag_processors);
 
@@ -81,4 +80,4 @@ class ConnectedComponentScheduler : public Scheduler<Graph_t> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/DagDivider.hpp b/include/osp/dag_divider/DagDivider.hpp
index ffa4559e..e89c2b01 100644
--- a/include/osp/dag_divider/DagDivider.hpp
+++ b/include/osp/dag_divider/DagDivider.hpp
@@ -17,9 +17,10 @@ limitations under the License.
 */
 
 #pragma once
+#include <vector>
+
 #include "osp/concepts/directed_graph_concept.hpp"
 #include "osp/concepts/graph_traits.hpp"
-#include <vector>
 
 namespace osp {
 
@@ -28,10 +29,9 @@ namespace osp {
  * @brief Divides the wavefronts of a computational DAG into consecutive groups or sections.
  *
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class IDagDivider {
-
-  static_assert(is_directed_graph_v<Graph_t>, "Graph must be directed");
+    static_assert(is_directed_graph_v<Graph_t>, "Graph must be directed");
 
   public:
     virtual ~IDagDivider() = default;
@@ -50,4 +50,4 @@ class IDagDivider {
     virtual std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> divide(const Graph_t &dag) = 0;
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp b/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp
index 270b7073..5b61736f 100644
--- a/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp
+++ b/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp
@@ -25,9 +25,9 @@ namespace osp {
  * @class IsomorphicWavefrontComponentScheduler
  * @brief Schedules wavefronts by grouping isomorphic components.
  */
-template<typename Graph_t, typename constr_graph_t>
+template <typename Graph_t, typename constr_graph_t>
 class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, constr_graph_t> {
-public:
+  public:
     IsomorphicWavefrontComponentScheduler(IDagDivider<Graph_t> &div, Scheduler<constr_graph_t> &scheduler)
         : AbstractWavefrontScheduler<Graph_t, constr_graph_t>(div, scheduler) {}
 
@@ -43,19 +43,24 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
         }
 
         IsomorphismGroups<Graph_t, constr_graph_t> iso_groups;
-        std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> vertex_maps = this->divider->divide(instance.getComputationalDag());
+        std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> vertex_maps
+            = this->divider->divide(instance.getComputationalDag());
         iso_groups.compute_isomorphism_groups(vertex_maps, instance.getComputationalDag());
 
         unsigned superstep_offset = 0;
         for (std::size_t i = 0; i < vertex_maps.size(); ++i) {
-            if (this->enable_debug_prints) std::cout << "\n--- Processing Wavefront Set " << i << " ---" << std::endl;
-            
+            if (this->enable_debug_prints) {
+                std::cout << "\n--- Processing Wavefront Set " << i << " ---" << std::endl;
+            }
+
             unsigned supersteps_in_set = 0;
-            auto status = process_wavefront_set(schedule, vertex_maps[i], 
+            auto status = process_wavefront_set(schedule,
+                                                vertex_maps[i],
                                                 iso_groups.get_isomorphism_groups()[i],
                                                 iso_groups.get_isomorphism_groups_subgraphs()[i],
                                                 global_ids_by_type,
-                                                superstep_offset, supersteps_in_set);
+                                                superstep_offset,
+                                                supersteps_in_set);
             if (status != RETURN_STATUS::OSP_SUCCESS) {
                 return status;
             }
@@ -64,27 +69,25 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
         return RETURN_STATUS::OSP_SUCCESS;
     }
 
-private:
-    RETURN_STATUS process_wavefront_set(
-        BspSchedule<Graph_t>& schedule,
-        const std::vector<std::vector<vertex_idx_t<Graph_t>>>& vertex_map_for_set,
-        const std::vector<std::vector<size_t>>& iso_groups_for_set,
-        const std::vector<constr_graph_t>& subgraphs_for_set,
-        const std::vector<std::vector<unsigned>>& global_ids_by_type,
-        unsigned superstep_offset,
-        unsigned& supersteps_in_set) {
-
+  private:
+    RETURN_STATUS process_wavefront_set(BspSchedule<Graph_t> &schedule,
+                                        const std::vector<std::vector<vertex_idx_t<Graph_t>>> &vertex_map_for_set,
+                                        const std::vector<std::vector<size_t>> &iso_groups_for_set,
+                                        const std::vector<constr_graph_t> &subgraphs_for_set,
+                                        const std::vector<std::vector<unsigned>> &global_ids_by_type,
+                                        unsigned superstep_offset,
+                                        unsigned &supersteps_in_set) {
         const auto &instance = schedule.getInstance();
         const auto &original_arch = instance.getArchitecture();
-        const auto& original_proc_type_count = original_arch.getProcessorTypeCount();
+        const auto &original_proc_type_count = original_arch.getProcessorTypeCount();
 
         if constexpr (this->enable_debug_prints) {
             std::cout << "  Found " << iso_groups_for_set.size() << " isomorphism groups in this wavefront set." << std::endl;
         }
 
         // Calculate work for each isomorphism group
-        std::vector<std::vector<double>> group_work_by_type(
-            iso_groups_for_set.size(), std::vector<double>(original_proc_type_count.size(), 0.0));
+        std::vector<std::vector<double>> group_work_by_type(iso_groups_for_set.size(),
+                                                            std::vector<double>(original_proc_type_count.size(), 0.0));
 
         for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) {
             const constr_graph_t &rep_sub_dag = subgraphs_for_set[j];
@@ -97,26 +100,28 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
         assert(this->validateWorkDistribution(subgraphs_for_set, instance));
 
         // Distribute processors among isomorphism groups
-        std::vector<std::vector<unsigned>> group_proc_allocations(iso_groups_for_set.size(), std::vector<unsigned>(original_proc_type_count.size()));
-        
-        for(unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) {
+        std::vector<std::vector<unsigned>> group_proc_allocations(iso_groups_for_set.size(),
+                                                                  std::vector<unsigned>(original_proc_type_count.size()));
+
+        for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) {
             std::vector<double> work_for_this_type;
-            for(size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) {
+            for (size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) {
                 work_for_this_type.push_back(group_work_by_type[group_idx][type_idx]);
             }
-            
+
             std::vector<unsigned> type_allocation;
-            bool starvation_hit = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation);
+            bool starvation_hit
+                = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation);
 
             if (starvation_hit) {
                 if constexpr (this->enable_debug_prints) {
-                    std::cerr << "ERROR: Processor starvation detected for type " << type_idx 
+                    std::cerr << "ERROR: Processor starvation detected for type " << type_idx
                               << ". Not enough processors to assign one to each active isomorphism group." << std::endl;
                 }
                 return RETURN_STATUS::ERROR;
             }
-            
-            for(size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) {
+
+            for (size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) {
                 group_proc_allocations[group_idx][type_idx] = type_allocation[group_idx];
             }
         }
@@ -124,15 +129,23 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
         // Schedule each group
         unsigned max_supersteps = 0;
         std::vector<unsigned> proc_type_offsets(original_arch.getNumberOfProcessorTypes(), 0);
-        
+
         std::vector<unsigned> num_supersteps_per_iso_group(iso_groups_for_set.size());
 
         for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) {
             unsigned supersteps_for_group = 0;
-            auto status = schedule_isomorphism_group(schedule, vertex_map_for_set, iso_groups_for_set[j], subgraphs_for_set[j],
-                                                     group_proc_allocations[j], global_ids_by_type, proc_type_offsets,
-                                                     superstep_offset, supersteps_for_group);
-            if (status != RETURN_STATUS::OSP_SUCCESS) return status;
+            auto status = schedule_isomorphism_group(schedule,
+                                                     vertex_map_for_set,
+                                                     iso_groups_for_set[j],
+                                                     subgraphs_for_set[j],
+                                                     group_proc_allocations[j],
+                                                     global_ids_by_type,
+                                                     proc_type_offsets,
+                                                     superstep_offset,
+                                                     supersteps_for_group);
+            if (status != RETURN_STATUS::OSP_SUCCESS) {
+                return status;
+            }
             num_supersteps_per_iso_group[j] = supersteps_for_group;
             max_supersteps = std::max(max_supersteps, supersteps_for_group);
 
@@ -144,34 +157,31 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
 
         for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) {
             num_supersteps_per_iso_group[j] = max_supersteps - num_supersteps_per_iso_group[j];
-            
-            if (num_supersteps_per_iso_group[j] > 0) { // This is the padding
-                const auto& group_members = iso_groups_for_set[j];
-                for (const auto& original_comp_idx : group_members) {
-                    const auto& component_vertices = vertex_map_for_set[original_comp_idx];
-                    for (const auto& vertex : component_vertices) {
+
+            if (num_supersteps_per_iso_group[j] > 0) {    // This is the padding
+                const auto &group_members = iso_groups_for_set[j];
+                for (const auto &original_comp_idx : group_members) {
+                    const auto &component_vertices = vertex_map_for_set[original_comp_idx];
+                    for (const auto &vertex : component_vertices) {
                         schedule.setAssignedSuperstep(vertex, schedule.assignedSuperstep(vertex) + num_supersteps_per_iso_group[j]);
                     }
                 }
             }
         }
 
-
         supersteps_in_set = max_supersteps;
         return RETURN_STATUS::OSP_SUCCESS;
     }
 
-    RETURN_STATUS schedule_isomorphism_group(
-        BspSchedule<Graph_t>& schedule,
-        const std::vector<std::vector<vertex_idx_t<Graph_t>>>& vertex_map_for_set,
-        const std::vector<size_t>& group_members,
-        const constr_graph_t& rep_sub_dag,
-        const std::vector<unsigned>& procs_for_group,
-        const std::vector<std::vector<unsigned>>& global_ids_by_type,
-        const std::vector<unsigned>& proc_type_offsets,
-        unsigned superstep_offset,
-        unsigned& supersteps_for_group) {
-
+    RETURN_STATUS schedule_isomorphism_group(BspSchedule<Graph_t> &schedule,
+                                             const std::vector<std::vector<vertex_idx_t<Graph_t>>> &vertex_map_for_set,
+                                             const std::vector<size_t> &group_members,
+                                             const constr_graph_t &rep_sub_dag,
+                                             const std::vector<unsigned> &procs_for_group,
+                                             const std::vector<std::vector<unsigned>> &global_ids_by_type,
+                                             const std::vector<unsigned> &proc_type_offsets,
+                                             unsigned superstep_offset,
+                                             unsigned &supersteps_for_group) {
         const auto &instance = schedule.getInstance();
         const auto &original_arch = instance.getArchitecture();
         const size_t num_members = group_members.size();
@@ -189,11 +199,14 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
 
         if (scarcity_found) {
             // --- SCARCITY/INDIVISIBLE CASE: Schedule sequentially on the shared processor block ---
-            if constexpr (this->enable_debug_prints) std::cout << "  Group with " << num_members << " members: Scarcity/Indivisible case. Scheduling sequentially." << std::endl;
-            
+            if constexpr (this->enable_debug_prints) {
+                std::cout << "  Group with " << num_members << " members: Scarcity/Indivisible case. Scheduling sequentially."
+                          << std::endl;
+            }
+
             BspInstance<constr_graph_t> sub_instance(rep_sub_dag, this->createSubArchitecture(original_arch, procs_for_group));
             sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix());
-            auto & sub_architecture = sub_instance.getArchitecture();
+            auto &sub_architecture = sub_instance.getArchitecture();
 
             if constexpr (this->enable_debug_prints) {
                 std::cout << "    Sub-architecture for sequential scheduling: { ";
@@ -207,7 +220,9 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
             for (const auto &group_member_idx : group_members) {
                 BspSchedule<constr_graph_t> sub_schedule(sub_instance);
                 auto status = this->scheduler->computeSchedule(sub_schedule);
-                if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status;
+                if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) {
+                    return status;
+                }
 
                 const auto sub_proc_type_count = sub_architecture.getProcessorTypeCount();
                 std::vector<unsigned> sub_proc_type_corrections(sub_architecture.getNumberOfProcessorTypes(), 0);
@@ -215,18 +230,21 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
                     sub_proc_type_corrections[k] = sub_proc_type_corrections[k - 1] + sub_proc_type_count[k - 1];
                 }
 
-                std::vector<vertex_idx_t<Graph_t>> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), vertex_map_for_set[group_member_idx].end());
+                std::vector<vertex_idx_t<Graph_t>> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(),
+                                                                             vertex_map_for_set[group_member_idx].end());
                 std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end());
-                
+
                 vertex_idx_t<constr_graph_t> subdag_vertex = 0;
                 for (const auto &vertex : sorted_component_vertices) {
                     const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex);
                     const unsigned proc_type = sub_architecture.processorType(proc_in_sub_sched);
                     const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type];
-                    unsigned global_proc_id = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type];
-                    
+                    unsigned global_proc_id
+                        = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type];
+
                     schedule.setAssignedProcessor(vertex, global_proc_id);
-                    schedule.setAssignedSuperstep(vertex, superstep_offset + sequential_superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex));
+                    schedule.setAssignedSuperstep(
+                        vertex, superstep_offset + sequential_superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex));
                     subdag_vertex++;
                 }
                 sequential_superstep_offset += sub_schedule.numberOfSupersteps();
@@ -235,29 +253,37 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
 
         } else {
             // --- ABUNDANCE/DIVISIBLE CASE: Replicate Schedule ---
-            if constexpr (this->enable_debug_prints) std::cout << "  Group with " << num_members << " members: Abundance/Divisible case. Replicating schedule." << std::endl;
+            if constexpr (this->enable_debug_prints) {
+                std::cout << "  Group with " << num_members << " members: Abundance/Divisible case. Replicating schedule."
+                          << std::endl;
+            }
 
             std::vector<unsigned> single_sub_dag_proc_types = procs_for_group;
             if (num_members > 0) {
-                for(auto& count : single_sub_dag_proc_types) count /= static_cast<unsigned>(num_members);
+                for (auto &count : single_sub_dag_proc_types) {
+                    count /= static_cast<unsigned>(num_members);
+                }
             }
 
-            BspInstance<constr_graph_t> sub_instance(rep_sub_dag, this->createSubArchitecture(original_arch, single_sub_dag_proc_types));
+            BspInstance<constr_graph_t> sub_instance(rep_sub_dag,
+                                                     this->createSubArchitecture(original_arch, single_sub_dag_proc_types));
             sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix());
-            
+
             if constexpr (this->enable_debug_prints) {
-                const auto& sub_arch = sub_instance.getArchitecture();
+                const auto &sub_arch = sub_instance.getArchitecture();
                 std::cout << "    Sub-architecture for replication (per member): { ";
                 for (unsigned type_idx = 0; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) {
                     std::cout << "Type " << type_idx << ": " << sub_arch.getProcessorTypeCount()[type_idx] << "; ";
                 }
                 std::cout << "}" << std::endl;
             }
-            
+
             BspSchedule<constr_graph_t> sub_schedule(sub_instance);
             auto status = this->scheduler->computeSchedule(sub_schedule);
-            if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status;
-            
+            if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) {
+                return status;
+            }
+
             const auto sub_proc_type_count = sub_schedule.getInstance().getArchitecture().getProcessorTypeCount();
             std::vector<unsigned> sub_proc_type_corrections(sub_proc_type_count.size(), 0);
             for (std::size_t k = 1; k < sub_proc_type_corrections.size(); ++k) {
@@ -266,7 +292,8 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
 
             std::vector<unsigned> current_member_proc_offsets = proc_type_offsets;
             for (const auto &group_member_idx : group_members) {
-                std::vector<vertex_idx_t<Graph_t>> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), vertex_map_for_set[group_member_idx].end());
+                std::vector<vertex_idx_t<Graph_t>> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(),
+                                                                             vertex_map_for_set[group_member_idx].end());
                 std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end());
 
                 vertex_idx_t<constr_graph_t> subdag_vertex = 0;
@@ -274,8 +301,9 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
                     const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex);
                     const unsigned proc_type = sub_schedule.getInstance().getArchitecture().processorType(proc_in_sub_sched);
                     const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type];
-                    unsigned global_proc_id = global_ids_by_type[proc_type][current_member_proc_offsets[proc_type] + local_proc_id_within_type];
-                    
+                    unsigned global_proc_id
+                        = global_ids_by_type[proc_type][current_member_proc_offsets[proc_type] + local_proc_id_within_type];
+
                     schedule.setAssignedProcessor(vertex, global_proc_id);
                     schedule.setAssignedSuperstep(vertex, superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex));
                     subdag_vertex++;
@@ -290,7 +318,7 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler<
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 using IsomorphicWavefrontComponentScheduler_def_int_t = IsomorphicWavefrontComponentScheduler<Graph_t, boost_graph_int_t>;
 
-}
+}    // namespace osp
diff --git a/include/osp/dag_divider/IsomorphismGroups.hpp b/include/osp/dag_divider/IsomorphismGroups.hpp
index c8e1db11..46b91c3d 100644
--- a/include/osp/dag_divider/IsomorphismGroups.hpp
+++ b/include/osp/dag_divider/IsomorphismGroups.hpp
@@ -22,27 +22,24 @@ limitations under the License.
 #include <vector>
 
 #include "osp/concepts/graph_traits.hpp"
-#include "osp/graph_algorithms/subgraph_algorithms.hpp"
 #include "osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp"
+#include "osp/graph_algorithms/subgraph_algorithms.hpp"
 
 namespace osp {
 
-template<typename Graph_t, typename Constr_Graph_t>
+template <typename Graph_t, typename Constr_Graph_t>
 class IsomorphismGroups {
-
   private:
     std::vector<std::vector<std::vector<std::size_t>>> isomorphism_groups;
 
     std::vector<std::vector<Constr_Graph_t>> isomorphism_groups_subgraphs;
 
     void print_isomorphism_groups() const {
-
         std::cout << "Isomorphism groups: " << std::endl;
         for (std::size_t i = 0; i < isomorphism_groups.size(); i++) {
             std::cout << "Level " << i << std::endl;
             for (size_t j = 0; j < isomorphism_groups[i].size(); j++) {
-                std::cout << "Group " << j << " of size " << isomorphism_groups_subgraphs[i][j].num_vertices()
-                          << " : ";
+                std::cout << "Group " << j << " of size " << isomorphism_groups_subgraphs[i][j].num_vertices() << " : ";
 
                 // ComputationalDagWriter writer(isomorphism_groups_subgraphs[i][j]);
                 // writer.write_dot("isomorphism_group_" + std::to_string(i) + "_" + std::to_string(j) + ".dot");
@@ -72,13 +69,9 @@ class IsomorphismGroups {
      * @return const std::vector<std::vector<std::vector<unsigned>>>&
      *         A constant reference to the vertex maps.
      */
-    const std::vector<std::vector<std::vector<std::size_t>>> &get_isomorphism_groups() const {
-        return isomorphism_groups;
-    }
+    const std::vector<std::vector<std::vector<std::size_t>>> &get_isomorphism_groups() const { return isomorphism_groups; }
 
-    std::vector<std::vector<std::vector<std::size_t>>> &get_isomorphism_groups() {
-        return isomorphism_groups;
-    }
+    std::vector<std::vector<std::vector<std::size_t>>> &get_isomorphism_groups() { return isomorphism_groups; }
 
     /**
      * @brief Retrieves the isomorphism groups subgraphs.
@@ -105,25 +98,19 @@ class IsomorphismGroups {
      *
      * Reqires the dag to be divided before calling this function.
      */
-    void compute_isomorphism_groups(std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> &vertex_maps,
-                                    const Graph_t &dag) {
-
+    void compute_isomorphism_groups(std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> &vertex_maps, const Graph_t &dag) {
         isomorphism_groups = std::vector<std::vector<std::vector<std::size_t>>>(vertex_maps.size());
 
         isomorphism_groups_subgraphs = std::vector<std::vector<Constr_Graph_t>>(vertex_maps.size());
 
         for (size_t i = 0; i < vertex_maps.size(); i++) {
-
             for (std::size_t j = 0; j < vertex_maps[i].size(); j++) {
-
                 Constr_Graph_t current_subgraph;
                 create_induced_subgraph(dag, current_subgraph, vertex_maps[i][j]);
 
                 bool isomorphism_group_found = false;
                 for (size_t k = 0; k < isomorphism_groups[i].size(); k++) {
-
                     if (are_isomorphic_by_merkle_hash(isomorphism_groups_subgraphs[i][k], current_subgraph)) {
-
                         isomorphism_groups[i][k].emplace_back(j);
                         isomorphism_group_found = true;
                         break;
@@ -131,7 +118,6 @@ class IsomorphismGroups {
                 }
 
                 if (!isomorphism_group_found) {
-
                     isomorphism_groups[i].emplace_back(std::vector<std::size_t>{j});
                     isomorphism_groups_subgraphs[i].emplace_back(std::move(current_subgraph));
                 }
@@ -148,30 +134,27 @@ class IsomorphismGroups {
      * @param merge_threshold If a group has more members than this, it will be merged.
      * @param target_group_count The number of larger groups to create from a single large group.
      */
-    void merge_large_isomorphism_groups(
-        std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>>& vertex_maps,
-        const Graph_t& dag,
-        size_t merge_threshold,
-        size_t target_group_count = 8) {
-
+    void merge_large_isomorphism_groups(std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> &vertex_maps,
+                                        const Graph_t &dag,
+                                        size_t merge_threshold,
+                                        size_t target_group_count = 8) {
         // Ensure the merge logic is sound: the threshold must be larger than the target.
         assert(merge_threshold > target_group_count);
 
         for (size_t i = 0; i < isomorphism_groups.size(); ++i) {
-            
             std::vector<std::vector<vertex_idx_t<Graph_t>>> new_vertex_maps_for_level;
             std::vector<std::vector<std::size_t>> new_iso_groups_for_level;
             std::vector<Constr_Graph_t> new_iso_subgraphs_for_level;
-            
+
             size_t new_component_idx = 0;
 
             for (size_t j = 0; j < isomorphism_groups[i].size(); ++j) {
-                const auto& group = isomorphism_groups[i][j];
-                
+                const auto &group = isomorphism_groups[i][j];
+
                 if (group.size() <= merge_threshold) {
                     // This group is small enough, copy it over as is.
                     std::vector<std::size_t> new_group;
-                    for (const auto& original_comp_idx : group) {
+                    for (const auto &original_comp_idx : group) {
                         new_vertex_maps_for_level.push_back(vertex_maps[i][original_comp_idx]);
                         new_group.push_back(new_component_idx++);
                     }
@@ -179,11 +162,12 @@ class IsomorphismGroups {
                     new_iso_subgraphs_for_level.push_back(isomorphism_groups_subgraphs[i][j]);
                 } else {
                     // This group is too large and needs to be merged.
-                    std::cout << "Merging iso group of size " << group.size() << " into " << target_group_count << " new groups." << std::endl;
-                    
+                    std::cout << "Merging iso group of size " << group.size() << " into " << target_group_count << " new groups."
+                              << std::endl;
+
                     size_t base_mult = group.size() / target_group_count;
                     size_t remainder = group.size() % target_group_count;
-                    
+
                     std::vector<std::size_t> new_merged_group_indices;
                     size_t current_original_idx = 0;
 
@@ -192,7 +176,7 @@ class IsomorphismGroups {
                         size_t num_to_merge = base_mult + (k < remainder ? 1 : 0);
 
                         for (size_t m = 0; m < num_to_merge; ++m) {
-                            const auto& original_comp = vertex_maps[i][group[current_original_idx++]];
+                            const auto &original_comp = vertex_maps[i][group[current_original_idx++]];
                             merged_component.insert(merged_component.end(), original_comp.begin(), original_comp.end());
                         }
                         std::sort(merged_component.begin(), merged_component.end());
@@ -215,4 +199,4 @@ class IsomorphismGroups {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/WavefrontComponentScheduler.hpp b/include/osp/dag_divider/WavefrontComponentScheduler.hpp
index 8c7e2845..a38d801f 100644
--- a/include/osp/dag_divider/WavefrontComponentScheduler.hpp
+++ b/include/osp/dag_divider/WavefrontComponentScheduler.hpp
@@ -24,9 +24,9 @@ namespace osp {
  * @class WavefrontComponentScheduler
  * @brief Schedules wavefronts by treating each component individually.
  */
-template<typename Graph_t, typename constr_graph_t>
+template <typename Graph_t, typename constr_graph_t>
 class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, constr_graph_t> {
-public:
+  public:
     WavefrontComponentScheduler(IDagDivider<Graph_t> &div, Scheduler<constr_graph_t> &scheduler_)
         : AbstractWavefrontScheduler<Graph_t, constr_graph_t>(div, scheduler_) {}
 
@@ -35,8 +35,8 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, c
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
         const auto &instance = schedule.getInstance();
         const auto &original_arch = instance.getArchitecture();
-        const auto& original_proc_type_count = original_arch.getProcessorTypeCount();
-        const auto& computational_dag = instance.getComputationalDag();
+        const auto &original_proc_type_count = original_arch.getProcessorTypeCount();
+        const auto &computational_dag = instance.getComputationalDag();
 
         std::vector<std::vector<unsigned>> global_ids_by_type(original_arch.getNumberOfProcessorTypes());
         for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) {
@@ -46,14 +46,17 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, c
         auto vertex_maps = this->divider->divide(computational_dag);
         unsigned superstep_offset = 0;
 
-        for (std::size_t i = 0; i < vertex_maps.size(); ++i) { // For each wavefront set
-            if (this->enable_debug_prints) std::cout << "\n--- Processing Wavefront Set " << i << " (No Isomorphism) ---" << std::endl;
-            
-            const auto& components = vertex_maps[i];
+        for (std::size_t i = 0; i < vertex_maps.size(); ++i) {    // For each wavefront set
+            if (this->enable_debug_prints) {
+                std::cout << "\n--- Processing Wavefront Set " << i << " (No Isomorphism) ---" << std::endl;
+            }
+
+            const auto &components = vertex_maps[i];
             std::vector<constr_graph_t> sub_dags(components.size());
-            std::vector<std::vector<double>> work_by_type(components.size(), std::vector<double>(original_proc_type_count.size(), 0.0));
+            std::vector<std::vector<double>> work_by_type(components.size(),
+                                                          std::vector<double>(original_proc_type_count.size(), 0.0));
 
-            for(size_t j = 0; j < components.size(); ++j) {
+            for (size_t j = 0; j < components.size(); ++j) {
                 create_induced_subgraph(computational_dag, sub_dags[j], components[j]);
                 for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) {
                     work_by_type[j][type_idx] = sumOfCompatibleWorkWeights(sub_dags[j], instance, type_idx);
@@ -63,25 +66,27 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, c
             assert(this->validateWorkDistribution(sub_dags, instance));
 
             // Distribute Processors
-            std::vector<std::vector<unsigned>> proc_allocations(components.size(), std::vector<unsigned>(original_proc_type_count.size()));
-            for(unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) {
+            std::vector<std::vector<unsigned>> proc_allocations(components.size(),
+                                                                std::vector<unsigned>(original_proc_type_count.size()));
+            for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) {
                 std::vector<double> work_for_this_type(components.size());
-                for(size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) {
+                for (size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) {
                     work_for_this_type[comp_idx] = work_by_type[comp_idx][type_idx];
                 }
-                
+
                 std::vector<unsigned> type_allocation;
-                bool starvation_hit = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation);
+                bool starvation_hit
+                    = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation);
 
                 if (starvation_hit) {
                     if constexpr (this->enable_debug_prints) {
-                        std::cerr << "ERROR: Processor starvation detected for type " << type_idx 
-                                  << " in wavefront set " << i << ". Not enough processors to assign one to each active component." << std::endl;
+                        std::cerr << "ERROR: Processor starvation detected for type " << type_idx << " in wavefront set " << i
+                                  << ". Not enough processors to assign one to each active component." << std::endl;
                     }
                     return RETURN_STATUS::ERROR;
                 }
 
-                for(size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) {
+                for (size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) {
                     proc_allocations[comp_idx][type_idx] = type_allocation[comp_idx];
                 }
             }
@@ -104,7 +109,9 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, c
 
                 BspSchedule<constr_graph_t> sub_schedule(sub_instance);
                 const auto status = this->scheduler->computeSchedule(sub_schedule);
-                if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status;
+                if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) {
+                    return status;
+                }
 
                 const auto sub_proc_type_count = sub_architecture.getProcessorTypeCount();
                 std::vector<unsigned> sub_proc_type_corrections(sub_architecture.getNumberOfProcessorTypes(), 0);
@@ -120,8 +127,9 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, c
                     const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex);
                     const unsigned proc_type = sub_architecture.processorType(proc_in_sub_sched);
                     const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type];
-                    unsigned global_proc_id = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type];
-                    
+                    unsigned global_proc_id
+                        = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type];
+
                     schedule.setAssignedProcessor(vertex, global_proc_id);
                     schedule.setAssignedSuperstep(vertex, superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex));
                     subdag_vertex++;
@@ -138,7 +146,7 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler<Graph_t, c
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 using WavefrontComponentScheduler_def_int_t = WavefrontComponentScheduler<Graph_t, boost_graph_int_t>;
 
-}
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp
index 30b878c1..d7a461b0 100644
--- a/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp
@@ -18,33 +18,35 @@ limitations under the License.
 
 #pragma once
 
-
-#include "osp/graph_algorithms/directed_graph_util.hpp"
-#include "osp/bsp/model/BspInstance.hpp"
-#include <iostream>
-#include <vector>
-#include <string>
-#include <numeric>
 #include <algorithm>
 #include <cmath>
+#include <iostream>
 #include <limits>
+#include <numeric>
 #include <set>
+#include <string>
+#include <vector>
+
+#include "osp/bsp/model/BspInstance.hpp"
+#include "osp/graph_algorithms/directed_graph_util.hpp"
 
 namespace osp {
 
 struct SubgraphSchedule {
     double makespan;
-    std::vector<std::vector<unsigned>> node_assigned_worker_per_type;   
+    std::vector<std::vector<unsigned>> node_assigned_worker_per_type;
     std::vector<bool> was_trimmed;
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 class EftSubgraphScheduler {
-public:
-    
+  public:
     EftSubgraphScheduler() = default;
 
-    SubgraphSchedule run(const BspInstance<Graph_t>& instance, const std::vector<unsigned>& multiplicities, const std::vector<std::vector<v_workw_t<Graph_t>>> & required_proc_types, const std::vector<unsigned>& max_num_procs) {
+    SubgraphSchedule run(const BspInstance<Graph_t> &instance,
+                         const std::vector<unsigned> &multiplicities,
+                         const std::vector<std::vector<v_workw_t<Graph_t>>> &required_proc_types,
+                         const std::vector<unsigned> &max_num_procs) {
         prepare_for_scheduling(instance, multiplicities, required_proc_types, max_num_procs);
         return execute_schedule(instance);
     }
@@ -53,21 +55,14 @@ class EftSubgraphScheduler {
         min_work_per_processor_ = min_work_per_processor;
     }
 
-private:
-
+  private:
     static constexpr bool verbose = false;
 
     using job_id_t = vertex_idx_t<Graph_t>;
 
     v_workw_t<Graph_t> min_work_per_processor_ = 2000;
 
-    enum class JobStatus {
-        WAITING,
-        READY,
-        RUNNING,
-        COMPLETED
-    };
-
+    enum class JobStatus { WAITING, READY, RUNNING, COMPLETED };
 
     struct Job {
         job_id_t id;
@@ -78,7 +73,7 @@ class EftSubgraphScheduler {
         unsigned max_num_procs = 1;
 
         job_id_t in_degree_current = 0;
-        
+
         JobStatus status = JobStatus::WAITING;
         v_workw_t<Graph_t> upward_rank = 0.0;
 
@@ -86,35 +81,39 @@ class EftSubgraphScheduler {
         std::vector<unsigned> assigned_workers;
         double start_time = -1.0;
         double finish_time = -1.0;
-
     };
 
     // Custom comparator for storing Job pointers in the ready set, sorted by rank.
     struct JobPtrCompare {
-        bool operator()(const Job* lhs, const Job* rhs) const {
+        bool operator()(const Job *lhs, const Job *rhs) const {
             if (lhs->upward_rank != rhs->upward_rank) {
                 return lhs->upward_rank > rhs->upward_rank;
             }
-            return lhs->id > rhs->id; // Tie-breaking
+            return lhs->id > rhs->id;    // Tie-breaking
         }
     };
 
     std::vector<Job> jobs_;
-    std::set<const Job*, JobPtrCompare> ready_jobs_;
+    std::set<const Job *, JobPtrCompare> ready_jobs_;
 
-    void prepare_for_scheduling(const BspInstance<Graph_t>& instance, const std::vector<unsigned>& multiplicities, const std::vector<std::vector<v_workw_t<Graph_t>>> & required_proc_types, const std::vector<unsigned>& max_num_procs) {        
+    void prepare_for_scheduling(const BspInstance<Graph_t> &instance,
+                                const std::vector<unsigned> &multiplicities,
+                                const std::vector<std::vector<v_workw_t<Graph_t>>> &required_proc_types,
+                                const std::vector<unsigned> &max_num_procs) {
         jobs_.resize(instance.numberOfVertices());
         if constexpr (verbose) {
             std::cout << "--- Preparing for Subgraph Scheduling ---" << std::endl;
         }
-        const auto & graph = instance.getComputationalDag();
+        const auto &graph = instance.getComputationalDag();
         const size_t num_worker_types = instance.getArchitecture().getProcessorTypeCount().size();
 
         calculate_upward_ranks(graph);
 
-        if constexpr (verbose) std::cout << "Initializing jobs..." << std::endl;
+        if constexpr (verbose) {
+            std::cout << "Initializing jobs..." << std::endl;
+        }
         job_id_t idx = 0;
-        for (auto& job : jobs_) {
+        for (auto &job : jobs_) {
             job.id = idx;
             job.in_degree_current = graph.in_degree(idx);
             if (job.in_degree_current == 0) {
@@ -124,68 +123,74 @@ class EftSubgraphScheduler {
                 job.status = JobStatus::WAITING;
             }
             job.total_work = graph.vertex_work_weight(idx);
-            job.max_num_procs = std::min(max_num_procs[idx], static_cast<unsigned>((job.total_work + min_work_per_processor_ - 1) / min_work_per_processor_));
+            job.max_num_procs
+                = std::min(max_num_procs[idx],
+                           static_cast<unsigned>((job.total_work + min_work_per_processor_ - 1) / min_work_per_processor_));
             job.multiplicity = std::min(multiplicities[idx], job.max_num_procs);
-            job.required_proc_types = required_proc_types[idx];           
+            job.required_proc_types = required_proc_types[idx];
             job.assigned_workers.resize(num_worker_types, 0);
             job.start_time = -1.0;
             job.finish_time = -1.0;
 
             if constexpr (verbose) {
-                std::cout << "  - Job " << idx << ": rank=" << job.upward_rank << ", mult=" << job.multiplicity << ", max_procs=" << job.max_num_procs
-                          << ", work=" << job.total_work << ", status=" << (job.status == JobStatus::READY ? "READY" : "WAITING") << std::endl;
+                std::cout << "  - Job " << idx << ": rank=" << job.upward_rank << ", mult=" << job.multiplicity
+                          << ", max_procs=" << job.max_num_procs << ", work=" << job.total_work
+                          << ", status=" << (job.status == JobStatus::READY ? "READY" : "WAITING") << std::endl;
             }
             idx++;
-        }        
+        }
     }
 
-    void calculate_upward_ranks(const Graph_t & graph) {
+    void calculate_upward_ranks(const Graph_t &graph) {
         const auto reverse_top_order = GetTopOrderReverse(graph);
 
-        for (const auto& vertex : reverse_top_order) {
+        for (const auto &vertex : reverse_top_order) {
             v_workw_t<Graph_t> max_successor_rank = 0.0;
-            for (const auto& child : graph.children(vertex)) {
+            for (const auto &child : graph.children(vertex)) {
                 max_successor_rank = std::max(max_successor_rank, jobs_.at(child).upward_rank);
             }
-            
-            Job& job = jobs_.at(vertex);
+
+            Job &job = jobs_.at(vertex);
             job.upward_rank = graph.vertex_work_weight(vertex) + max_successor_rank;
         }
     }
 
-    SubgraphSchedule execute_schedule(const BspInstance<Graph_t>& instance) {
-        double current_time = 0.0; 
+    SubgraphSchedule execute_schedule(const BspInstance<Graph_t> &instance) {
+        double current_time = 0.0;
         std::vector<unsigned> available_workers = instance.getArchitecture().getProcessorTypeCount();
         const size_t num_worker_types = available_workers.size();
         std::vector<job_id_t> running_jobs;
         unsigned completed_count = 0;
-        const auto& graph = instance.getComputationalDag();
+        const auto &graph = instance.getComputationalDag();
 
         if constexpr (verbose) {
             std::cout << "\n--- Subgraph Scheduling Execution Started ---" << std::endl;
             std::cout << "Total jobs: " << jobs_.size() << std::endl;
             std::cout << "Initial available workers: ";
-            for(size_t i=0; i<num_worker_types; ++i) std::cout << "T" << i << ":" << available_workers[i] << " ";
+            for (size_t i = 0; i < num_worker_types; ++i) {
+                std::cout << "T" << i << ":" << available_workers[i] << " ";
+            }
             std::cout << std::endl;
         }
 
-        while (completed_count < jobs_.size()) {       
-
+        while (completed_count < jobs_.size()) {
             if constexpr (verbose) {
                 std::cout << "\n[T=" << current_time << "] --- New Scheduling Step ---" << std::endl;
                 std::cout << "Completed jobs: " << completed_count << "/" << jobs_.size() << std::endl;
                 std::cout << "Available workers: ";
-                for(size_t i=0; i<num_worker_types; ++i) std::cout << "T" << i << ":" << available_workers[i] << " ";
+                for (size_t i = 0; i < num_worker_types; ++i) {
+                    std::cout << "T" << i << ":" << available_workers[i] << " ";
+                }
                 std::cout << std::endl;
                 std::cout << "Ready queue size: " << ready_jobs_.size() << ". Running jobs: " << running_jobs.size() << std::endl;
             }
 
-            std::vector<Job*> jobs_to_start;
+            std::vector<Job *> jobs_to_start;
             v_workw_t<Graph_t> total_runnable_priority = 0.0;
 
             // Iterate through ready jobs and assign minimum resources if available.
-            for (const Job* job_ptr : ready_jobs_) {
-                Job& job = jobs_[job_ptr->id];
+            for (const Job *job_ptr : ready_jobs_) {
+                Job &job = jobs_[job_ptr->id];
                 bool can_start = true;
                 for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
                     if (job.required_proc_types[type_idx] > 0 && available_workers[type_idx] < job.multiplicity) {
@@ -211,21 +216,31 @@ class EftSubgraphScheduler {
                     std::cout << "Allocating workers to " << jobs_to_start.size() << " runnable jobs..." << std::endl;
                 }
 
-                //Distribute remaining workers proportionally among the jobs that just started.
+                // Distribute remaining workers proportionally among the jobs that just started.
                 const std::vector<unsigned> remaining_workers_pool = available_workers;
-                for (Job* job_ptr : jobs_to_start) {
-                    Job& job = *job_ptr;
+                for (Job *job_ptr : jobs_to_start) {
+                    Job &job = *job_ptr;
                     for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
                         if (job.required_proc_types[type_idx] > 0) {
-                            const unsigned current_total_assigned = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u);
-                            const unsigned max_additional_workers = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0;
-
-                            const double proportion = (total_runnable_priority > 0) ? (static_cast<double>(job.upward_rank) / static_cast<double>(total_runnable_priority)) : (1.0 / static_cast<double>(jobs_to_start.size()));
-                            const unsigned proportional_share = static_cast<unsigned>(static_cast<double>(remaining_workers_pool[type_idx]) * proportion);
-                            const unsigned num_proportional_chunks = (job.multiplicity > 0) ? proportional_share / job.multiplicity : 0;
-                            const unsigned num_available_chunks = (job.multiplicity > 0) ? available_workers[type_idx] / job.multiplicity : 0;
-                            const unsigned num_chunks_allowed_by_max = (job.multiplicity > 0) ? max_additional_workers / job.multiplicity : 0;
-                            const unsigned num_chunks_to_assign = std::min({num_proportional_chunks, num_available_chunks, num_chunks_allowed_by_max});
+                            const unsigned current_total_assigned
+                                = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u);
+                            const unsigned max_additional_workers
+                                = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0;
+
+                            const double proportion
+                                = (total_runnable_priority > 0)
+                                      ? (static_cast<double>(job.upward_rank) / static_cast<double>(total_runnable_priority))
+                                      : (1.0 / static_cast<double>(jobs_to_start.size()));
+                            const unsigned proportional_share
+                                = static_cast<unsigned>(static_cast<double>(remaining_workers_pool[type_idx]) * proportion);
+                            const unsigned num_proportional_chunks
+                                = (job.multiplicity > 0) ? proportional_share / job.multiplicity : 0;
+                            const unsigned num_available_chunks
+                                = (job.multiplicity > 0) ? available_workers[type_idx] / job.multiplicity : 0;
+                            const unsigned num_chunks_allowed_by_max
+                                = (job.multiplicity > 0) ? max_additional_workers / job.multiplicity : 0;
+                            const unsigned num_chunks_to_assign
+                                = std::min({num_proportional_chunks, num_available_chunks, num_chunks_allowed_by_max});
                             const unsigned assigned = num_chunks_to_assign * job.multiplicity;
                             job.assigned_workers[type_idx] += assigned;
                             available_workers[type_idx] -= assigned;
@@ -233,13 +248,15 @@ class EftSubgraphScheduler {
                     }
                 }
 
-                //Greedily assign any remaining workers to the highest-rank jobs that can take them.
-                for (Job* job_ptr : jobs_to_start) {
-                    Job& job = *job_ptr;
+                // Greedily assign any remaining workers to the highest-rank jobs that can take them.
+                for (Job *job_ptr : jobs_to_start) {
+                    Job &job = *job_ptr;
                     for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
                         if (job.required_proc_types[type_idx] > 0 && job.multiplicity > 0) {
-                            const unsigned current_total_assigned = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u);
-                            const unsigned max_additional_workers = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0;
+                            const unsigned current_total_assigned
+                                = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u);
+                            const unsigned max_additional_workers
+                                = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0;
                             const unsigned num_available_chunks = available_workers[type_idx] / job.multiplicity;
                             const unsigned num_chunks_allowed_by_max = max_additional_workers / job.multiplicity;
                             const unsigned assigned = std::min(num_available_chunks, num_chunks_allowed_by_max) * job.multiplicity;
@@ -249,15 +266,17 @@ class EftSubgraphScheduler {
                     }
                 }
 
-                for (Job* job_ptr : jobs_to_start) {
-                    Job& job = *job_ptr;
+                for (Job *job_ptr : jobs_to_start) {
+                    Job &job = *job_ptr;
 
                     job.status = JobStatus::RUNNING;
                     job.start_time = current_time;
 
                     // Calculate finish time based on total work and total assigned workers.
                     unsigned total_assigned_workers = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u);
-                    double exec_time = (total_assigned_workers > 0) ? static_cast<double>(job.total_work) / static_cast<double>(total_assigned_workers) : 0.0;
+                    double exec_time = (total_assigned_workers > 0)
+                                           ? static_cast<double>(job.total_work) / static_cast<double>(total_assigned_workers)
+                                           : 0.0;
                     job.finish_time = current_time + exec_time;
 
                     running_jobs.push_back(job.id);
@@ -267,14 +286,14 @@ class EftSubgraphScheduler {
 
             // 2. ADVANCE TIME
             if (running_jobs.empty() && completed_count < jobs_.size()) {
-                 std::cerr << "Error: Deadlock detected. No running jobs and " 
-                           << jobs_.size() - completed_count << " jobs incomplete." << std::endl;
+                std::cerr << "Error: Deadlock detected. No running jobs and " << jobs_.size() - completed_count
+                          << " jobs incomplete." << std::endl;
                 if constexpr (verbose) {
                     std::cout << "Deadlock! Ready queue:" << std::endl;
-                    for (const auto* ready_job_ptr : ready_jobs_) {
-                        const Job& job = *ready_job_ptr;
+                    for (const auto *ready_job_ptr : ready_jobs_) {
+                        const Job &job = *ready_job_ptr;
                         std::cout << "  - Job " << job.id << " (mult " << job.multiplicity << ") needs workers: ";
-                        for(size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
+                        for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
                             if (job.required_proc_types[type_idx] > 0) {
                                 std::cout << "T" << type_idx << ":" << job.multiplicity << " ";
                             }
@@ -282,47 +301,59 @@ class EftSubgraphScheduler {
                         std::cout << std::endl;
                     }
                     std::cout << "Available workers: ";
-                    for(size_t i=0; i<num_worker_types; ++i) std::cout << "T" << i << ":" << available_workers[i] << " ";
+                    for (size_t i = 0; i < num_worker_types; ++i) {
+                        std::cout << "T" << i << ":" << available_workers[i] << " ";
+                    }
                     std::cout << std::endl;
                 }
-                 SubgraphSchedule result;
-                 result.makespan = -1.0;
-                 return result;
+                SubgraphSchedule result;
+                result.makespan = -1.0;
+                return result;
             }
-            if (running_jobs.empty()) break; // All jobs are done
-            
+            if (running_jobs.empty()) {
+                break;    // All jobs are done
+            }
+
             double next_event_time = std::numeric_limits<double>::max();
             for (job_id_t id : running_jobs) {
                 next_event_time = std::min(next_event_time, jobs_.at(id).finish_time);
             }
-            if constexpr (verbose) std::cout << "Advancing time from " << current_time << " to " << next_event_time << std::endl;
+            if constexpr (verbose) {
+                std::cout << "Advancing time from " << current_time << " to " << next_event_time << std::endl;
+            }
             current_time = next_event_time;
 
             // 3. PROCESS COMPLETED JOBS
             auto it = running_jobs.begin();
             while (it != running_jobs.end()) {
-                Job& job = jobs_.at(*it);
+                Job &job = jobs_.at(*it);
                 if (job.finish_time <= current_time) {
                     job.status = JobStatus::COMPLETED;
-                    if constexpr (verbose) std::cout << "Job " << job.id << " finished at T=" << current_time << std::endl;
+                    if constexpr (verbose) {
+                        std::cout << "Job " << job.id << " finished at T=" << current_time << std::endl;
+                    }
                     // Release workers
-                    for(size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
+                    for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) {
                         available_workers[type_idx] += job.assigned_workers[type_idx];
                     }
                     completed_count++;
 
                     // Update successors
-                    if constexpr (verbose) std::cout << "  - Updating successors..." << std::endl;
-                    for (const auto& successor_id : graph.children(job.id)) {
-                        Job& successor_job = jobs_.at(successor_id);
+                    if constexpr (verbose) {
+                        std::cout << "  - Updating successors..." << std::endl;
+                    }
+                    for (const auto &successor_id : graph.children(job.id)) {
+                        Job &successor_job = jobs_.at(successor_id);
                         successor_job.in_degree_current--;
                         if (successor_job.in_degree_current == 0) {
                             successor_job.status = JobStatus::READY;
                             ready_jobs_.insert(&successor_job);
-                            if constexpr (verbose) std::cout << "    - Successor " << successor_job.id << " is now READY." << std::endl;
+                            if constexpr (verbose) {
+                                std::cout << "    - Successor " << successor_job.id << " is now READY." << std::endl;
+                            }
                         }
                     }
-                    it = running_jobs.erase(it); // Remove from running list
+                    it = running_jobs.erase(it);    // Remove from running list
                 } else {
                     ++it;
                 }
@@ -333,10 +364,12 @@ class EftSubgraphScheduler {
             std::cout << "\n--- Subgraph Scheduling Finished ---" << std::endl;
             std::cout << "Final Makespan: " << current_time << std::endl;
             std::cout << "Job Summary:" << std::endl;
-            for(const auto& job : jobs_) {
-                std::cout << "  - Job " << job.id << ": Multiplicity=" << job.multiplicity << ", Max Procs=" << job.max_num_procs << ", Work=" << job.total_work << ", Start=" << job.start_time << ", Finish=" << job.finish_time << ", Workers=[";
-                for(size_t i=0; i<job.assigned_workers.size(); ++i) {
-                    std::cout << "T" << i << ":" << job.assigned_workers[i] << (i == job.assigned_workers.size()-1 ? "" : ", ");
+            for (const auto &job : jobs_) {
+                std::cout << "  - Job " << job.id << ": Multiplicity=" << job.multiplicity << ", Max Procs=" << job.max_num_procs
+                          << ", Work=" << job.total_work << ", Start=" << job.start_time << ", Finish=" << job.finish_time
+                          << ", Workers=[";
+                for (size_t i = 0; i < job.assigned_workers.size(); ++i) {
+                    std::cout << "T" << i << ":" << job.assigned_workers[i] << (i == job.assigned_workers.size() - 1 ? "" : ", ");
                 }
                 std::cout << "]" << std::endl;
             }
@@ -345,14 +378,15 @@ class EftSubgraphScheduler {
         SubgraphSchedule result;
         result.makespan = current_time;
         result.node_assigned_worker_per_type.resize(jobs_.size());
-        for(const auto& job : jobs_) {
+        for (const auto &job : jobs_) {
             result.node_assigned_worker_per_type[job.id].resize(num_worker_types);
             for (size_t i = 0; i < num_worker_types; ++i) {
-                result.node_assigned_worker_per_type[job.id][i] = (job.assigned_workers[i] + job.multiplicity - 1) / job.multiplicity;
+                result.node_assigned_worker_per_type[job.id][i]
+                    = (job.assigned_workers[i] + job.multiplicity - 1) / job.multiplicity;
             }
-        } 
+        }
         return result;
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp
index 51a64dd3..6436ef37 100644
--- a/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp
@@ -18,33 +18,33 @@ limitations under the License.
 
 #pragma once
 
-#include <vector>
 #include <unordered_map>
+#include <vector>
 
 namespace osp {
 
 /**
-* @class HashComputer
-* @brief Abstract base class for computing and managing hash values and orbits for graph vertices.
-*
-* This class provides an interface for obtaining hash values for individual vertices,
-* the full list of vertex hashes, the number of unique orbits, and the vertices belonging to specific orbits.
-*
-* @tparam index_type The type used for indexing vertices in the graph.
-*/
-template<typename index_type>
+ * @class HashComputer
+ * @brief Abstract base class for computing and managing hash values and orbits for graph vertices.
+ *
+ * This class provides an interface for obtaining hash values for individual vertices,
+ * the full list of vertex hashes, the number of unique orbits, and the vertices belonging to specific orbits.
+ *
+ * @tparam index_type The type used for indexing vertices in the graph.
+ */
+template <typename index_type>
 class HashComputer {
-public:
+  public:
     virtual ~HashComputer() = default;
 
     virtual std::size_t get_vertex_hash(const index_type &v) const = 0;
     virtual const std::vector<std::size_t> &get_vertex_hashes() const = 0;
     virtual std::size_t num_orbits() const = 0;
-    
+
     virtual const std::vector<index_type> &get_orbit(const index_type &v) const = 0;
     virtual const std::unordered_map<std::size_t, std::vector<index_type>> &get_orbits() const = 0;
 
-    virtual const std::vector<index_type>& get_orbit_from_hash(const std::size_t& hash) const = 0;
+    virtual const std::vector<index_type> &get_orbit_from_hash(const std::size_t &hash) const = 0;
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
index 83556089..507fa12e 100644
--- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
@@ -16,6 +16,11 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
+#include <chrono>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+
 #include "EftSubgraphScheduler.hpp"
 #include "HashComputer.hpp"
 #include "MerkleHashComputer.hpp"
@@ -24,10 +29,6 @@ limitations under the License.
 #include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/graph_algorithms/subgraph_algorithms.hpp"
-#include <chrono>
-#include <ctime>
-#include <iomanip>
-#include <iostream>
 
 namespace osp {
 
@@ -49,12 +50,11 @@ namespace osp {
  * @tparam Graph_t The type of the input computational DAG.
  * @tparam Constr_Graph_t The type of the constructable computational DAG used for internal representations.
  */
-template<typename Graph_t, typename Constr_Graph_t>
+template <typename Graph_t, typename Constr_Graph_t>
 class IsomorphicSubgraphScheduler {
     static_assert(is_computational_dag_v<Graph_t>, "Graph must be a computational DAG");
     static_assert(is_computational_dag_v<Constr_Graph_t>, "Constr_Graph_t must be a computational DAG");
-    static_assert(is_constructable_cdag_v<Constr_Graph_t>,
-                  "Constr_Graph_t must satisfy the constructable_cdag_vertex concept");
+    static_assert(is_constructable_cdag_v<Constr_Graph_t>, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept");
     static_assert(std::is_same_v<vertex_idx_t<Graph_t>, vertex_idx_t<Constr_Graph_t>>,
                   "Graph_t and Constr_Graph_t must have the same vertex_idx types");
 
@@ -85,19 +85,34 @@ class IsomorphicSubgraphScheduler {
     virtual ~IsomorphicSubgraphScheduler() {}
 
     void setMergeDifferentTypes(bool flag) { merge_different_node_types = flag; }
+
     void setWorkThreshold(v_workw_t<Constr_Graph_t> work_threshold) { work_threshold_ = work_threshold; }
-    void setCriticalPathThreshold(v_workw_t<Constr_Graph_t> critical_path_threshold) { critical_path_threshold_ = critical_path_threshold; }
+
+    void setCriticalPathThreshold(v_workw_t<Constr_Graph_t> critical_path_threshold) {
+        critical_path_threshold_ = critical_path_threshold;
+    }
+
     void setOrbitLockRatio(double orbit_lock_ratio) { orbit_lock_ratio_ = orbit_lock_ratio; }
-    void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) { natural_breaks_count_percentage_ = natural_breaks_count_percentage; }
+
+    void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) {
+        natural_breaks_count_percentage_ = natural_breaks_count_percentage;
+    }
+
     void setAllowTrimmedScheduler(bool flag) { allow_use_trimmed_scheduler = flag; }
+
     void set_plot_dot_graphs(bool plot) { plot_dot_graphs_ = plot; }
+
     void disable_use_max_group_size() { use_max_group_size_ = false; }
+
     void setUseMaxBsp(bool flag) { use_max_bsp = flag; }
+
     void enable_use_max_group_size(const unsigned max_group_size) {
         use_max_group_size_ = true;
         max_group_size_ = max_group_size;
     }
+
     void setEnableAdaptiveSymmetryThreshold() { use_adaptive_symmetry_threshold = true; }
+
     void setUseStaticSymmetryLevel(size_t static_symmetry_level) {
         use_adaptive_symmetry_threshold = false;
         symmetry_ = static_symmetry_level;
@@ -116,7 +131,8 @@ class IsomorphicSubgraphScheduler {
 
         std::unique_ptr<HashComputer<vertex_idx_t<Graph_t>>> local_hasher;
         if (!hash_computer_) {
-            local_hasher = std::make_unique<MerkleHashComputer<Graph_t, bwd_merkle_node_hash_func<Graph_t>, true>>(instance.getComputationalDag(), instance.getComputationalDag());
+            local_hasher = std::make_unique<MerkleHashComputer<Graph_t, bwd_merkle_node_hash_func<Graph_t>, true>>(
+                instance.getComputationalDag(), instance.getComputationalDag());
             hash_computer_ = local_hasher.get();
         }
 
@@ -125,13 +141,14 @@ class IsomorphicSubgraphScheduler {
         auto isomorphic_groups = orbit_processor.get_final_groups();
 
         std::vector<bool> was_trimmed(isomorphic_groups.size(), false);
-        trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); // Apply trimming and record which groups were affected
+        trim_subgraph_groups(isomorphic_groups, instance, was_trimmed);    // Apply trimming and record which groups were affected
 
         auto input = prepare_subgraph_scheduling_input(instance, isomorphic_groups, was_trimmed);
 
         EftSubgraphScheduler<Constr_Graph_t> etf_scheduler;
-        SubgraphSchedule subgraph_schedule = etf_scheduler.run(input.instance, input.multiplicities, input.required_proc_types, input.max_num_processors);
-        subgraph_schedule.was_trimmed = std::move(was_trimmed); // Pass through trimming info
+        SubgraphSchedule subgraph_schedule
+            = etf_scheduler.run(input.instance, input.multiplicities, input.required_proc_types, input.max_num_processors);
+        subgraph_schedule.was_trimmed = std::move(was_trimmed);    // Pass through trimming info
 
         std::vector<vertex_idx_t<Graph_t>> partition(instance.numberOfVertices(), 0);
         schedule_isomorphic_group(instance, isomorphic_groups, subgraph_schedule, partition);
@@ -144,8 +161,10 @@ class IsomorphicSubgraphScheduler {
             std::string timestamp = ss.str() + "_";
 
             DotFileWriter writer;
-            writer.write_colored_graph(timestamp + "isomorphic_groups.dot", instance.getComputationalDag(), orbit_processor.get_final_contraction_map());
-            writer.write_colored_graph(timestamp + "orbits_colored.dot", instance.getComputationalDag(), orbit_processor.get_contraction_map());
+            writer.write_colored_graph(
+                timestamp + "isomorphic_groups.dot", instance.getComputationalDag(), orbit_processor.get_final_contraction_map());
+            writer.write_colored_graph(
+                timestamp + "orbits_colored.dot", instance.getComputationalDag(), orbit_processor.get_contraction_map());
             writer.write_graph(timestamp + "iso_groups_contracted.dot", input.instance.getComputationalDag());
             writer.write_colored_graph(timestamp + "graph_partition.dot", instance.getComputationalDag(), partition);
             Constr_Graph_t corase_graph;
@@ -156,7 +175,7 @@ class IsomorphicSubgraphScheduler {
     }
 
   protected:
-    template<typename G_t, typename C_G_t>
+    template <typename G_t, typename C_G_t>
     struct subgraph_scheduler_input {
         BspInstance<C_G_t> instance;
         std::vector<unsigned> multiplicities;
@@ -173,14 +192,16 @@ class IsomorphicSubgraphScheduler {
         for (size_t group_idx = 0; group_idx < isomorphic_groups.size(); ++group_idx) {
             auto &group = isomorphic_groups[group_idx];
             const unsigned group_size = static_cast<unsigned>(group.size());
-            if (group_size <= 1)
+            if (group_size <= 1) {
                 continue;
+            }
 
             unsigned effective_min_proc_type_count = 0;
 
             if (use_max_group_size_) {
                 if constexpr (verbose) {
-                    std::cout << "Group " << group_idx << " (size " << group_size << "): Using fixed max_group_size_ = " << max_group_size_ << " for trimming." << std::endl;
+                    std::cout << "Group " << group_idx << " (size " << group_size
+                              << "): Using fixed max_group_size_ = " << max_group_size_ << " for trimming." << std::endl;
                 }
                 effective_min_proc_type_count = max_group_size_;
             } else {
@@ -219,14 +240,16 @@ class IsomorphicSubgraphScheduler {
                     }
                     if (found_compatible_processor) {
                         if constexpr (verbose) {
-                            std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type
-                                      << "). Min compatible processors: " << min_compatible_processors << "." << std::endl;
+                            std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type ("
+                                      << common_node_type << "). Min compatible processors: " << min_compatible_processors << "."
+                                      << std::endl;
                         }
                         effective_min_proc_type_count = min_compatible_processors;
                     } else {
                         if constexpr (verbose) {
-                            std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type
-                                      << ") but no compatible processors found. Disabling trimming." << std::endl;
+                            std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type ("
+                                      << common_node_type << ") but no compatible processors found. Disabling trimming."
+                                      << std::endl;
                         }
                         // If no compatible processors found for this type, effectively disable trimming for this group.
                         effective_min_proc_type_count = 1;
@@ -239,7 +262,9 @@ class IsomorphicSubgraphScheduler {
                     }
                     effective_min_proc_type_count = *std::min_element(type_count.begin(), type_count.end());
                     if constexpr (verbose) {
-                        std::cout << "Group " << group_idx << " (size " << group_size << "): Multi-type or untyped group. Using default min_proc_type_count: " << effective_min_proc_type_count << "." << std::endl;
+                        std::cout << "Group " << group_idx << " (size " << group_size
+                                  << "): Multi-type or untyped group. Using default min_proc_type_count: "
+                                  << effective_min_proc_type_count << "." << std::endl;
                     }
                 }
             }
@@ -258,12 +283,14 @@ class IsomorphicSubgraphScheduler {
 
             if (gcd < group_size) {
                 if constexpr (verbose) {
-                    std::cout << "  -> Trimming group " << group_idx << ". GCD(" << group_size << ", " << effective_min_proc_type_count
-                              << ") = " << gcd << ". Merging " << group_size / gcd << " subgraphs at a time." << std::endl;
+                    std::cout << "  -> Trimming group " << group_idx << ". GCD(" << group_size << ", "
+                              << effective_min_proc_type_count << ") = " << gcd << ". Merging " << group_size / gcd
+                              << " subgraphs at a time." << std::endl;
                 }
 
-                if (allow_use_trimmed_scheduler)
+                if (allow_use_trimmed_scheduler) {
                     gcd = 1;
+                }
 
                 was_trimmed[group_idx] = true;
                 const unsigned merge_size = group_size / gcd;
@@ -282,7 +309,8 @@ class IsomorphicSubgraphScheduler {
                     for (unsigned k = 0; k < merge_size; ++k) {
                         const auto &sg_to_merge_vertices = group.subgraphs[original_sg_cursor];
                         original_sg_cursor++;
-                        merged_sg_vertices.insert(merged_sg_vertices.end(), sg_to_merge_vertices.begin(), sg_to_merge_vertices.end());
+                        merged_sg_vertices.insert(
+                            merged_sg_vertices.end(), sg_to_merge_vertices.begin(), sg_to_merge_vertices.end());
                     }
                     new_subgraphs.push_back(std::move(merged_sg_vertices));
                 }
@@ -300,7 +328,6 @@ class IsomorphicSubgraphScheduler {
         const BspInstance<Graph_t> &original_instance,
         const std::vector<typename OrbitGraphProcessor<Graph_t, Constr_Graph_t>::Group> &isomorphic_groups,
         const std::vector<bool> &was_trimmed) {
-
         subgraph_scheduler_input<Graph_t, Constr_Graph_t> result;
         result.instance.getArchitecture() = original_instance.getArchitecture();
         const unsigned num_proc_types = original_instance.getArchitecture().getNumberOfProcessorTypes();
@@ -312,9 +339,10 @@ class IsomorphicSubgraphScheduler {
 
         size_t coarse_node_idx = 0;
         for (const auto &group : isomorphic_groups) {
-
             result.max_num_processors[coarse_node_idx] = static_cast<unsigned>(group.size() * group.subgraphs[0].size());
-            result.multiplicities[coarse_node_idx] = (was_trimmed[coarse_node_idx] && allow_use_trimmed_scheduler) ? 1 : static_cast<unsigned>(group.subgraphs.size());
+            result.multiplicities[coarse_node_idx] = (was_trimmed[coarse_node_idx] && allow_use_trimmed_scheduler)
+                                                         ? 1
+                                                         : static_cast<unsigned>(group.subgraphs.size());
             result.required_proc_types[coarse_node_idx].assign(num_proc_types, 0);
 
             for (const auto &subgraph : group.subgraphs) {
@@ -332,8 +360,8 @@ class IsomorphicSubgraphScheduler {
 
             ++coarse_node_idx;
         }
-        coarser_util::construct_coarse_dag(original_instance.getComputationalDag(), result.instance.getComputationalDag(),
-                                           contraction_map);
+        coarser_util::construct_coarse_dag(
+            original_instance.getComputationalDag(), result.instance.getComputationalDag(), contraction_map);
 
         if constexpr (verbose) {
             std::cout << "\n--- Preparing Subgraph Scheduling Input ---\n";
@@ -342,7 +370,8 @@ class IsomorphicSubgraphScheduler {
                 std::cout << "  - Coarse Node " << j << " (from " << isomorphic_groups[j].subgraphs.size()
                           << " isomorphic subgraphs):\n";
                 std::cout << "    - Multiplicity for scheduling: " << result.multiplicities[j] << "\n";
-                std::cout << "    - Total Work (in coarse graph): " << result.instance.getComputationalDag().vertex_work_weight(j) << "\n";
+                std::cout << "    - Total Work (in coarse graph): " << result.instance.getComputationalDag().vertex_work_weight(j)
+                          << "\n";
                 std::cout << "    - Required Processor Types: ";
                 for (unsigned k = 0; k < num_proc_types; ++k) {
                     std::cout << result.required_proc_types[j][k] << " ";
@@ -371,13 +400,15 @@ class IsomorphicSubgraphScheduler {
             std::sort(rep_subgraph_vertices_sorted.begin(), rep_subgraph_vertices_sorted.end());
 
             BspInstance<Constr_Graph_t> representative_instance;
-            auto rep_global_to_local_map = create_induced_subgraph_map(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted);
+            auto rep_global_to_local_map = create_induced_subgraph_map(
+                instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted);
 
             representative_instance.getArchitecture() = instance.getArchitecture();
             const auto &procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx];
             std::vector<v_memw_t<Constr_Graph_t>> mem_weights(procs_for_group.size(), 0);
             for (unsigned proc_type = 0; proc_type < procs_for_group.size(); ++proc_type) {
-                mem_weights[proc_type] = static_cast<v_memw_t<Constr_Graph_t>>(instance.getArchitecture().maxMemoryBoundProcType(proc_type));
+                mem_weights[proc_type]
+                    = static_cast<v_memw_t<Constr_Graph_t>>(instance.getArchitecture().maxMemoryBoundProcType(proc_type));
             }
             representative_instance.getArchitecture().SetProcessorsConsequTypes(procs_for_group, mem_weights);
             representative_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix());
@@ -395,13 +426,16 @@ class IsomorphicSubgraphScheduler {
             Scheduler<Constr_Graph_t> *scheduler_for_group_ptr;
             std::unique_ptr<Scheduler<Constr_Graph_t>> trimmed_scheduler_owner;
             if (use_trimmed_scheduler) {
-                if constexpr (verbose)
+                if constexpr (verbose) {
                     std::cout << "Using TrimmedGroupScheduler for group " << group_idx << std::endl;
-                trimmed_scheduler_owner = std::make_unique<TrimmedGroupScheduler<Constr_Graph_t>>(*bsp_scheduler_, min_non_zero_procs);
+                }
+                trimmed_scheduler_owner
+                    = std::make_unique<TrimmedGroupScheduler<Constr_Graph_t>>(*bsp_scheduler_, min_non_zero_procs);
                 scheduler_for_group_ptr = trimmed_scheduler_owner.get();
             } else {
-                if constexpr (verbose)
+                if constexpr (verbose) {
                     std::cout << "Using standard BSP scheduler for group " << group_idx << std::endl;
+                }
                 scheduler_for_group_ptr = bsp_scheduler_;
             }
 
@@ -434,7 +468,8 @@ class IsomorphicSubgraphScheduler {
                     std::cout << "T" << type_idx << ":" << type_counts[type_idx] << " ";
                 }
                 std::cout << std::endl;
-                std::cout << "    Sync cost: " << sub_arch.synchronisationCosts() << ", Comm cost: " << sub_arch.communicationCosts() << std::endl;
+                std::cout << "    Sync cost: " << sub_arch.synchronisationCosts()
+                          << ", Comm cost: " << sub_arch.communicationCosts() << std::endl;
             }
 
             scheduler_for_group_ptr->computeSchedule(bsp_schedule);
@@ -470,7 +505,8 @@ class IsomorphicSubgraphScheduler {
                 writer.write_colored_graph(timestamp + "iso_group_rep_" + std::to_string(group_idx) + ".dot", rep_dag, colors);
             }
 
-            const bool max_bsp = use_max_bsp && (representative_instance.getComputationalDag().num_edges() == 0) && (representative_instance.getComputationalDag().vertex_type(0) == 0);
+            const bool max_bsp = use_max_bsp && (representative_instance.getComputationalDag().num_edges() == 0)
+                                 && (representative_instance.getComputationalDag().vertex_type(0) == 0);
 
             // Build data structures for applying the pattern ---
             // Map (superstep, processor) -> relative partition ID
@@ -479,8 +515,9 @@ class IsomorphicSubgraphScheduler {
             for (vertex_idx_t<Graph_t> j = 0; j < static_cast<vertex_idx_t<Graph_t>>(rep_subgraph_vertices_sorted.size()); ++j) {
                 auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(j), bsp_schedule.assignedProcessor(j));
 
-                if (max_bsp)
+                if (max_bsp) {
                     sp_pair = std::make_pair(j, 0);
+                }
 
                 if (sp_proc_to_relative_partition.find(sp_pair) == sp_proc_to_relative_partition.end()) {
                     sp_proc_to_relative_partition[sp_pair] = num_partitions_per_subgraph++;
@@ -498,11 +535,12 @@ class IsomorphicSubgraphScheduler {
                 // Map from a vertex in the current subgraph to its corresponding local index (0, 1, ...) in the representative's schedule
                 std::unordered_map<vertex_idx_t<Graph_t>, vertex_idx_t<Constr_Graph_t>> current_vertex_to_rep_local_idx;
 
-                if (i == 0) { // The first subgraph is the representative itself
+                if (i == 0) {    // The first subgraph is the representative itself
                     current_vertex_to_rep_local_idx = std::move(rep_global_to_local_map);
-                } else { // For other subgraphs, build the isomorphic mapping
+                } else {    // For other subgraphs, build the isomorphic mapping
                     Constr_Graph_t current_subgraph_graph;
-                    create_induced_subgraph(instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted);
+                    create_induced_subgraph(
+                        instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted);
 
                     MerkleHashComputer<Constr_Graph_t> current_hasher(current_subgraph_graph);
 
@@ -510,7 +548,8 @@ class IsomorphicSubgraphScheduler {
                         const auto &current_orbit_nodes = current_hasher.get_orbit_from_hash(hash);
                         for (size_t k = 0; k < rep_orbit_nodes.size(); ++k) {
                             // Map: current_subgraph_vertex -> representative_subgraph_local_idx
-                            current_vertex_to_rep_local_idx[current_subgraph_vertices_sorted[current_orbit_nodes[k]]] = static_cast<vertex_idx_t<Constr_Graph_t>>(rep_orbit_nodes[k]);
+                            current_vertex_to_rep_local_idx[current_subgraph_vertices_sorted[current_orbit_nodes[k]]]
+                                = static_cast<vertex_idx_t<Constr_Graph_t>>(rep_orbit_nodes[k]);
                         }
                     }
                 }
@@ -518,10 +557,12 @@ class IsomorphicSubgraphScheduler {
                 // Apply the partition pattern
                 for (const auto &current_vertex : current_subgraph_vertices_sorted) {
                     const auto rep_local_idx = current_vertex_to_rep_local_idx.at(current_vertex);
-                    auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(rep_local_idx), bsp_schedule.assignedProcessor(rep_local_idx));
+                    auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(rep_local_idx),
+                                                  bsp_schedule.assignedProcessor(rep_local_idx));
 
-                    if (max_bsp)
+                    if (max_bsp) {
                         sp_pair = std::make_pair(rep_local_idx, 0);
+                    }
 
                     partition[current_vertex] = current_partition_idx + sp_proc_to_relative_partition.at(sp_pair);
                 }
@@ -531,4 +572,4 @@ class IsomorphicSubgraphScheduler {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp
index 07a9d241..9b7c6856 100644
--- a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp
@@ -17,10 +17,10 @@ limitations under the License.
 #pragma once
 
 #include <functional>
+#include <queue>
 #include <stdexcept>
 #include <unordered_map>
 #include <unordered_set>
-#include <queue>
 #include <vector>
 
 #include "MerkleHashComputer.hpp"
@@ -40,11 +40,10 @@ namespace osp {
  */
 template <typename Graph_t, typename Constr_Graph_t>
 class IsomorphismMapper {
+    using VertexC = vertex_idx_t<Constr_Graph_t>;    // Local vertex ID
+    using VertexG = vertex_idx_t<Graph_t>;           // Global vertex ID
 
-    using VertexC = vertex_idx_t<Constr_Graph_t>; // Local vertex ID
-    using VertexG = vertex_idx_t<Graph_t>;       // Global vertex ID
-
-    const Constr_Graph_t& rep_graph;
+    const Constr_Graph_t &rep_graph;
     const MerkleHashComputer<Constr_Graph_t> rep_hasher;
 
   public:
@@ -52,9 +51,8 @@ class IsomorphismMapper {
      * @brief Constructs an IsomorphismMapper.
      * @param representative_graph The subgraph to use as the "pattern".
      */
-    IsomorphismMapper(const Constr_Graph_t& representative_graph)
-        : rep_graph(representative_graph), rep_hasher(representative_graph),
-          num_vertices(representative_graph.num_vertices()) {}
+    IsomorphismMapper(const Constr_Graph_t &representative_graph)
+        : rep_graph(representative_graph), rep_hasher(representative_graph), num_vertices(representative_graph.num_vertices()) {}
 
     virtual ~IsomorphismMapper() = default;
 
@@ -66,7 +64,7 @@ class IsomorphismMapper {
      * @param current_graph The new isomorphic subgraph.
      * @return A map from `current_local_vertex_id` -> `representative_local_vertex_id`.
      */
-    std::unordered_map<VertexC, VertexC> find_mapping(const Constr_Graph_t& current_graph) const {
+    std::unordered_map<VertexC, VertexC> find_mapping(const Constr_Graph_t &current_graph) const {
         if (current_graph.num_vertices() != num_vertices) {
             throw std::runtime_error("IsomorphismMapper: Graph sizes do not match.");
         }
@@ -76,14 +74,14 @@ class IsomorphismMapper {
 
         // 1. Compute hashes and orbits for the current graph.
         MerkleHashComputer<Constr_Graph_t> current_hasher(current_graph);
-        const auto& rep_orbits = rep_hasher.get_orbits();
-        const auto& current_orbits = current_hasher.get_orbits();
+        const auto &rep_orbits = rep_hasher.get_orbits();
+        const auto &current_orbits = current_hasher.get_orbits();
 
         // 2. Verify that the orbit structures are identical.
         if (rep_orbits.size() != current_orbits.size()) {
             throw std::runtime_error("IsomorphismMapper: Graphs have a different number of orbits.");
         }
-        for (const auto& [hash, rep_orbit_nodes] : rep_orbits) {
+        for (const auto &[hash, rep_orbit_nodes] : rep_orbits) {
             auto it = current_orbits.find(hash);
             if (it == current_orbits.end() || it->second.size() != rep_orbit_nodes.size()) {
                 throw std::runtime_error("IsomorphismMapper: Mismatched orbit structure between graphs.");
@@ -108,12 +106,14 @@ class IsomorphismMapper {
                 }
             }
 
-            if (rep_seed == std::numeric_limits<VertexC>::max()) break; // Should be unreachable if mapped_count < num_vertices
+            if (rep_seed == std::numeric_limits<VertexC>::max()) {
+                break;    // Should be unreachable if mapped_count < num_vertices
+            }
 
             // Find a corresponding unmapped vertex in the current graph's orbit.
-            const auto& candidates = current_orbits.at(rep_hasher.get_vertex_hash(rep_seed));
-            VertexC current_seed = std::numeric_limits<VertexC>::max(); // Should always be found
-            for (const auto& candidate : candidates) {
+            const auto &candidates = current_orbits.at(rep_hasher.get_vertex_hash(rep_seed));
+            VertexC current_seed = std::numeric_limits<VertexC>::max();    // Should always be found
+            for (const auto &candidate : candidates) {
                 if (!current_is_mapped[candidate]) {
                     current_seed = candidate;
                     break;
@@ -135,8 +135,26 @@ class IsomorphismMapper {
                 q.pop();
 
                 // Match neighbors (both parents and children)
-                match_neighbors(current_graph, current_hasher, u_rep, u_curr, map_current_to_rep, rep_is_mapped, current_is_mapped, mapped_count, q, true);
-                match_neighbors(current_graph, current_hasher, u_rep, u_curr, map_current_to_rep, rep_is_mapped, current_is_mapped, mapped_count, q, false);
+                match_neighbors(current_graph,
+                                current_hasher,
+                                u_rep,
+                                u_curr,
+                                map_current_to_rep,
+                                rep_is_mapped,
+                                current_is_mapped,
+                                mapped_count,
+                                q,
+                                true);
+                match_neighbors(current_graph,
+                                current_hasher,
+                                u_rep,
+                                u_curr,
+                                map_current_to_rep,
+                                rep_is_mapped,
+                                current_is_mapped,
+                                mapped_count,
+                                q,
+                                false);
             }
         }
 
@@ -147,26 +165,37 @@ class IsomorphismMapper {
         // 4. Return the inverted map.
         std::unordered_map<VertexC, VertexC> current_local_to_rep_local;
         current_local_to_rep_local.reserve(num_vertices);
-        for (VertexC i = 0; i < num_vertices; ++i) current_local_to_rep_local[map_current_to_rep[i]] = i;
+        for (VertexC i = 0; i < num_vertices; ++i) {
+            current_local_to_rep_local[map_current_to_rep[i]] = i;
+        }
         return current_local_to_rep_local;
     }
 
-private:
+  private:
     const size_t num_vertices;
 
-    void match_neighbors(const Constr_Graph_t& current_graph, const MerkleHashComputer<Constr_Graph_t>& current_hasher,
-                         VertexC u_rep, VertexC u_curr, std::vector<VertexC>& map_current_to_rep,
-                         std::vector<bool>& rep_is_mapped, std::vector<bool>& current_is_mapped,
-                         size_t& mapped_count, std::queue<std::pair<VertexC, VertexC>>& q, bool match_children) const {
-
-        const auto& rep_neighbors_range = match_children ? rep_graph.children(u_rep) : rep_graph.parents(u_rep);
-        const auto& curr_neighbors_range = match_children ? current_graph.children(u_curr) : current_graph.parents(u_curr);
-
-        for (const auto& v_rep : rep_neighbors_range) {
-            if (rep_is_mapped[v_rep]) continue;
+    void match_neighbors(const Constr_Graph_t &current_graph,
+                         const MerkleHashComputer<Constr_Graph_t> &current_hasher,
+                         VertexC u_rep,
+                         VertexC u_curr,
+                         std::vector<VertexC> &map_current_to_rep,
+                         std::vector<bool> &rep_is_mapped,
+                         std::vector<bool> &current_is_mapped,
+                         size_t &mapped_count,
+                         std::queue<std::pair<VertexC, VertexC>> &q,
+                         bool match_children) const {
+        const auto &rep_neighbors_range = match_children ? rep_graph.children(u_rep) : rep_graph.parents(u_rep);
+        const auto &curr_neighbors_range = match_children ? current_graph.children(u_curr) : current_graph.parents(u_curr);
+
+        for (const auto &v_rep : rep_neighbors_range) {
+            if (rep_is_mapped[v_rep]) {
+                continue;
+            }
 
-            for (const auto& v_curr : curr_neighbors_range) {
-                if (current_is_mapped[v_curr]) continue;
+            for (const auto &v_curr : curr_neighbors_range) {
+                if (current_is_mapped[v_curr]) {
+                    continue;
+                }
 
                 if (rep_hasher.get_vertex_hash(v_rep) == current_hasher.get_vertex_hash(v_curr)) {
                     map_current_to_rep[v_rep] = v_curr;
@@ -174,11 +203,11 @@ class IsomorphismMapper {
                     current_is_mapped[v_curr] = true;
                     mapped_count++;
                     q.push({v_rep, v_curr});
-                    break; // Found a match for v_rep, move to the next rep neighbor.
+                    break;    // Found a match for v_rep, move to the next rep neighbor.
                 }
             }
         }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp
index 61a85e9e..f7bc7106 100644
--- a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp
@@ -18,20 +18,20 @@ limitations under the License.
 
 #pragma once
 
-#include <vector>
-#include <unordered_map>
 #include <set>
-#include <stdexcept> 
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+#include "osp/auxiliary/hash_util.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
+#include "osp/dag_divider/isomorphism_divider/HashComputer.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
-#include "osp/auxiliary/hash_util.hpp"
-#include "osp/dag_divider/isomorphism_divider/HashComputer.hpp" 
-
 
 namespace osp {
 
-/** 
+/**
  * @brief Computes Merkle hashes for graph vertices to identify isomorphic orbits.
  *
  * The Merkle hash of a vertex is computed recursively based on its own properties
@@ -44,91 +44,90 @@ namespace osp {
  * @tparam forward If true, hashes are computed based on parents (top-down).
  *                 If false, hashes are computed based on children (bottom-up).
  */
-template<typename Graph_t, typename node_hash_func_t = uniform_node_hash_func<vertex_idx_t<Graph_t>>, bool forward = true>
+template <typename Graph_t, typename node_hash_func_t = uniform_node_hash_func<vertex_idx_t<Graph_t>>, bool forward = true>
 class MerkleHashComputer : public HashComputer<vertex_idx_t<Graph_t>> {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
-    static_assert(std::is_invocable_r<std::size_t, node_hash_func_t, vertex_idx_t<Graph_t>>::value, "node_hash_func_t must be invocable with one vertex_idx_t<Graph_t> argument and return std::size_t.");
+    static_assert(std::is_invocable_r<std::size_t, node_hash_func_t, vertex_idx_t<Graph_t>>::value,
+                  "node_hash_func_t must be invocable with one vertex_idx_t<Graph_t> argument and return std::size_t.");
 
-    using VertexType = vertex_idx_t<Graph_t>; 
+    using VertexType = vertex_idx_t<Graph_t>;
 
     std::vector<std::size_t> vertex_hashes;
     std::unordered_map<std::size_t, std::vector<VertexType>> orbits;
 
     node_hash_func_t node_hash_func;
 
-    inline void compute_hashes_helper(const VertexType &v, std::vector<std::size_t> & parent_child_hashes) {
+    inline void compute_hashes_helper(const VertexType &v, std::vector<std::size_t> &parent_child_hashes) {
+        std::sort(parent_child_hashes.begin(), parent_child_hashes.end());
 
-            std::sort(parent_child_hashes.begin(),parent_child_hashes.end());
+        std::size_t hash = node_hash_func(v);
+        for (const auto &pc_hash : parent_child_hashes) {
+            hash_combine(hash, pc_hash);
+        }
 
-            std::size_t hash = node_hash_func(v);
-            for (const auto& pc_hash : parent_child_hashes) {
-                hash_combine(hash, pc_hash); 
-            }
-   
-            vertex_hashes[v] = hash;
+        vertex_hashes[v] = hash;
 
-            if (orbits.find(hash) == orbits.end()) {
-                orbits[hash] = {v};
-            } else {
-                orbits[hash].push_back(v);
-            }
+        if (orbits.find(hash) == orbits.end()) {
+            orbits[hash] = {v};
+        } else {
+            orbits[hash].push_back(v);
+        }
     }
 
-    template<typename RetT = void> 
-    std::enable_if_t<forward, RetT> compute_hashes(const Graph_t & graph) {
-
+    template <typename RetT = void>
+    std::enable_if_t<forward, RetT> compute_hashes(const Graph_t &graph) {
         vertex_hashes.resize(graph.num_vertices());
-        
+
         for (const VertexType &v : top_sort_view(graph)) {
             std::vector<std::size_t> parent_hashes;
-            for (const VertexType& parent : graph.parents(v)) {
+            for (const VertexType &parent : graph.parents(v)) {
                 parent_hashes.push_back(vertex_hashes[parent]);
             }
             compute_hashes_helper(v, parent_hashes);
         }
     }
 
-    template<typename RetT = void> 
-    std::enable_if_t<not forward, RetT> compute_hashes(const Graph_t & graph) {
-
+    template <typename RetT = void>
+    std::enable_if_t<not forward, RetT> compute_hashes(const Graph_t &graph) {
         vertex_hashes.resize(graph.num_vertices());
-        
+
         const auto top_sort = GetTopOrderReverse(graph);
         for (auto it = top_sort.cbegin(); it != top_sort.cend(); ++it) {
             const VertexType &v = *it;
             std::vector<std::size_t> child_hashes;
-            for (const VertexType& child : graph.children(v)) {
+            for (const VertexType &child : graph.children(v)) {
                 child_hashes.push_back(vertex_hashes[child]);
             }
-            compute_hashes_helper(v, child_hashes);  
-        }      
+            compute_hashes_helper(v, child_hashes);
+        }
     }
 
-  public:   
-
-    template<typename... Args>
-    MerkleHashComputer(const Graph_t &graph_, Args &&...args) : HashComputer<VertexType>(), node_hash_func(std::forward<Args>(args)...) {
-        compute_hashes(graph_);        
+  public:
+    template <typename... Args>
+    MerkleHashComputer(const Graph_t &graph_, Args &&...args)
+        : HashComputer<VertexType>(), node_hash_func(std::forward<Args>(args)...) {
+        compute_hashes(graph_);
     }
 
     virtual ~MerkleHashComputer() override = default;
 
     inline std::size_t get_vertex_hash(const VertexType &v) const override { return vertex_hashes[v]; }
+
     inline const std::vector<std::size_t> &get_vertex_hashes() const override { return vertex_hashes; }
+
     inline std::size_t num_orbits() const override { return orbits.size(); }
-    
-    inline const std::vector<VertexType> &get_orbit(const VertexType &v) const override { return this->get_orbit_from_hash(this->get_vertex_hash(v)); }
-    inline const std::unordered_map<std::size_t, std::vector<VertexType>> &get_orbits() const override { return orbits; }
 
-    inline const std::vector<VertexType>& get_orbit_from_hash(const std::size_t& hash) const override {
-        return orbits.at(hash);
+    inline const std::vector<VertexType> &get_orbit(const VertexType &v) const override {
+        return this->get_orbit_from_hash(this->get_vertex_hash(v));
     }
-};
 
+    inline const std::unordered_map<std::size_t, std::vector<VertexType>> &get_orbits() const override { return orbits; }
+
+    inline const std::vector<VertexType> &get_orbit_from_hash(const std::size_t &hash) const override { return orbits.at(hash); }
+};
 
-template<typename Graph_t, typename node_hash_func_t = uniform_node_hash_func<vertex_idx_t<Graph_t>>, bool Forward = true>
-bool are_isomorphic_by_merkle_hash(const Graph_t& g1, const Graph_t& g2) {
+template <typename Graph_t, typename node_hash_func_t = uniform_node_hash_func<vertex_idx_t<Graph_t>>, bool Forward = true>
+bool are_isomorphic_by_merkle_hash(const Graph_t &g1, const Graph_t &g2) {
     // Basic check: Different numbers of vertices or edges mean they can't be isomorphic.
     if (g1.num_vertices() != g2.num_vertices() || g1.num_edges() != g2.num_edges()) {
         return false;
@@ -137,48 +136,44 @@ bool are_isomorphic_by_merkle_hash(const Graph_t& g1, const Graph_t& g2) {
     // --- Compute Hashes in the Specified Direction ---
     MerkleHashComputer<Graph_t, node_hash_func_t, Forward> hash1(g1);
     MerkleHashComputer<Graph_t, node_hash_func_t, Forward> hash2(g2);
-    
-    const auto& orbits1 = hash1.get_orbits();
-    const auto& orbits2 = hash2.get_orbits();
+
+    const auto &orbits1 = hash1.get_orbits();
+    const auto &orbits2 = hash2.get_orbits();
 
     if (orbits1.size() != orbits2.size()) {
         return false;
     }
 
-    for (const auto& pair : orbits1) {
+    for (const auto &pair : orbits1) {
         const std::size_t hash = pair.first;
-        const auto& orbit_vec = pair.second;
+        const auto &orbit_vec = pair.second;
 
         auto it = orbits2.find(hash);
         if (it == orbits2.end() || it->second.size() != orbit_vec.size()) {
             return false;
         }
     }
-    
+
     return true;
 }
 
-
-template<typename Graph_t>
+template <typename Graph_t>
 struct bwd_merkle_node_hash_func {
     MerkleHashComputer<Graph_t, uniform_node_hash_func<vertex_idx_t<Graph_t>>, false> bw_merkle_hash;
-    
-    bwd_merkle_node_hash_func(const Graph_t & graph) : bw_merkle_hash(graph) { }
 
-    std::size_t operator()(const vertex_idx_t<Graph_t> & v) const {
-        return bw_merkle_hash.get_vertex_hash(v);
-    }
+    bwd_merkle_node_hash_func(const Graph_t &graph) : bw_merkle_hash(graph) {}
+
+    std::size_t operator()(const vertex_idx_t<Graph_t> &v) const { return bw_merkle_hash.get_vertex_hash(v); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct precom_bwd_merkle_node_hash_func {
     MerkleHashComputer<Graph_t, vector_node_hash_func<vertex_idx_t<Graph_t>>, false> bw_merkle_hash;
-    
-    precom_bwd_merkle_node_hash_func(const Graph_t & graph, const std::vector<std::size_t>& node_hashes) : bw_merkle_hash(graph, node_hashes) { }
 
-    std::size_t operator()(const vertex_idx_t<Graph_t> & v) const {
-        return bw_merkle_hash.get_vertex_hash(v);
-    }
+    precom_bwd_merkle_node_hash_func(const Graph_t &graph, const std::vector<std::size_t> &node_hashes)
+        : bw_merkle_hash(graph, node_hashes) {}
+
+    std::size_t operator()(const vertex_idx_t<Graph_t> &v) const { return bw_merkle_hash.get_vertex_hash(v); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp
index ddb99122..03bff72d 100644
--- a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp
@@ -18,6 +18,10 @@ limitations under the License.
 
 #pragma once
 
+#include <algorithm>
+#include <map>
+#include <numeric>
+#include <unordered_set>
 #include <vector>
 
 #include "osp/coarser/coarser_util.hpp"
@@ -27,10 +31,6 @@ limitations under the License.
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_algorithms/subgraph_algorithms.hpp"
 #include "osp/graph_algorithms/transitive_reduction.hpp"
-#include <numeric>
-#include <unordered_set>
-#include <algorithm>
-#include <map>
 
 namespace osp {
 
@@ -42,10 +42,9 @@ namespace osp {
  * It then partitions the DAG by grouping all nodes with the same hash into an "orbit".
  * A coarse graph is constructed where each node represents one such orbit.
  */
-template<typename Graph_t, typename Constr_Graph_t>
+template <typename Graph_t, typename Constr_Graph_t>
 class OrbitGraphProcessor {
   public:
-
     /**
      * @brief Heuristics for selecting which symmetry levels to test during coarsening.
      */
@@ -66,8 +65,7 @@ class OrbitGraphProcessor {
 
     static_assert(is_computational_dag_v<Graph_t>, "Graph must be a computational DAG");
     static_assert(is_computational_dag_v<Constr_Graph_t>, "Constr_Graph_t must be a computational DAG");
-    static_assert(is_constructable_cdag_v<Constr_Graph_t>,
-                  "Constr_Graph_t must satisfy the constructable_cdag_vertex concept");
+    static_assert(is_constructable_cdag_v<Constr_Graph_t>, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept");
     static_assert(std::is_same_v<vertex_idx_t<Graph_t>, vertex_idx_t<Constr_Graph_t>>,
                   "Graph_t and Constr_Graph_t must have the same vertex_idx types");
 
@@ -79,7 +77,7 @@ class OrbitGraphProcessor {
     struct Group {
         // Each vector of vertices represents one of the isomorphic subgraphs in this group.
         std::vector<std::vector<VertexType>> subgraphs;
-        
+
         inline size_t size() const { return subgraphs.size(); }
     };
 
@@ -92,9 +90,9 @@ class OrbitGraphProcessor {
     Constr_Graph_t final_coarse_graph_;
     std::vector<VertexType> final_contraction_map_;
     std::vector<Group> final_groups_;
-    size_t current_symmetry;    
+    size_t current_symmetry;
 
-    size_t min_symmetry_ = 2; // min symmetry threshold    
+    size_t min_symmetry_ = 2;    // min symmetry threshold
     v_workw_t<Constr_Graph_t> work_threshold_ = 0;
     v_workw_t<Constr_Graph_t> critical_path_threshold_ = 0;
     bool merge_different_node_types_ = true;
@@ -102,12 +100,12 @@ class OrbitGraphProcessor {
 
     SymmetryLevelHeuristic symmetry_level_heuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS;
     std::vector<double> work_percentiles_ = {0.50, 0.75};
-    double natural_breaks_count_percentage_ = 0.2; 
+    double natural_breaks_count_percentage_ = 0.2;
 
     bool use_adaptive_symmetry_threshold_ = true;
 
     struct PairHasher {
-        template<class T1, class T2>
+        template <class T1, class T2>
         std::size_t operator()(const std::pair<T1, T2> &p) const {
             auto h1 = std::hash<T1>{}(p.first);
             auto h2 = std::hash<T2>{}(p.second);
@@ -121,8 +119,9 @@ class OrbitGraphProcessor {
     /**
      * @brief Simulates the merge of node v into u and returns the resulting temporary graph.
      */
-    std::pair<Constr_Graph_t, std::vector<VertexType>>
-    simulate_merge(VertexType u, VertexType v, const Constr_Graph_t &current_coarse_graph) const {
+    std::pair<Constr_Graph_t, std::vector<VertexType>> simulate_merge(VertexType u,
+                                                                      VertexType v,
+                                                                      const Constr_Graph_t &current_coarse_graph) const {
         std::vector<VertexType> temp_contraction_map(current_coarse_graph.num_vertices());
         VertexType new_idx = 0;
         for (VertexType i = 0; i < static_cast<VertexType>(temp_contraction_map.size()); ++i) {
@@ -141,11 +140,14 @@ class OrbitGraphProcessor {
     /**
      * @brief Commits a merge operation by updating the graph state.
      */
-    void commit_merge(VertexType u, VertexType v, Constr_Graph_t &&next_coarse_graph,
+    void commit_merge(VertexType u,
+                      VertexType v,
+                      Constr_Graph_t &&next_coarse_graph,
                       const std::vector<VertexType> &group_remap,
-                      std::vector<std::vector<VertexType>> &&new_subgraphs, Constr_Graph_t &current_coarse_graph,
-                      std::vector<Group> &current_groups, std::vector<VertexType> &current_contraction_map) {
-
+                      std::vector<std::vector<VertexType>> &&new_subgraphs,
+                      Constr_Graph_t &current_coarse_graph,
+                      std::vector<Group> &current_groups,
+                      std::vector<VertexType> &current_contraction_map) {
         current_coarse_graph = std::move(next_coarse_graph);
 
         // Update caches for new vertex indices
@@ -158,7 +160,7 @@ class OrbitGraphProcessor {
 
             if (old_u != v && old_v != v && new_u != new_v) {
                 next_non_viable_edges.insert({new_u, new_v});
-            }           
+            }
         }
         non_viable_edges_cache_ = std::move(next_non_viable_edges);
 
@@ -172,7 +174,6 @@ class OrbitGraphProcessor {
             if (old_u != v && old_v != v && new_u != new_v) {
                 next_non_viable_crit_path_edges.insert({new_u, new_v});
             }
-            
         }
         non_viable_crit_path_edges_cache_ = std::move(next_non_viable_crit_path_edges);
 
@@ -195,30 +196,28 @@ class OrbitGraphProcessor {
     /**
      * @brief Merges small orbits based on work threshold (final cleanup pass).
      */
-    void merge_small_orbits(const Graph_t &original_dag, 
-        Constr_Graph_t& current_coarse_graph, 
-        std::vector<Group>& current_groups, 
-        std::vector<VertexType>& current_contraction_map, 
-        const v_workw_t<Constr_Graph_t> work_threshold, 
-        const v_workw_t<Constr_Graph_t> path_threshold = 0) {
-
+    void merge_small_orbits(const Graph_t &original_dag,
+                            Constr_Graph_t &current_coarse_graph,
+                            std::vector<Group> &current_groups,
+                            std::vector<VertexType> &current_contraction_map,
+                            const v_workw_t<Constr_Graph_t> work_threshold,
+                            const v_workw_t<Constr_Graph_t> path_threshold = 0) {
         bool changed = true;
         while (changed) {
-            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexPoset =
-                get_top_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
-            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexBotPoset =
-                get_bottom_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
+            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexPoset
+                = get_top_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
+            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexBotPoset
+                = get_bottom_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
 
             changed = false;
             for (const auto u : current_coarse_graph.vertices()) {
-                for (const auto v : current_coarse_graph.children(u)) {                  
-
+                for (const auto v : current_coarse_graph.children(u)) {
                     if constexpr (has_typed_vertices_v<Constr_Graph_t>) {
                         if (not merge_different_node_types_) {
                             if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) {
                                 if constexpr (verbose) {
                                     std::cout << "  - Merge of " << u << " and " << v << " not viable (different node types)\n";
-                                }                               
+                                }
                                 continue;
                             }
                         }
@@ -233,8 +232,10 @@ class OrbitGraphProcessor {
 
                     const v_workw_t<Constr_Graph_t> u_work_weight = current_coarse_graph.vertex_work_weight(u);
                     const v_workw_t<Constr_Graph_t> v_work_weight = current_coarse_graph.vertex_work_weight(v);
-                    const v_workw_t<Constr_Graph_t> v_threshold = work_threshold * static_cast<v_workw_t<Constr_Graph_t>>(current_groups[v].size());
-                    const v_workw_t<Constr_Graph_t> u_threshold = work_threshold * static_cast<v_workw_t<Constr_Graph_t>>(current_groups[u].size());
+                    const v_workw_t<Constr_Graph_t> v_threshold
+                        = work_threshold * static_cast<v_workw_t<Constr_Graph_t>>(current_groups[v].size());
+                    const v_workw_t<Constr_Graph_t> u_threshold
+                        = work_threshold * static_cast<v_workw_t<Constr_Graph_t>>(current_groups[u].size());
 
                     if (u_work_weight > u_threshold && v_work_weight > v_threshold) {
                         if constexpr (verbose) {
@@ -246,15 +247,15 @@ class OrbitGraphProcessor {
                     if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) {
                         if constexpr (verbose) {
                             std::cout << "  - Merge of " << u << " and " << v
-                                      << " not viable poset. poste v: " << vertexBotPoset[v]
-                                      << " poste u: " << vertexBotPoset[u] << "\n";
+                                      << " not viable poset. poste v: " << vertexBotPoset[v] << " poste u: " << vertexBotPoset[u]
+                                      << "\n";
                         }
                         continue;
                     }
 
                     std::vector<std::vector<VertexType>> new_subgraphs;
                     const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs);
-                    
+
                     if (!merge_is_valid) {
                         if constexpr (verbose) {
                             std::cout << "  - Merge of " << u << " and " << v << " and " << v
@@ -266,10 +267,14 @@ class OrbitGraphProcessor {
 
                     auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph);
 
-                    if (critical_path_weight(temp_coarse_graph) > (path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) + critical_path_weight(current_coarse_graph))) {
+                    if (critical_path_weight(temp_coarse_graph)
+                        > (path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size())
+                           + critical_path_weight(current_coarse_graph))) {
                         if constexpr (verbose) {
-                            std::cout << "  - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph)
-                                      << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " << path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) << "\n";
+                            std::cout << "  - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: "
+                                      << critical_path_weight(current_coarse_graph)
+                                      << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + "
+                                      << path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) << "\n";
                         }
                         non_viable_crit_path_edges_cache_.insert({u, v});
                         continue;
@@ -280,8 +285,14 @@ class OrbitGraphProcessor {
                                   << temp_coarse_graph.num_vertices() << " nodes.\n";
                     }
 
-                    commit_merge(u, v, std::move(temp_coarse_graph), temp_contraction_map, std::move(new_subgraphs),
-                                 current_coarse_graph, current_groups, current_contraction_map);
+                    commit_merge(u,
+                                 v,
+                                 std::move(temp_coarse_graph),
+                                 temp_contraction_map,
+                                 std::move(new_subgraphs),
+                                 current_coarse_graph,
+                                 current_groups,
+                                 current_contraction_map);
 
                     changed = true;
                     break;
@@ -296,14 +307,19 @@ class OrbitGraphProcessor {
     /**
      * @brief Deprecated non-adaptive merge function.
      */
-    void contract_edges(const Graph_t &original_dag, Constr_Graph_t& current_coarse_graph, std::vector<Group>& current_groups, std::vector<VertexType>& current_contraction_map, const bool merge_symmetry_narrowing, const bool merge_different_node_types, const v_workw_t<Constr_Graph_t> path_threshold = 0) {
-
+    void contract_edges(const Graph_t &original_dag,
+                        Constr_Graph_t &current_coarse_graph,
+                        std::vector<Group> &current_groups,
+                        std::vector<VertexType> &current_contraction_map,
+                        const bool merge_symmetry_narrowing,
+                        const bool merge_different_node_types,
+                        const v_workw_t<Constr_Graph_t> path_threshold = 0) {
         bool changed = true;
         while (changed) {
-            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexPoset =
-                get_top_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
-            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexBotPoset =
-                get_bottom_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
+            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexPoset
+                = get_top_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
+            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexBotPoset
+                = get_bottom_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
 
             changed = false;
             for (const auto &edge : edges(current_coarse_graph)) {
@@ -329,8 +345,8 @@ class OrbitGraphProcessor {
                 const std::size_t v_size = current_groups[v].size();
                 const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs);
                 const std::size_t new_size = new_subgraphs.size();
-                
-                const bool merge_viable = (new_size >= current_symmetry); 
+
+                const bool merge_viable = (new_size >= current_symmetry);
                 const bool both_below_symmetry_threshold = (u_size < current_symmetry) && (v_size < current_symmetry);
 
                 if (!merge_is_valid) {
@@ -349,38 +365,44 @@ class OrbitGraphProcessor {
 
                 auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph);
 
-                if (critical_path_weight(temp_coarse_graph) > (path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) + critical_path_weight(current_coarse_graph))) {
+                if (critical_path_weight(temp_coarse_graph)
+                    > (path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size())
+                       + critical_path_weight(current_coarse_graph))) {
                     non_viable_crit_path_edges_cache_.insert({u, v});
                     continue;
                 }
 
-                commit_merge(u, v, std::move(temp_coarse_graph), temp_contraction_map, std::move(new_subgraphs), 
-                             current_coarse_graph, current_groups, current_contraction_map);
+                commit_merge(u,
+                             v,
+                             std::move(temp_coarse_graph),
+                             temp_contraction_map,
+                             std::move(new_subgraphs),
+                             current_coarse_graph,
+                             current_groups,
+                             current_contraction_map);
                 changed = true;
                 break;
             }
         }
     }
 
-
     /**
      * @brief Core adaptive merging function.
      */
-    void contract_edges_adpative_sym(const Graph_t &original_dag, 
-        Constr_Graph_t& current_coarse_graph, 
-        std::vector<Group>& current_groups, 
-        std::vector<VertexType>& current_contraction_map, 
-        const bool merge_different_node_types, 
-        const bool merge_below_threshold,
-        const std::vector<v_workw_t<Graph_t>>& lock_threshold_per_type,
-        const v_workw_t<Constr_Graph_t> path_threshold = 0) {
-
+    void contract_edges_adpative_sym(const Graph_t &original_dag,
+                                     Constr_Graph_t &current_coarse_graph,
+                                     std::vector<Group> &current_groups,
+                                     std::vector<VertexType> &current_contraction_map,
+                                     const bool merge_different_node_types,
+                                     const bool merge_below_threshold,
+                                     const std::vector<v_workw_t<Graph_t>> &lock_threshold_per_type,
+                                     const v_workw_t<Constr_Graph_t> path_threshold = 0) {
         bool changed = true;
         while (changed) {
-            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexPoset =
-                get_top_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
-            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexBotPoset =
-                get_bottom_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
+            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexPoset
+                = get_top_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
+            const std::vector<vertex_idx_t<Constr_Graph_t>> vertexBotPoset
+                = get_bottom_node_distance<Constr_Graph_t, vertex_idx_t<Constr_Graph_t>>(current_coarse_graph);
 
             changed = false;
             for (const auto &edge : edges(current_coarse_graph)) {
@@ -399,7 +421,7 @@ class OrbitGraphProcessor {
                         if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) {
                             if constexpr (verbose) {
                                 std::cout << "  - Merge of " << u << " and " << v << " not viable (different node types)\n";
-                            }                           
+                            }
                             continue;
                         }
                     }
@@ -407,8 +429,7 @@ class OrbitGraphProcessor {
 
                 if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) {
                     if constexpr (verbose) {
-                        std::cout << "  - Merge of " << u << " and " << v
-                                  << " not viable poset. poste v: " << vertexBotPoset[v]
+                        std::cout << "  - Merge of " << u << " and " << v << " not viable poset. poste v: " << vertexBotPoset[v]
                                   << " poste u: " << vertexBotPoset[u] << "\n";
                     }
                     continue;
@@ -417,10 +438,10 @@ class OrbitGraphProcessor {
                 std::vector<std::vector<VertexType>> new_subgraphs;
                 const std::size_t u_size = current_groups[u].size();
                 const std::size_t v_size = current_groups[v].size();
-                
+
                 const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs);
                 const std::size_t new_size = new_subgraphs.size();
-                
+
                 if (!merge_is_valid) {
                     if constexpr (verbose) {
                         std::cout << "  - Merge of " << u << " and " << v << " and " << v
@@ -431,14 +452,15 @@ class OrbitGraphProcessor {
                 }
 
                 const bool merge_viable = (new_size >= current_symmetry);
-                const bool both_below_minimal_threshold = merge_below_threshold && (u_size < min_symmetry_) && (v_size < min_symmetry_);
-                
+                const bool both_below_minimal_threshold = merge_below_threshold && (u_size < min_symmetry_)
+                                                          && (v_size < min_symmetry_);
+
                 if (!merge_viable && !both_below_minimal_threshold) {
                     if constexpr (verbose) {
                         std::cout << "  - Merge of " << u << " and " << v << " not viable (Symmetry Threshold)\n";
                         std::cout << "    - u_sym: " << u_size << ", v_sym: " << v_size << " -> new_sym: " << new_size
-                                  << " (current_threshold: " << current_symmetry 
-                                  << ", global_min_threshold: " << min_symmetry_ << ")\n";
+                                  << " (current_threshold: " << current_symmetry << ", global_min_threshold: " << min_symmetry_
+                                  << ")\n";
                     }
                     non_viable_edges_cache_.insert({u, v});
                     continue;
@@ -446,36 +468,35 @@ class OrbitGraphProcessor {
 
                 v_type_t<Graph_t> u_type = 0;
                 v_type_t<Graph_t> v_type = 0;
-                if (not merge_different_node_types && has_typed_vertices_v<Graph_t> ) {
+                if (not merge_different_node_types && has_typed_vertices_v<Graph_t>) {
                     u_type = current_coarse_graph.vertex_type(u);
                     v_type = current_coarse_graph.vertex_type(v);
                 }
 
-                const bool u_is_significant = (u_size >= min_symmetry_) && 
-                    (current_coarse_graph.vertex_work_weight(u) > lock_threshold_per_type[u_type]);
-                const bool v_is_significant = (v_size >= min_symmetry_) && 
-                    (current_coarse_graph.vertex_work_weight(v) > lock_threshold_per_type[v_type]);
+                const bool u_is_significant = (u_size >= min_symmetry_)
+                                              && (current_coarse_graph.vertex_work_weight(u) > lock_threshold_per_type[u_type]);
+                const bool v_is_significant = (v_size >= min_symmetry_)
+                                              && (current_coarse_graph.vertex_work_weight(v) > lock_threshold_per_type[v_type]);
 
-                if (u_is_significant && v_is_significant)
-                {
+                if (u_is_significant && v_is_significant) {
                     // Both are significant ---
                     if (new_size < std::min(u_size, v_size)) {
                         if constexpr (verbose) {
-                            std::cout << "  - Merge of " << u << " and " << v << " not viable (Symmetry Narrowing below min of two significant nodes)\n";
+                            std::cout << "  - Merge of " << u << " and " << v
+                                      << " not viable (Symmetry Narrowing below min of two significant nodes)\n";
                             std::cout << "    - u_sym: " << u_size << ", v_sym: " << v_size << " -> new_sym: " << new_size << "\n";
                         }
                         non_viable_edges_cache_.insert({u, v});
                         continue;
                     }
-                }
-                else if (u_is_significant || v_is_significant)
-                {
+                } else if (u_is_significant || v_is_significant) {
                     // Exactly one is significant ---
                     const std::size_t significant_node_size = u_is_significant ? u_size : v_size;
-                    
+
                     if (new_size < significant_node_size) {
                         if constexpr (verbose) {
-                            std::cout << "  - Merge of " << u << " and " << v << " not viable (Symmetry Narrowing of a single significant node)\n";
+                            std::cout << "  - Merge of " << u << " and " << v
+                                      << " not viable (Symmetry Narrowing of a single significant node)\n";
                             std::cout << "    - u_sym: " << u_size << " (sig: " << u_is_significant << ")"
                                       << ", v_sym: " << v_size << " (sig: " << v_is_significant << ")"
                                       << " -> new_sym: " << new_size << "\n";
@@ -484,14 +505,18 @@ class OrbitGraphProcessor {
                         continue;
                     }
                 }
-                
+
                 // Critical Path Check
                 auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph);
 
-                if (critical_path_weight(temp_coarse_graph) > (path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) + critical_path_weight(current_coarse_graph))) {
+                if (critical_path_weight(temp_coarse_graph)
+                    > (path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size())
+                       + critical_path_weight(current_coarse_graph))) {
                     if constexpr (verbose) {
-                        std::cout << "  - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph)
-                                  << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " << path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) << "\n";
+                        std::cout << "  - Merge of " << u << " and " << v
+                                  << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph)
+                                  << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + "
+                                  << path_threshold * static_cast<v_workw_t<Constr_Graph_t>>(new_subgraphs.size()) << "\n";
                     }
                     non_viable_crit_path_edges_cache_.insert({u, v});
                     continue;
@@ -503,8 +528,14 @@ class OrbitGraphProcessor {
                               << temp_coarse_graph.num_vertices() << " nodes.\n";
                 }
 
-                commit_merge(u, v, std::move(temp_coarse_graph), temp_contraction_map, std::move(new_subgraphs), 
-                             current_coarse_graph, current_groups, current_contraction_map);
+                commit_merge(u,
+                             v,
+                             std::move(temp_coarse_graph),
+                             temp_contraction_map,
+                             std::move(new_subgraphs),
+                             current_coarse_graph,
+                             current_groups,
+                             current_contraction_map);
 
                 changed = true;
                 break;
@@ -512,31 +543,34 @@ class OrbitGraphProcessor {
         }
     }
 
-
   public:
-
     explicit OrbitGraphProcessor() {}
 
     void setMergeDifferentNodeTypes(bool flag) { merge_different_node_types_ = flag; }
+
     void set_work_threshold(v_workw_t<Constr_Graph_t> work_threshold) { work_threshold_ = work_threshold; }
-    void setCriticalPathThreshold(v_workw_t<Constr_Graph_t> critical_path_threshold) { critical_path_threshold_ = critical_path_threshold; }
+
+    void setCriticalPathThreshold(v_workw_t<Constr_Graph_t> critical_path_threshold) {
+        critical_path_threshold_ = critical_path_threshold;
+    }
+
     void setLockRatio(double lock_ratio) { lock_orbit_ratio = lock_ratio; }
-    
+
     void setSymmetryLevelHeuristic(SymmetryLevelHeuristic heuristic) { symmetry_level_heuristic_ = heuristic; }
-    void setWorkPercentiles(const std::vector<double>& percentiles) {
+
+    void setWorkPercentiles(const std::vector<double> &percentiles) {
         work_percentiles_ = percentiles;
         std::sort(work_percentiles_.begin(), work_percentiles_.end());
     }
 
-    void setUseStaticSymmetryLevel(size_t static_symmetry_level) { 
+    void setUseStaticSymmetryLevel(size_t static_symmetry_level) {
         symmetry_level_heuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS;
-        use_adaptive_symmetry_threshold_ = false; 
-        current_symmetry = static_symmetry_level; 
+        use_adaptive_symmetry_threshold_ = false;
+        current_symmetry = static_symmetry_level;
     }
 
     void setNaturalBreaksCountPercentage(double percentage) { natural_breaks_count_percentage_ = percentage; }
 
-
     /**
      * @brief Discovers isomorphic groups (orbits) and constructs a coarse graph.
      */
@@ -565,17 +599,19 @@ class OrbitGraphProcessor {
             }
             coarse_node_idx++;
         }
-    
+
         std::vector<v_workw_t<Graph_t>> work_per_vertex_type;
         work_per_vertex_type.resize(merge_different_node_types_ ? 1U : dag.num_vertex_types(), 0);
-        
+
         std::map<size_t, size_t> orbit_size_counts;
         std::map<size_t, v_workw_t<Graph_t>> work_per_orbit_size;
         v_workw_t<Graph_t> total_work = 0;
         for (const auto &[hash, vertices] : orbits) {
             const size_t orbit_size = vertices.size();
-            
-            if (orbit_size == 1U) continue; // exclude single node orbits from total work
+
+            if (orbit_size == 1U) {
+                continue;    // exclude single node orbits from total work
+            }
 
             orbit_size_counts[orbit_size]++;
 
@@ -583,7 +619,7 @@ class OrbitGraphProcessor {
             for (const auto v : vertices) {
                 orbit_work += dag.vertex_work_weight(v);
             }
-            
+
             if (not merge_different_node_types_ && has_typed_vertices_v<Graph_t>) {
                 work_per_vertex_type[dag.vertex_type(vertices[0])] += orbit_work;
             } else {
@@ -591,46 +627,52 @@ class OrbitGraphProcessor {
             }
 
             work_per_orbit_size[orbit_size] += orbit_work;
-            total_work += orbit_work;            
+            total_work += orbit_work;
         }
 
         std::vector<v_workw_t<Graph_t>> lock_threshold_per_type(work_per_vertex_type.size());
         for (size_t i = 0; i < work_per_vertex_type.size(); ++i) {
             lock_threshold_per_type[i] = static_cast<v_workw_t<Graph_t>>(lock_orbit_ratio * work_per_vertex_type[i]);
         }
-        
+
         std::vector<double> rel_acc_work_per_orbit_size;
-        std::vector<size_t> symmetry_levels_to_test = compute_symmetry_levels(rel_acc_work_per_orbit_size, work_per_orbit_size, total_work, orbit_size_counts);
+        std::vector<size_t> symmetry_levels_to_test
+            = compute_symmetry_levels(rel_acc_work_per_orbit_size, work_per_orbit_size, total_work, orbit_size_counts);
 
         if constexpr (verbose) {
             std::cout << "\n--- Orbit Analysis ---\n";
-            for (auto const& [size, count] : orbit_size_counts) {
-                if (total_work > 0)
-                    std::cout << "  - Orbits of size " << size << ": " << count << " groups, weight: " << 100.0 * static_cast<double>(work_per_orbit_size[size]) / static_cast<double>(total_work) << "%\n";            
-                else
+            for (auto const &[size, count] : orbit_size_counts) {
+                if (total_work > 0) {
+                    std::cout << "  - Orbits of size " << size << ": " << count << " groups, weight: "
+                              << 100.0 * static_cast<double>(work_per_orbit_size[size]) / static_cast<double>(total_work) << "%\n";
+                } else {
                     std::cout << "  - Orbits of size " << size << ": " << count << " groups, weight: 0.0%\n";
+                }
             }
             std::cout << "  Cumulative work distribution by orbit size (largest to smallest):\n";
             size_t i = 0;
-            for (auto it = orbit_size_counts.rbegin(); it != orbit_size_counts.rend() && i < rel_acc_work_per_orbit_size.size(); ++it, ++i) {
-                std::cout << "    - Orbits with size >= " << it->first << ": "
-                          << std::fixed << std::setprecision(2) << rel_acc_work_per_orbit_size[i] * 100 << "%\n";
+            for (auto it = orbit_size_counts.rbegin(); it != orbit_size_counts.rend() && i < rel_acc_work_per_orbit_size.size();
+                 ++it, ++i) {
+                std::cout << "    - Orbits with size >= " << it->first << ": " << std::fixed << std::setprecision(2)
+                          << rel_acc_work_per_orbit_size[i] * 100 << "%\n";
             }
             std::cout << "  Work distribution by vertex type:\n";
             for (size_t j = 0; j < work_per_vertex_type.size(); ++j) {
-                if (total_work > 0)
-                    std::cout << "    - Vertex type " << j << ": " << 100.0 * static_cast<double>(work_per_vertex_type[j]) / static_cast<double>(total_work) << "%\n";
-                else
-                     std::cout << "    - Vertex type " << j << ": 0.0%\n";
+                if (total_work > 0) {
+                    std::cout << "    - Vertex type " << j << ": "
+                              << 100.0 * static_cast<double>(work_per_vertex_type[j]) / static_cast<double>(total_work) << "%\n";
+                } else {
+                    std::cout << "    - Vertex type " << j << ": 0.0%\n";
+                }
             }
-            
+
             std::cout << "--------------------------------\n";
             std::cout << " Symmetry levels to test: " << "\n";
             for (const auto level : symmetry_levels_to_test) {
                 std::cout << "  - " << level << "\n";
             }
-            std::cout << "--------------------------------\n";             
-        }       
+            std::cout << "--------------------------------\n";
+        }
 
         coarser_util::construct_coarse_dag(dag, coarse_graph_, contraction_map_);
 
@@ -638,17 +680,20 @@ class OrbitGraphProcessor {
             perform_coarsening_adaptive_symmetry(dag, coarse_graph_, lock_threshold_per_type, symmetry_levels_to_test);
         } else {
             size_t total_size_count = 0U;
-            for (const auto& [size, count] : orbit_size_counts) {
+            for (const auto &[size, count] : orbit_size_counts) {
                 total_size_count += count;
-            }  
+            }
+
+            for (const auto &[size, count] : orbit_size_counts) {
+                if (size == 1U || size > current_symmetry) {
+                    continue;
+                }
 
-            for (const auto& [size, count] : orbit_size_counts) {
-                if (size == 1U || size > current_symmetry) continue;
-                
                 if (count > total_size_count / 2) {
-                     if constexpr (verbose) {
-                        std::cout << "Setting current_symmetry to " << size << " because " << count << " orbits of size " << size << " are more than half of the total number of orbits.\n";
-                     }
+                    if constexpr (verbose) {
+                        std::cout << "Setting current_symmetry to " << size << " because " << count << " orbits of size " << size
+                                  << " are more than half of the total number of orbits.\n";
+                    }
                     current_symmetry = size;
                 }
             }
@@ -658,31 +703,34 @@ class OrbitGraphProcessor {
     }
 
   private:
-
-    std::vector<size_t> compute_symmetry_levels(std::vector<double> & rel_acc_work_per_orbit_size, const std::map<size_t, v_workw_t<Graph_t>> work_per_orbit_size, const v_workw_t<Graph_t> total_work, const std::map<size_t, size_t> orbit_size_counts) {
-
+    std::vector<size_t> compute_symmetry_levels(std::vector<double> &rel_acc_work_per_orbit_size,
+                                                const std::map<size_t, v_workw_t<Graph_t>> work_per_orbit_size,
+                                                const v_workw_t<Graph_t> total_work,
+                                                const std::map<size_t, size_t> orbit_size_counts) {
         std::vector<size_t> symmetry_levels_to_test;
         min_symmetry_ = 2;
 
         switch (symmetry_level_heuristic_) {
-            case SymmetryLevelHeuristic::PERCENTILE_BASED:
-            {
-                if constexpr (verbose) { std::cout << "Using PERCENTILE_BASED heuristic for symmetry levels.\n"; }
+            case SymmetryLevelHeuristic::PERCENTILE_BASED: {
+                if constexpr (verbose) {
+                    std::cout << "Using PERCENTILE_BASED heuristic for symmetry levels.\n";
+                }
                 size_t percentile_idx = 0;
                 v_workw_t<Graph_t> cumulative_work = 0;
-                for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) 
-                {
+                for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) {
                     cumulative_work += it->second;
-                    if (total_work == 0) continue; // Avoid division by zero
+                    if (total_work == 0) {
+                        continue;    // Avoid division by zero
+                    }
                     double current_work_ratio = static_cast<double>(cumulative_work) / static_cast<double>(total_work);
-                    rel_acc_work_per_orbit_size.push_back(current_work_ratio); // For printing
+                    rel_acc_work_per_orbit_size.push_back(current_work_ratio);    // For printing
 
                     if (percentile_idx < work_percentiles_.size() && current_work_ratio >= work_percentiles_[percentile_idx]) {
                         if (it->first > min_symmetry_) {
                             symmetry_levels_to_test.push_back(it->first);
                         }
-                        while (percentile_idx < work_percentiles_.size() &&
-                               current_work_ratio >= work_percentiles_[percentile_idx]) {
+                        while (percentile_idx < work_percentiles_.size()
+                               && current_work_ratio >= work_percentiles_[percentile_idx]) {
                             percentile_idx++;
                         }
                     }
@@ -690,36 +738,43 @@ class OrbitGraphProcessor {
                 break;
             }
 
-            case SymmetryLevelHeuristic::NATURAL_BREAKS:
-            {
-                if constexpr (verbose) { std::cout << "Using NATURAL_BREAKS heuristic for symmetry levels.\n"; }
+            case SymmetryLevelHeuristic::NATURAL_BREAKS: {
+                if constexpr (verbose) {
+                    std::cout << "Using NATURAL_BREAKS heuristic for symmetry levels.\n";
+                }
 
                 size_t total_orbit_groups = 0;
-                for (const auto& [size, count] : orbit_size_counts) {
+                for (const auto &[size, count] : orbit_size_counts) {
                     total_orbit_groups += count;
                 }
-                size_t count_threshold = static_cast<size_t>(static_cast<double>(total_orbit_groups) * natural_breaks_count_percentage_);
+                size_t count_threshold
+                    = static_cast<size_t>(static_cast<double>(total_orbit_groups) * natural_breaks_count_percentage_);
                 if (count_threshold == 0 && total_orbit_groups > 0) {
-                    count_threshold = 1; // Ensure threshold is at least 1 if possible
+                    count_threshold = 1;    // Ensure threshold is at least 1 if possible
+                }
+                if constexpr (verbose) {
+                    std::cout << "  - Total orbit groups: " << total_orbit_groups << ", count threshold: " << count_threshold
+                              << "\n";
                 }
-                if constexpr (verbose) { std::cout << "  - Total orbit groups: " << total_orbit_groups << ", count threshold: " << count_threshold << "\n"; }
 
                 std::vector<size_t> sorted_sizes;
                 sorted_sizes.reserve(orbit_size_counts.size());
-                for (const auto& [size, count] : orbit_size_counts) {
+                for (const auto &[size, count] : orbit_size_counts) {
                     sorted_sizes.push_back(size);
                 }
-                std::sort(sorted_sizes.rbegin(), sorted_sizes.rend()); // Sort descending
+                std::sort(sorted_sizes.rbegin(), sorted_sizes.rend());    // Sort descending
 
                 if (!sorted_sizes.empty()) {
                     for (size_t i = 0; i < sorted_sizes.size(); ++i) {
                         const size_t current_size = sorted_sizes[i];
-                        if (current_size < min_symmetry_) continue;
+                        if (current_size < min_symmetry_) {
+                            continue;
+                        }
 
                         // Add if this size's count is significant
                         const size_t current_count = orbit_size_counts.at(current_size);
                         bool count_significant = (current_count >= count_threshold);
-                        
+
                         if (count_significant) {
                             symmetry_levels_to_test.push_back(current_size);
                             continue;
@@ -730,7 +785,7 @@ class OrbitGraphProcessor {
                 if (symmetry_levels_to_test.empty()) {
                     size_t max_count = 0;
                     size_t size_with_max_count = 0;
-                    for (const auto& [size, count] : orbit_size_counts) {
+                    for (const auto &[size, count] : orbit_size_counts) {
                         if (count > max_count) {
                             max_count = count;
                             size_with_max_count = size;
@@ -745,23 +800,27 @@ class OrbitGraphProcessor {
                 v_workw_t<Graph_t> cumulative_work = 0;
                 for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) {
                     cumulative_work += it->second;
-                    if (total_work > 0)
-                        rel_acc_work_per_orbit_size.push_back(static_cast<double>(cumulative_work) / static_cast<double>(total_work));
+                    if (total_work > 0) {
+                        rel_acc_work_per_orbit_size.push_back(static_cast<double>(cumulative_work)
+                                                              / static_cast<double>(total_work));
+                    }
                 }
                 break;
             }
 
             case SymmetryLevelHeuristic::CURRENT_DEFAULT:
-            default:
-            {
-                if constexpr (verbose) { std::cout << "Using CURRENT_DEFAULT heuristic for symmetry levels.\n"; }
+            default: {
+                if constexpr (verbose) {
+                    std::cout << "Using CURRENT_DEFAULT heuristic for symmetry levels.\n";
+                }
                 double threshold = lock_orbit_ratio;
                 v_workw_t<Graph_t> cumulative_work = 0;
                 for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) {
                     cumulative_work += it->second;
-                    const double rel_work = (total_work == 0) ? 0 : static_cast<double>(cumulative_work) / static_cast<double>(total_work);         
-                    rel_acc_work_per_orbit_size.push_back(rel_work); // For printing
-                    
+                    const double rel_work
+                        = (total_work == 0) ? 0 : static_cast<double>(cumulative_work) / static_cast<double>(total_work);
+                    rel_acc_work_per_orbit_size.push_back(rel_work);    // For printing
+
                     if (rel_work >= threshold && it->first > min_symmetry_) {
                         symmetry_levels_to_test.push_back(it->first);
                         threshold += lock_orbit_ratio * 0.5;
@@ -770,12 +829,13 @@ class OrbitGraphProcessor {
                 break;
             }
         }
-        
-        if (symmetry_levels_to_test.empty()) 
+
+        if (symmetry_levels_to_test.empty()) {
             symmetry_levels_to_test.push_back(2);
+        }
 
         min_symmetry_ = symmetry_levels_to_test.back();
-        
+
         // De-duplicate and sort descending
         std::sort(symmetry_levels_to_test.rbegin(), symmetry_levels_to_test.rend());
         auto last = std::unique(symmetry_levels_to_test.begin(), symmetry_levels_to_test.end());
@@ -784,7 +844,6 @@ class OrbitGraphProcessor {
         return symmetry_levels_to_test;
     }
 
-
     /**
      * @brief Non-adaptive coarsening (deprecated).
      */
@@ -806,7 +865,6 @@ class OrbitGraphProcessor {
             current_groups[coarse_node].subgraphs.push_back({i});
         }
 
-
         if constexpr (has_typed_vertices_v<Constr_Graph_t>) {
             if constexpr (verbose) {
                 std::cout << "Attempting to merge same node types.\n";
@@ -815,13 +873,13 @@ class OrbitGraphProcessor {
             contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, false);
         }
 
-
         if constexpr (verbose) {
             std::cout << "Attempting to merge different node types.\n";
         }
-        contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, false, merge_different_node_types_);
-        contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_);
-    
+        contract_edges(
+            original_dag, current_coarse_graph, current_groups, current_contraction_map, false, merge_different_node_types_);
+        contract_edges(
+            original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_);
 
         if constexpr (verbose) {
             std::cout << "Attempting to merge small orbits.\n";
@@ -831,8 +889,14 @@ class OrbitGraphProcessor {
         non_viable_crit_path_edges_cache_.clear();
         non_viable_edges_cache_.clear();
 
-        contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_, work_threshold_);
-        
+        contract_edges(original_dag,
+                       current_coarse_graph,
+                       current_groups,
+                       current_contraction_map,
+                       true,
+                       merge_different_node_types_,
+                       work_threshold_);
+
         final_coarse_graph_ = std::move(current_coarse_graph);
         final_contraction_map_ = std::move(current_contraction_map);
         final_groups_ = std::move(current_groups);
@@ -842,7 +906,10 @@ class OrbitGraphProcessor {
         }
     }
 
-    void perform_coarsening_adaptive_symmetry(const Graph_t &original_dag, const Constr_Graph_t &initial_coarse_graph, const std::vector<v_workw_t<Graph_t>>& lock_threshold_per_type, const std::vector<size_t>& symmetry_levels_to_test) {
+    void perform_coarsening_adaptive_symmetry(const Graph_t &original_dag,
+                                              const Constr_Graph_t &initial_coarse_graph,
+                                              const std::vector<v_workw_t<Graph_t>> &lock_threshold_per_type,
+                                              const std::vector<size_t> &symmetry_levels_to_test) {
         final_coarse_graph_ = Constr_Graph_t();
         final_contraction_map_.clear();
 
@@ -858,9 +925,10 @@ class OrbitGraphProcessor {
             const VertexType coarse_node = contraction_map_[i];
             current_groups[coarse_node].subgraphs.push_back({i});
         }
-    
+
         if constexpr (verbose) {
-            std::cout << " Starting adaptive symmetry coarsening with critical_path_threshold: " << critical_path_threshold_ << "\n";
+            std::cout << " Starting adaptive symmetry coarsening with critical_path_threshold: " << critical_path_threshold_
+                      << "\n";
         }
 
         for (const auto sym : symmetry_levels_to_test) {
@@ -872,22 +940,41 @@ class OrbitGraphProcessor {
 
             non_viable_edges_cache_.clear();
 
-            contract_edges_adpative_sym(original_dag, current_coarse_graph, current_groups, current_contraction_map, false, is_last_loop, lock_threshold_per_type);
-            
-            if (merge_different_node_types_)
-                contract_edges_adpative_sym(original_dag, current_coarse_graph, current_groups, current_contraction_map, merge_different_node_types_, is_last_loop, lock_threshold_per_type);
-            
-            non_viable_crit_path_edges_cache_.clear();
-            contract_edges_adpative_sym(original_dag, current_coarse_graph, current_groups, current_contraction_map, merge_different_node_types_, is_last_loop, lock_threshold_per_type, critical_path_threshold_);
+            contract_edges_adpative_sym(original_dag,
+                                        current_coarse_graph,
+                                        current_groups,
+                                        current_contraction_map,
+                                        false,
+                                        is_last_loop,
+                                        lock_threshold_per_type);
+
+            if (merge_different_node_types_) {
+                contract_edges_adpative_sym(original_dag,
+                                            current_coarse_graph,
+                                            current_groups,
+                                            current_contraction_map,
+                                            merge_different_node_types_,
+                                            is_last_loop,
+                                            lock_threshold_per_type);
+            }
 
+            non_viable_crit_path_edges_cache_.clear();
+            contract_edges_adpative_sym(original_dag,
+                                        current_coarse_graph,
+                                        current_groups,
+                                        current_contraction_map,
+                                        merge_different_node_types_,
+                                        is_last_loop,
+                                        lock_threshold_per_type,
+                                        critical_path_threshold_);
         }
-    
+
         if constexpr (verbose) {
             std::cout << " Merging small orbits with work threshold: " << work_threshold_ << "\n";
         }
         non_viable_edges_cache_.clear();
         merge_small_orbits(original_dag, current_coarse_graph, current_groups, current_contraction_map, work_threshold_);
-        
+
         final_coarse_graph_ = std::move(current_coarse_graph);
         final_contraction_map_ = std::move(current_contraction_map);
         final_groups_ = std::move(current_groups);
@@ -913,12 +1000,13 @@ class OrbitGraphProcessor {
     /**
      * @brief Checks if merging two groups is structurally viable.
      */
-    bool is_merge_viable(const Graph_t &original_dag, const Group &group_u, const Group &group_v,
+    bool is_merge_viable(const Graph_t &original_dag,
+                         const Group &group_u,
+                         const Group &group_v,
                          std::vector<std::vector<VertexType>> &out_new_subgraphs) const {
-
         std::vector<VertexType> all_nodes;
-        all_nodes.reserve(group_u.subgraphs.size() * (group_u.subgraphs.empty() ? 0 : group_u.subgraphs[0].size()) + 
-                          group_v.subgraphs.size() * (group_v.subgraphs.empty() ? 0 : group_v.subgraphs[0].size()));
+        all_nodes.reserve(group_u.subgraphs.size() * (group_u.subgraphs.empty() ? 0 : group_u.subgraphs[0].size())
+                          + group_v.subgraphs.size() * (group_v.subgraphs.empty() ? 0 : group_v.subgraphs[0].size()));
         for (const auto &sg : group_u.subgraphs) {
             all_nodes.insert(all_nodes.end(), sg.begin(), sg.end());
         }
@@ -937,12 +1025,12 @@ class OrbitGraphProcessor {
         Constr_Graph_t induced_subgraph;
 
         auto map = create_induced_subgraph_map(original_dag, induced_subgraph, all_nodes);
-        std::vector<VertexType> components; // local -> component_id
+        std::vector<VertexType> components;    // local -> component_id
         size_t num_components = compute_weakly_connected_components(induced_subgraph, components);
         out_new_subgraphs.assign(num_components, std::vector<VertexType>());
-        
-        if (all_nodes.empty()) { // Handle empty graph case
-             return true;
+
+        if (all_nodes.empty()) {    // Handle empty graph case
+            return true;
         }
 
         for (const auto &node : all_nodes) {
@@ -971,10 +1059,14 @@ class OrbitGraphProcessor {
 
   public:
     const Graph_t &get_coarse_graph() const { return coarse_graph_; }
+
     const std::vector<VertexType> &get_contraction_map() const { return contraction_map_; }
+
     const Graph_t &get_final_coarse_graph() const { return final_coarse_graph_; }
+
     const std::vector<VertexType> &get_final_contraction_map() const { return final_contraction_map_; }
+
     const std::vector<Group> &get_final_groups() const { return final_groups_; }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp
index c179161d..391c5819 100644
--- a/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp
@@ -18,8 +18,9 @@ limitations under the License.
 
 #pragma once
 
-#include <vector>
 #include <unordered_map>
+#include <vector>
+
 #include "osp/dag_divider/isomorphism_divider/HashComputer.hpp"
 
 namespace osp {
@@ -33,9 +34,8 @@ namespace osp {
  *
  * @tparam IndexType The type used for indexing the objects
  */
-template<typename IndexType>
+template <typename IndexType>
 class PrecomputedHashComputer : public HashComputer<IndexType> {
-
     std::vector<std::size_t> vertex_hashes;
     std::unordered_map<std::size_t, std::vector<IndexType>> orbits;
 
@@ -45,9 +45,9 @@ class PrecomputedHashComputer : public HashComputer<IndexType> {
      *
      * @param precomputed_hashes A vector of hash values for objects 0 to n-1.
      */
-    PrecomputedHashComputer(const std::vector<std::size_t>& precomputed_hashes) : vertex_hashes(precomputed_hashes) {
+    PrecomputedHashComputer(const std::vector<std::size_t> &precomputed_hashes) : vertex_hashes(precomputed_hashes) {
         for (std::size_t i = 0; i < vertex_hashes.size(); ++i) {
-            const auto& hash = vertex_hashes[i];
+            const auto &hash = vertex_hashes[i];
             orbits[hash].push_back(static_cast<IndexType>(i));
         }
     }
@@ -55,15 +55,18 @@ class PrecomputedHashComputer : public HashComputer<IndexType> {
     virtual ~PrecomputedHashComputer() override = default;
 
     inline std::size_t get_vertex_hash(const IndexType &v) const override { return vertex_hashes[v]; }
+
     inline const std::vector<std::size_t> &get_vertex_hashes() const override { return vertex_hashes; }
+
     inline std::size_t num_orbits() const override { return orbits.size(); }
-    
-    inline const std::vector<IndexType> &get_orbit(const IndexType &v) const override { return this->get_orbit_from_hash(this->get_vertex_hash(v)); }
-    inline const std::unordered_map<std::size_t, std::vector<IndexType>> &get_orbits() const override { return orbits; }
 
-    inline const std::vector<IndexType>& get_orbit_from_hash(const std::size_t& hash) const override {
-        return orbits.at(hash);
+    inline const std::vector<IndexType> &get_orbit(const IndexType &v) const override {
+        return this->get_orbit_from_hash(this->get_vertex_hash(v));
     }
+
+    inline const std::unordered_map<std::size_t, std::vector<IndexType>> &get_orbits() const override { return orbits; }
+
+    inline const std::vector<IndexType> &get_orbit_from_hash(const std::size_t &hash) const override { return orbits.at(hash); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp
index 97fa53a5..4b52b935 100644
--- a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp
@@ -18,11 +18,12 @@ limitations under the License.
 
 #pragma once
 
+#include <iostream>
+#include <numeric>
+
 #include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/graph_algorithms/computational_dag_util.hpp"
 #include "osp/graph_algorithms/subgraph_algorithms.hpp"
-#include <iostream>
-#include <numeric>
 
 namespace osp {
 
@@ -35,9 +36,8 @@ namespace osp {
  * potentially disconnected, subgraph that resulted from merging smaller isomorphic subgraphs. It divides
  * the input graph into its weakly connected components and schedules them on proportionally allocated processors.
  */
-template<typename Constr_Graph_t>
+template <typename Constr_Graph_t>
 class TrimmedGroupScheduler : public Scheduler<Constr_Graph_t> {
-
     Scheduler<Constr_Graph_t> *sub_scheduler;
     unsigned min_non_zero_procs_;
 
@@ -122,30 +122,33 @@ class TrimmedGroupScheduler : public Scheduler<Constr_Graph_t> {
         std::vector<unsigned> sub_arch_proc_type_offsets(sub_arch.getNumberOfProcessorTypes(), 0);
         const auto &sub_arch_proc_type_counts = sub_arch.getProcessorTypeCount();
         for (unsigned type_idx = 1; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) {
-            sub_arch_proc_type_offsets[type_idx] = sub_arch_proc_type_offsets[type_idx - 1] + sub_arch_proc_type_counts[type_idx - 1];
+            sub_arch_proc_type_offsets[type_idx]
+                = sub_arch_proc_type_offsets[type_idx - 1] + sub_arch_proc_type_counts[type_idx - 1];
         }
 
         unsigned max_supersteps = 0;
         for (unsigned i = 0; i < min_non_zero_procs_; ++i) {
-
             std::vector<vertex_idx_t<Constr_Graph_t>> group_vertices;
             for (unsigned comp_idx : component_indices_per_group[i]) {
-                group_vertices.insert(group_vertices.end(), components_vertices[comp_idx].begin(), components_vertices[comp_idx].end());
+                group_vertices.insert(
+                    group_vertices.end(), components_vertices[comp_idx].begin(), components_vertices[comp_idx].end());
             }
             std::sort(group_vertices.begin(), group_vertices.end());
 
             BspInstance<Constr_Graph_t> sub_instanc;
             sub_instanc.getArchitecture() = sub_arch;
-            sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix());                      // Inherit compatibility
-            auto global_to_local_map = create_induced_subgraph_map(dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph
+            sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix());    // Inherit compatibility
+            auto global_to_local_map = create_induced_subgraph_map(
+                dag, sub_instanc.getComputationalDag(), group_vertices);    // Create induced subgraph
 
             // Create a schedule object for the sub-problem
             BspSchedule<Constr_Graph_t> sub_schedule(sub_instanc);
 
             // Call the sub-scheduler to compute the schedule for this group of components
             auto status = sub_scheduler->computeSchedule(sub_schedule);
-            if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND)
+            if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) {
                 return status;
+            }
 
             // Map the sub-schedule back to the main schedule.
             for (const auto &v_global : group_vertices) {
@@ -161,9 +164,8 @@ class TrimmedGroupScheduler : public Scheduler<Constr_Graph_t> {
                 // The base offset of this processor type in the main 'arch'.
                 // The offset for the current 'i'-th block of processors of this type.
                 // The local index within that type block.
-                const unsigned global_proc = arch_proc_type_offsets[proc_type] +
-                                             (i * sub_proc_counts[proc_type]) +
-                                             local_idx_within_type;
+                const unsigned global_proc
+                    = arch_proc_type_offsets[proc_type] + (i * sub_proc_counts[proc_type]) + local_idx_within_type;
                 schedule.setAssignedProcessor(v_global, global_proc);
                 schedule.setAssignedSuperstep(v_global, sub_superstep);
             }
@@ -175,4 +177,4 @@ class TrimmedGroupScheduler : public Scheduler<Constr_Graph_t> {
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp
index eb484409..c916b55c 100644
--- a/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp
+++ b/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp
@@ -17,15 +17,16 @@ limitations under the License.
 */
 #pragma once
 
-#include "osp/concepts/computational_dag_concept.hpp"
-#include <vector>
 #include <numeric>
 #include <queue>
 #include <unordered_map>
 #include <unordered_set>
-#include "osp/auxiliary/datastructures/union_find.hpp"
-#include "SequenceSplitter.hpp"
+#include <vector>
+
 #include "SequenceGenerator.hpp"
+#include "SequenceSplitter.hpp"
+#include "osp/auxiliary/datastructures/union_find.hpp"
+#include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/dag_divider/DagDivider.hpp"
 
 namespace osp {
@@ -34,35 +35,37 @@ namespace osp {
  * @class AbstractWavefrontDivider
  * @brief Base class for wavefront-based DAG dividers.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class AbstractWavefrontDivider : public IDagDivider<Graph_t> {
-    static_assert(is_computational_dag_v<Graph_t>,
-                  "AbstractWavefrontDivider can only be used with computational DAGs.");
+    static_assert(is_computational_dag_v<Graph_t>, "AbstractWavefrontDivider can only be used with computational DAGs.");
 
-protected:
+  protected:
     using VertexType = vertex_idx_t<Graph_t>;
 
-    const Graph_t* dag_ptr_ = nullptr;
+    const Graph_t *dag_ptr_ = nullptr;
 
     /**
      * @brief Helper to get connected components for a specific range of levels.
      * This method is now const-correct.
      */
-    std::vector<std::vector<VertexType>> get_components_for_range(
-        size_t start_level, size_t end_level,
-        const std::vector<std::vector<VertexType>>& level_sets) const {
-        
+    std::vector<std::vector<VertexType>> get_components_for_range(size_t start_level,
+                                                                  size_t end_level,
+                                                                  const std::vector<std::vector<VertexType>> &level_sets) const {
         union_find_universe_t<Graph_t> uf;
         for (size_t i = start_level; i < end_level; ++i) {
             for (const auto vertex : level_sets[i]) {
                 uf.add_object(vertex, dag_ptr_->vertex_work_weight(vertex), dag_ptr_->vertex_mem_weight(vertex));
             }
-            for (const auto& node : level_sets[i]) {
-                for (const auto& child : dag_ptr_->children(node)) {
-                    if (uf.is_in_universe(child)) uf.join_by_name(node, child);
+            for (const auto &node : level_sets[i]) {
+                for (const auto &child : dag_ptr_->children(node)) {
+                    if (uf.is_in_universe(child)) {
+                        uf.join_by_name(node, child);
+                    }
                 }
-                for (const auto& parent : dag_ptr_->parents(node)) {
-                    if (uf.is_in_universe(parent)) uf.join_by_name(parent, node);
+                for (const auto &parent : dag_ptr_->parents(node)) {
+                    if (uf.is_in_universe(parent)) {
+                        uf.join_by_name(parent, node);
+                    }
                 }
             }
         }
@@ -83,19 +86,19 @@ class AbstractWavefrontDivider : public IDagDivider<Graph_t> {
      * @brief Computes wavefronts for a specific subset of vertices.
      * This method is now const.
      */
-    std::vector<std::vector<VertexType>> compute_wavefronts_for_subgraph(
-        const std::vector<VertexType>& vertices) const {
-        
-        if (vertices.empty()) return {};
+    std::vector<std::vector<VertexType>> compute_wavefronts_for_subgraph(const std::vector<VertexType> &vertices) const {
+        if (vertices.empty()) {
+            return {};
+        }
 
         std::vector<std::vector<VertexType>> level_sets;
         std::unordered_set<VertexType> vertex_set(vertices.begin(), vertices.end());
         std::unordered_map<VertexType, int> in_degree;
         std::queue<VertexType> q;
 
-        for (const auto& v : vertices) {
+        for (const auto &v : vertices) {
             in_degree[v] = 0;
-            for (const auto& p : dag_ptr_->parents(v)) {
+            for (const auto &p : dag_ptr_->parents(v)) {
                 if (vertex_set.count(p)) {
                     in_degree[v]++;
                 }
@@ -112,7 +115,7 @@ class AbstractWavefrontDivider : public IDagDivider<Graph_t> {
                 VertexType u = q.front();
                 q.pop();
                 current_level.push_back(u);
-                for (const auto& v : dag_ptr_->children(u)) {
+                for (const auto &v : dag_ptr_->children(u)) {
                     if (vertex_set.count(v)) {
                         in_degree[v]--;
                         if (in_degree[v] == 0) {
@@ -127,4 +130,4 @@ class AbstractWavefrontDivider : public IDagDivider<Graph_t> {
     }
 };
 
-} // end namespace osp
\ No newline at end of file
+}    // end namespace osp
diff --git a/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp
index 7eb60b46..c382169b 100644
--- a/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp
+++ b/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp
@@ -17,14 +17,15 @@ limitations under the License.
 */
 #pragma once
 
-#include <vector>
 #include <algorithm>
 #include <iostream>
-#include <memory>
 #include <iterator>
+#include <memory>
+#include <vector>
+
 #include "AbstractWavefrontDivider.hpp"
-#include "SequenceSplitter.hpp"
 #include "SequenceGenerator.hpp"
+#include "SequenceSplitter.hpp"
 
 namespace osp {
 
@@ -37,9 +38,9 @@ namespace osp {
  * section, it recursively repeats the process, allowing for a hierarchical
  * division of the DAG.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
-public:
+  public:
     constexpr static bool enable_debug_print = true;
 
     RecursiveWavefrontDivider() {
@@ -63,51 +64,49 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
         return all_sections;
     }
 
-    RecursiveWavefrontDivider& set_metric(SequenceMetric metric) {
+    RecursiveWavefrontDivider &set_metric(SequenceMetric metric) {
         sequence_metric_ = metric;
         return *this;
     }
 
-    RecursiveWavefrontDivider& use_variance_splitter(double mult, double threshold, size_t min_len = 1) {
+    RecursiveWavefrontDivider &use_variance_splitter(double mult, double threshold, size_t min_len = 1) {
         splitter_ = std::make_unique<VarianceSplitter>(mult, threshold, min_len);
         min_subseq_len_ = min_len;
         return *this;
     }
 
-    RecursiveWavefrontDivider& use_largest_step_splitter(double threshold, size_t min_len) {
+    RecursiveWavefrontDivider &use_largest_step_splitter(double threshold, size_t min_len) {
         splitter_ = std::make_unique<LargestStepSplitter>(threshold, min_len);
         min_subseq_len_ = min_len;
         return *this;
     }
 
-    RecursiveWavefrontDivider& use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) {
+    RecursiveWavefrontDivider &use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) {
         splitter_ = std::make_unique<ThresholdScanSplitter>(diff_threshold, abs_threshold, min_len);
         min_subseq_len_ = min_len;
         return *this;
     }
-    
-    RecursiveWavefrontDivider& set_max_depth(size_t max_depth) {
+
+    RecursiveWavefrontDivider &set_max_depth(size_t max_depth) {
         max_depth_ = max_depth;
         return *this;
     }
 
-private:
+  private:
     using VertexType = vertex_idx_t<Graph_t>;
     using LevelSetConstIterator = typename std::vector<std::vector<VertexType>>::const_iterator;
     using DifferenceType = typename std::iterator_traits<LevelSetConstIterator>::difference_type;
 
-
     SequenceMetric sequence_metric_ = SequenceMetric::COMPONENT_COUNT;
     std::unique_ptr<SequenceSplitter> splitter_;
     size_t min_subseq_len_ = 4;
     size_t max_depth_ = std::numeric_limits<size_t>::max();
 
-    void divide_recursive(
-        LevelSetConstIterator level_begin, LevelSetConstIterator level_end,
-        const std::vector<std::vector<VertexType>>& global_level_sets,
-        std::vector<std::vector<std::vector<VertexType>>>& all_sections,
-        size_t current_depth) const {
-
+    void divide_recursive(LevelSetConstIterator level_begin,
+                          LevelSetConstIterator level_end,
+                          const std::vector<std::vector<VertexType>> &global_level_sets,
+                          std::vector<std::vector<std::vector<VertexType>>> &all_sections,
+                          size_t current_depth) const {
         const auto current_range_size = static_cast<size_t>(std::distance(level_begin, level_end));
         size_t start_level_idx = static_cast<size_t>(std::distance(global_level_sets.cbegin(), level_begin));
         size_t end_level_idx = static_cast<size_t>(std::distance(global_level_sets.cbegin(), level_end));
@@ -115,7 +114,7 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
         // --- Base Cases for Recursion ---
         if (current_depth >= max_depth_ || current_range_size < min_subseq_len_) {
             if constexpr (enable_debug_print) {
-                std::cout << "[DEBUG depth " << current_depth << "] Base case reached. Creating section from levels " 
+                std::cout << "[DEBUG depth " << current_depth << "] Base case reached. Creating section from levels "
                           << start_level_idx << " to " << end_level_idx << "." << std::endl;
             }
             // Ensure the section is not empty before adding
@@ -133,7 +132,9 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
 
         if constexpr (enable_debug_print) {
             std::cout << "[DEBUG depth " << current_depth << "] Analyzing sequence: ";
-            for(const auto& val : sequence) std::cout << val << " ";
+            for (const auto &val : sequence) {
+                std::cout << val << " ";
+            }
             std::cout << std::endl;
         }
 
@@ -142,7 +143,7 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
         // --- Base Case: No further cuts found ---
         if (local_cuts.empty()) {
             if constexpr (enable_debug_print) {
-                std::cout << "[DEBUG depth " << current_depth << "] No cuts found. Creating section from levels " 
+                std::cout << "[DEBUG depth " << current_depth << "] No cuts found. Creating section from levels "
                           << start_level_idx << " to " << end_level_idx << "." << std::endl;
             }
             all_sections.push_back(this->get_components_for_range(start_level_idx, end_level_idx, global_level_sets));
@@ -153,9 +154,8 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
             std::cout << "[DEBUG depth " << current_depth << "] Found " << local_cuts.size() << " cuts: ";
             for (const auto c : local_cuts) {
                 std::cout << c << ", ";
-            } 
-            std::cout << "in level range [" 
-                      << start_level_idx << ", " << end_level_idx << "). Recursing." << std::endl;
+            }
+            std::cout << "in level range [" << start_level_idx << ", " << end_level_idx << "). Recursing." << std::endl;
         }
 
         // --- Recurse on the new, smaller sub-problems ---
@@ -163,20 +163,18 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
         local_cuts.erase(std::unique(local_cuts.begin(), local_cuts.end()), local_cuts.end());
 
         auto current_sub_begin = level_begin;
-        for (const auto& local_cut_idx : local_cuts) {
+        for (const auto &local_cut_idx : local_cuts) {
             auto cut_iterator = level_begin + static_cast<DifferenceType>(local_cut_idx);
             if (cut_iterator > current_sub_begin) {
-                divide_recursive(current_sub_begin, cut_iterator,
-                                 global_level_sets, all_sections, current_depth + 1);
+                divide_recursive(current_sub_begin, cut_iterator, global_level_sets, all_sections, current_depth + 1);
             }
             current_sub_begin = cut_iterator;
         }
         // Recurse on the final segment from the last cut to the end.
         if (current_sub_begin < level_end) {
-            divide_recursive(current_sub_begin, level_end,
-                             global_level_sets, all_sections, current_depth + 1);
+            divide_recursive(current_sub_begin, level_end, global_level_sets, all_sections, current_depth + 1);
         }
     }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp
index e32be944..c815b615 100644
--- a/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp
+++ b/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp
@@ -17,13 +17,14 @@ limitations under the License.
 */
 #pragma once
 
-#include <vector>
 #include <algorithm>
 #include <iostream>
 #include <memory>
+#include <vector>
+
 #include "AbstractWavefrontDivider.hpp"
-#include "SequenceSplitter.hpp"
 #include "SequenceGenerator.hpp"
+#include "SequenceSplitter.hpp"
 
 namespace osp {
 
@@ -32,14 +33,12 @@ namespace osp {
  * @brief Divides a DAG by scanning all wavefronts and applying a splitting algorithm.
  * This revised version uses a fluent API for safer and clearer algorithm configuration.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class ScanWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
-public:
+  public:
     constexpr static bool enable_debug_print = true;
 
-    ScanWavefrontDivider() {
-        use_largest_step_splitter(3.0, 4);
-    }
+    ScanWavefrontDivider() { use_largest_step_splitter(3.0, 4); }
 
     std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> divide(const Graph_t &dag) override {
         this->dag_ptr_ = &dag;
@@ -54,67 +53,69 @@ class ScanWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
 
         SequenceGenerator<Graph_t> generator(dag, level_sets);
         std::vector<double> sequence = generator.generate(sequence_metric_);
-        
+
         if constexpr (enable_debug_print) {
             std::cout << "[DEBUG]   Metric: " << static_cast<int>(sequence_metric_) << std::endl;
             std::cout << "[DEBUG]   Generated sequence: ";
-            for(const auto& val : sequence) std::cout << val << " ";
+            for (const auto &val : sequence) {
+                std::cout << val << " ";
+            }
             std::cout << std::endl;
         }
- 
+
         std::vector<size_t> cut_levels = splitter_->split(sequence);
         std::sort(cut_levels.begin(), cut_levels.end());
         cut_levels.erase(std::unique(cut_levels.begin(), cut_levels.end()), cut_levels.end());
-        
+
         if constexpr (enable_debug_print) {
             std::cout << "[DEBUG]   Final cut levels: ";
-            for(const auto& level : cut_levels) std::cout << level << " ";
+            for (const auto &level : cut_levels) {
+                std::cout << level << " ";
+            }
             std::cout << std::endl;
         }
-        
+
         return create_vertex_maps_from_cuts(cut_levels, level_sets);
     }
 
-    ScanWavefrontDivider& set_metric(SequenceMetric metric) {
+    ScanWavefrontDivider &set_metric(SequenceMetric metric) {
         sequence_metric_ = metric;
         return *this;
     }
 
-    ScanWavefrontDivider& use_variance_splitter(double mult, double threshold, size_t min_len = 1) {
+    ScanWavefrontDivider &use_variance_splitter(double mult, double threshold, size_t min_len = 1) {
         splitter_ = std::make_unique<VarianceSplitter>(mult, threshold, min_len);
         return *this;
     }
 
-    ScanWavefrontDivider& use_largest_step_splitter(double threshold, size_t min_len) {
+    ScanWavefrontDivider &use_largest_step_splitter(double threshold, size_t min_len) {
         splitter_ = std::make_unique<LargestStepSplitter>(threshold, min_len);
         return *this;
     }
 
-    ScanWavefrontDivider& use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) {
+    ScanWavefrontDivider &use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) {
         splitter_ = std::make_unique<ThresholdScanSplitter>(diff_threshold, abs_threshold, min_len);
         return *this;
     }
 
-private:
+  private:
     using VertexType = vertex_idx_t<Graph_t>;
 
     SequenceMetric sequence_metric_ = SequenceMetric::COMPONENT_COUNT;
     std::unique_ptr<SequenceSplitter> splitter_;
 
     std::vector<std::vector<std::vector<VertexType>>> create_vertex_maps_from_cuts(
-        const std::vector<size_t>& cut_levels,
-        const std::vector<std::vector<VertexType>>& level_sets) const {
-        
+        const std::vector<size_t> &cut_levels, const std::vector<std::vector<VertexType>> &level_sets) const {
         if (cut_levels.empty()) {
             // If there are no cuts, return a single section with all components.
-            return { this->get_components_for_range(0, level_sets.size(), level_sets) };
+            return {this->get_components_for_range(0, level_sets.size(), level_sets)};
         }
 
         std::vector<std::vector<std::vector<VertexType>>> vertex_maps;
         size_t start_level = 0;
 
-        for (const auto& cut_level : cut_levels) {
-            if (start_level < cut_level) { // Avoid creating empty sections
+        for (const auto &cut_level : cut_levels) {
+            if (start_level < cut_level) {    // Avoid creating empty sections
                 vertex_maps.push_back(this->get_components_for_range(start_level, cut_level, level_sets));
             }
             start_level = cut_level;
@@ -128,4 +129,4 @@ class ScanWavefrontDivider : public AbstractWavefrontDivider<Graph_t> {
     }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp b/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp
index 73c978e0..9dd925ac 100644
--- a/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp
+++ b/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp
@@ -17,8 +17,9 @@ limitations under the License.
 */
 #pragma once
 
-#include <vector>
 #include <numeric>
+#include <vector>
+
 #include "WavefrontStatisticsCollector.hpp"
 
 namespace osp {
@@ -29,12 +30,12 @@ enum class SequenceMetric { COMPONENT_COUNT, AVAILABLE_PARALLELISM };
  * @class SequenceGenerator
  * @brief Helper to generate a numerical sequence based on a chosen metric.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class SequenceGenerator {
     using VertexType = vertex_idx_t<Graph_t>;
 
-public:
-    SequenceGenerator(const Graph_t& dag, const std::vector<std::vector<VertexType>>& level_sets)
+  public:
+    SequenceGenerator(const Graph_t &dag, const std::vector<std::vector<VertexType>> &level_sets)
         : dag_(dag), level_sets_(level_sets) {}
 
     std::vector<double> generate(SequenceMetric metric) const {
@@ -47,13 +48,13 @@ class SequenceGenerator {
         }
     }
 
-private:
+  private:
     std::vector<double> generate_component_count() const {
         WavefrontStatisticsCollector<Graph_t> collector(dag_, level_sets_);
         auto fwd_stats = collector.compute_forward();
         std::vector<double> seq;
         seq.reserve(fwd_stats.size());
-        for (const auto& stat : fwd_stats) {
+        for (const auto &stat : fwd_stats) {
             seq.push_back(static_cast<double>(stat.connected_components_vertices.size()));
         }
         return seq;
@@ -65,7 +66,7 @@ class SequenceGenerator {
         double cumulative_work = 0.0;
         for (size_t i = 0; i < level_sets_.size(); ++i) {
             double level_work = 0.0;
-            for (const auto& vertex : level_sets_[i]) {
+            for (const auto &vertex : level_sets_[i]) {
                 level_work += dag_.vertex_work_weight(vertex);
             }
             cumulative_work += level_work;
@@ -74,8 +75,8 @@ class SequenceGenerator {
         return seq;
     }
 
-    const Graph_t& dag_;
-    const std::vector<std::vector<VertexType>>& level_sets_;
+    const Graph_t &dag_;
+    const std::vector<std::vector<VertexType>> &level_sets_;
 };
 
-} // end namespace osp
+}    // end namespace osp
diff --git a/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp b/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp
index 1cfc7018..2cde1ad0 100644
--- a/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp
+++ b/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp
@@ -16,12 +16,12 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 #pragma once
-#include <vector>
-#include <limits>
-#include <numeric>
 #include <algorithm>
 #include <cmath>
-#include <iterator> // Required for std::distance and std::iterator_traits
+#include <iterator>    // Required for std::distance and std::iterator_traits
+#include <limits>
+#include <numeric>
+#include <vector>
 
 namespace osp {
 
@@ -32,7 +32,7 @@ enum class SplitAlgorithm { LARGEST_STEP, VARIANCE, THRESHOLD_SCAN };
  * @brief Abstract base class for algorithms that split a sequence of numbers.
  */
 class SequenceSplitter {
-public:
+  public:
     virtual ~SequenceSplitter() = default;
 
     /**
@@ -40,10 +40,9 @@ class SequenceSplitter {
      * @param seq The sequence of numbers to split.
      * @return A vector of indices where the sequence is split.
      */
-    virtual std::vector<size_t> split(const std::vector<double>& seq) = 0;
+    virtual std::vector<size_t> split(const std::vector<double> &seq) = 0;
 };
 
-
 /**
  * @class VarianceSplitter
  * @brief Splits a sequence recursively based on variance reduction.
@@ -51,17 +50,17 @@ class SequenceSplitter {
  * sub-sequences by a factor (var_mult_) and if the original variance is above a threshold.
  */
 class VarianceSplitter : public SequenceSplitter {
-public:
-    VarianceSplitter(double var_mult, double var_threshold,
+  public:
+    VarianceSplitter(double var_mult,
+                     double var_threshold,
                      size_t min_subseq_len = 1,
                      size_t max_depth = std::numeric_limits<size_t>::max())
-        : var_mult_(var_mult),
-          var_threshold_(var_threshold),
-          min_subseq_len_(min_subseq_len),
-          max_depth_(max_depth) {}
+        : var_mult_(var_mult), var_threshold_(var_threshold), min_subseq_len_(min_subseq_len), max_depth_(max_depth) {}
 
-    std::vector<size_t> split(const std::vector<double>& seq) override {
-        if (seq.empty()) return {};
+    std::vector<size_t> split(const std::vector<double> &seq) override {
+        if (seq.empty()) {
+            return {};
+        }
 
         // Precompute prefix sums for the entire sequence
         prefix_sum_.assign(seq.size() + 1, 0.0);
@@ -78,9 +77,9 @@ class VarianceSplitter : public SequenceSplitter {
         return splits;
     }
 
-private:
+  private:
     // Compute mean & variance in [l, r) in O(1)
-    void compute_variance(size_t l, size_t r, double& mean, double& variance) const {
+    void compute_variance(size_t l, size_t r, double &mean, double &variance) const {
         size_t n = r - l;
         if (n <= 1) {
             mean = (n == 1) ? (prefix_sum_[r] - prefix_sum_[l]) : 0.0;
@@ -93,8 +92,10 @@ class VarianceSplitter : public SequenceSplitter {
         variance = sq_sum / static_cast<double>(n) - mean * mean;
     }
 
-    void split_recursive(size_t l, size_t r, std::vector<size_t>& splits, size_t depth) {
-        if (depth >= max_depth_ || r - l < 2 * min_subseq_len_) return;
+    void split_recursive(size_t l, size_t r, std::vector<size_t> &splits, size_t depth) {
+        if (depth >= max_depth_ || r - l < 2 * min_subseq_len_) {
+            return;
+        }
 
         double mean, variance;
         compute_variance(l, r, mean, variance);
@@ -112,9 +113,11 @@ class VarianceSplitter : public SequenceSplitter {
         }
     }
 
-    bool compute_best_split(size_t l, size_t r, size_t& best_split, double original_variance) const {
+    bool compute_best_split(size_t l, size_t r, size_t &best_split, double original_variance) const {
         size_t n = r - l;
-        if (n < 2) return false;
+        if (n < 2) {
+            return false;
+        }
 
         double min_weighted_variance_sum = std::numeric_limits<double>::max();
         best_split = 0;
@@ -124,8 +127,7 @@ class VarianceSplitter : public SequenceSplitter {
             compute_variance(l, i, left_mean, left_var);
             compute_variance(i, r, right_mean, right_var);
 
-            double weighted_sum = static_cast<double>(i - l) * left_var +
-                                  static_cast<double>(r - i) * right_var;
+            double weighted_sum = static_cast<double>(i - l) * left_var + static_cast<double>(r - i) * right_var;
 
             if (weighted_sum < min_weighted_variance_sum) {
                 min_weighted_variance_sum = weighted_sum;
@@ -134,8 +136,7 @@ class VarianceSplitter : public SequenceSplitter {
         }
 
         double total_original_variance = original_variance * static_cast<double>(n);
-        return best_split > l &&
-               min_weighted_variance_sum < var_mult_ * total_original_variance;
+        return best_split > l && min_weighted_variance_sum < var_mult_ * total_original_variance;
     }
 
     double var_mult_;
@@ -146,7 +147,6 @@ class VarianceSplitter : public SequenceSplitter {
     std::vector<double> prefix_sq_sum_;
 };
 
-
 /**
  * @class LargestStepSplitter
  * @brief Splits a monotonic sequence recursively at the point of the largest change.
@@ -154,32 +154,31 @@ class VarianceSplitter : public SequenceSplitter {
  * exceeds a given threshold.
  */
 class LargestStepSplitter : public SequenceSplitter {
-private:
+  private:
     using ConstIterator = std::vector<double>::const_iterator;
     using difference_type = typename std::iterator_traits<ConstIterator>::difference_type;
 
-public:
-    LargestStepSplitter(double diff_threshold,
-                        size_t min_subseq_len,
-                        size_t max_depth = std::numeric_limits<size_t>::max())
-        : diff_threshold_(diff_threshold),
-          min_subseq_len_(min_subseq_len),
-          max_depth_(max_depth) {}
+  public:
+    LargestStepSplitter(double diff_threshold, size_t min_subseq_len, size_t max_depth = std::numeric_limits<size_t>::max())
+        : diff_threshold_(diff_threshold), min_subseq_len_(min_subseq_len), max_depth_(max_depth) {}
 
-    std::vector<size_t> split(const std::vector<double>& seq) override {
+    std::vector<size_t> split(const std::vector<double> &seq) override {
         std::vector<size_t> splits;
         split_recursive(seq.begin(), seq.end(), splits, 0, 0);
         std::sort(splits.begin(), splits.end());
         return splits;
     }
 
-private:
-    void split_recursive(ConstIterator begin, ConstIterator end,
-                         std::vector<size_t>& splits, size_t offset, size_t current_depth) {
-        if (current_depth >= max_depth_) return;
+  private:
+    void split_recursive(ConstIterator begin, ConstIterator end, std::vector<size_t> &splits, size_t offset, size_t current_depth) {
+        if (current_depth >= max_depth_) {
+            return;
+        }
 
         const difference_type size = std::distance(begin, end);
-        if (static_cast<size_t>(size) < 2 * min_subseq_len_) return;
+        if (static_cast<size_t>(size) < 2 * min_subseq_len_) {
+            return;
+        }
 
         double max_diff = 0.0;
         difference_type split_point_local = 0;
@@ -197,8 +196,8 @@ class LargestStepSplitter : public SequenceSplitter {
         if (max_diff > diff_threshold_ && split_point_local > 0) {
             size_t split_point_global = static_cast<size_t>(split_point_local) + offset;
 
-            if ((split_point_local >= static_cast<difference_type>(min_subseq_len_)) &&
-                ((size - split_point_local) >= static_cast<difference_type>(min_subseq_len_))) {
+            if ((split_point_local >= static_cast<difference_type>(min_subseq_len_))
+                && ((size - split_point_local) >= static_cast<difference_type>(min_subseq_len_))) {
                 splits.push_back(split_point_global);
 
                 ConstIterator split_it = begin + split_point_local;
@@ -213,47 +212,41 @@ class LargestStepSplitter : public SequenceSplitter {
     size_t max_depth_;
 };
 
-
 /**
  * @class ThresholdScanSplitter
  * @brief Splits a sequence by scanning for significant changes or crossing an absolute threshold.
  * This is a non-recursive splitter that performs a single pass.
  */
 class ThresholdScanSplitter : public SequenceSplitter {
-public:
-    ThresholdScanSplitter(double diff_threshold,
-                          double absolute_threshold,
-                          size_t min_subseq_len = 1)
-        : diff_threshold_(diff_threshold),
-          absolute_threshold_(absolute_threshold),
-          min_subseq_len_(min_subseq_len) {}
-
-    std::vector<size_t> split(const std::vector<double>& seq) override {
+  public:
+    ThresholdScanSplitter(double diff_threshold, double absolute_threshold, size_t min_subseq_len = 1)
+        : diff_threshold_(diff_threshold), absolute_threshold_(absolute_threshold), min_subseq_len_(min_subseq_len) {}
+
+    std::vector<size_t> split(const std::vector<double> &seq) override {
         std::vector<size_t> splits;
-        if (seq.size() < 2) return splits;
+        if (seq.size() < 2) {
+            return splits;
+        }
 
         size_t last_cut = 0;
         for (size_t i = 0; i < seq.size() - 1; ++i) {
             bool should_cut = false;
             double current = seq[i];
-            double next = seq[i+1];
+            double next = seq[i + 1];
 
             // A split is triggered by a significant change OR by crossing the absolute threshold.
-            if (current > next) { // Dropping
-                if ((current - next) > diff_threshold_ ||
-                    (next < absolute_threshold_ && current >= absolute_threshold_)) {
+            if (current > next) {    // Dropping
+                if ((current - next) > diff_threshold_ || (next < absolute_threshold_ && current >= absolute_threshold_)) {
                     should_cut = true;
                 }
-            } else if (current < next) { // Rising
-                if ((next - current) > diff_threshold_ ||
-                    (next > absolute_threshold_ && current <= absolute_threshold_)) {
+            } else if (current < next) {    // Rising
+                if ((next - current) > diff_threshold_ || (next > absolute_threshold_ && current <= absolute_threshold_)) {
                     should_cut = true;
                 }
             }
-            
+
             if (should_cut) {
-                if ((i + 1 - last_cut) >= min_subseq_len_ &&
-                    (seq.size() - (i + 1)) >= min_subseq_len_) {
+                if ((i + 1 - last_cut) >= min_subseq_len_ && (seq.size() - (i + 1)) >= min_subseq_len_) {
                     splits.push_back(i + 1);
                     last_cut = i + 1;
                 }
@@ -262,10 +255,10 @@ class ThresholdScanSplitter : public SequenceSplitter {
         return splits;
     }
 
-private:
+  private:
     double diff_threshold_;
     double absolute_threshold_;
     size_t min_subseq_len_;
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp b/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp
index 77622b38..65f7d3e5 100644
--- a/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp
+++ b/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp
@@ -16,8 +16,9 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 #pragma once
+#include <algorithm>    // for std::reverse
 #include <vector>
-#include <algorithm> // for std::reverse
+
 #include "osp/auxiliary/datastructures/union_find.hpp"
 
 namespace osp {
@@ -26,7 +27,7 @@ namespace osp {
  * @struct WavefrontStatistics
  * @brief Holds statistical data for a single wavefront.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 struct WavefrontStatistics {
     using VertexType = vertex_idx_t<Graph_t>;
 
@@ -39,12 +40,12 @@ struct WavefrontStatistics {
  * @class WavefrontStatisticsCollector
  * @brief Computes forward and backward wavefront statistics for a given DAG.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class WavefrontStatisticsCollector {
     using VertexType = vertex_idx_t<Graph_t>;
     using UnionFind = union_find_universe_t<Graph_t>;
 
-public:
+  public:
     WavefrontStatisticsCollector(const Graph_t &dag, const std::vector<std::vector<VertexType>> &level_sets)
         : dag_(dag), level_sets_(level_sets) {}
 
@@ -79,9 +80,8 @@ class WavefrontStatisticsCollector {
         return stats;
     }
 
-
-private:
-    void update_union_find(UnionFind& uf, size_t level_idx) const {
+  private:
+    void update_union_find(UnionFind &uf, size_t level_idx) const {
         // Add all vertices from the current level to the universe
         for (const auto vertex : level_sets_[level_idx]) {
             uf.add_object(vertex, dag_.vertex_work_weight(vertex), dag_.vertex_mem_weight(vertex));
@@ -101,14 +101,14 @@ class WavefrontStatisticsCollector {
         }
     }
 
-    void collect_stats_for_level(WavefrontStatistics<Graph_t>& stats, UnionFind& uf) const {
+    void collect_stats_for_level(WavefrontStatistics<Graph_t> &stats, UnionFind &uf) const {
         const auto components = uf.get_connected_components_weights_and_memories();
         stats.connected_components_vertices.reserve(components.size());
         stats.connected_components_weights.reserve(components.size());
         stats.connected_components_memories.reserve(components.size());
 
-        for (const auto& comp : components) {
-            auto& [vertices, weight, memory] = comp;
+        for (const auto &comp : components) {
+            auto &[vertices, weight, memory] = comp;
             stats.connected_components_vertices.emplace_back(vertices);
             stats.connected_components_weights.emplace_back(weight);
             stats.connected_components_memories.emplace_back(memory);
@@ -119,4 +119,4 @@ class WavefrontStatisticsCollector {
     const std::vector<std::vector<VertexType>> &level_sets_;
 };
 
-} // end namespace osp
+}    // end namespace osp
diff --git a/include/osp/graph_algorithms/computational_dag_construction_util.hpp b/include/osp/graph_algorithms/computational_dag_construction_util.hpp
index 553996a6..597b7dc1 100644
--- a/include/osp/graph_algorithms/computational_dag_construction_util.hpp
+++ b/include/osp/graph_algorithms/computational_dag_construction_util.hpp
@@ -27,15 +27,16 @@ namespace osp {
  * @brief Constructs a computational DAG from another graph.
  *
  * This function copies the structure and properties of a source graph into a target graph structure.
- * Assumes that the vertices of the source graph are indexed from 0 to N-1. If the target graph is empty, indices are sequentially assigned starting from 0.
- * If the target graph is not empty, new vertices will be added to the target graph and their indices will be sequentially assigned starting from the index N.
+ * Assumes that the vertices of the source graph are indexed from 0 to N-1. If the target graph is empty, indices are sequentially
+ * assigned starting from 0. If the target graph is not empty, new vertices will be added to the target graph and their indices
+ * will be sequentially assigned starting from the index N.
  *
  * @tparam Graph_from The type of the source graph. Must satisfy `is_computational_dag`.
  * @tparam Graph_to The type of the target graph. Must satisfy `is_constructable_cdag_vertex`.
  * @param from The source graph.
  * @param to The target graph.
  */
-template<typename Graph_from, typename Graph_to>
+template <typename Graph_from, typename Graph_to>
 void constructComputationalDag(const Graph_from &from, Graph_to &to) {
     static_assert(is_computational_dag_v<Graph_from>, "Graph_from must satisfy the computational_dag concept");
     static_assert(is_constructable_cdag_vertex_v<Graph_to>, "Graph_to must satisfy the constructable_cdag_vertex concept");
@@ -45,11 +46,13 @@ void constructComputationalDag(const Graph_from &from, Graph_to &to) {
 
     for (const auto &v_idx : from.vertices()) {
         if constexpr (has_typed_vertices_v<Graph_from> and has_typed_vertices_v<Graph_to>) {
-            vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx),
-                                               from.vertex_mem_weight(v_idx), from.vertex_type(v_idx)));
+            vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx),
+                                               from.vertex_comm_weight(v_idx),
+                                               from.vertex_mem_weight(v_idx),
+                                               from.vertex_type(v_idx)));
         } else {
-            vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx),
-                                               from.vertex_mem_weight(v_idx)));
+            vertex_map.push_back(
+                to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), from.vertex_mem_weight(v_idx)));
         }
     }
 
@@ -66,4 +69,4 @@ void constructComputationalDag(const Graph_from &from, Graph_to &to) {
     }
 }
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/computational_dag_util.hpp b/include/osp/graph_algorithms/computational_dag_util.hpp
index 5fba1c8a..3c8a339b 100644
--- a/include/osp/graph_algorithms/computational_dag_util.hpp
+++ b/include/osp/graph_algorithms/computational_dag_util.hpp
@@ -20,14 +20,13 @@ limitations under the License.
 
 #include <numeric>
 
-#include "osp/concepts/computational_dag_concept.hpp"
 #include "directed_graph_top_sort.hpp"
+#include "osp/concepts/computational_dag_concept.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_memw_t<Graph_t> max_memory_weight(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
 
@@ -39,9 +38,8 @@ v_memw_t<Graph_t> max_memory_weight(const Graph_t &graph) {
     return max_memory_weight;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_memw_t<Graph_t> max_memory_weight(const v_type_t<Graph_t> &nodeType_, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
     static_assert(has_typed_vertices_v<Graph_t>, "Graph_t must have typed vertices");
@@ -56,36 +54,35 @@ v_memw_t<Graph_t> max_memory_weight(const v_type_t<Graph_t> &nodeType_, const Gr
     return max_memory_weight;
 }
 
-template<typename Graph_t, typename VertexIterator>
+template <typename Graph_t, typename VertexIterator>
 v_workw_t<Graph_t> sumOfVerticesWorkWeights(VertexIterator begin, VertexIterator end, const Graph_t &graph) {
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
 
-    return std::accumulate(begin, end, 0, [&](const auto sum, const vertex_idx_t<Graph_t> &v) {
-        return sum + graph.vertex_work_weight(v);
-    });
+    return std::accumulate(
+        begin, end, 0, [&](const auto sum, const vertex_idx_t<Graph_t> &v) { return sum + graph.vertex_work_weight(v); });
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_workw_t<Graph_t> sumOfVerticesWorkWeights(const Graph_t &graph) {
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
 
-    return std::accumulate(graph.vertices().begin(), graph.vertices().end(), static_cast<v_workw_t<Graph_t>>(0), [&](const v_workw_t<Graph_t> sum, const vertex_idx_t<Graph_t> &v) {
-        return sum + graph.vertex_work_weight(v);
-    });
+    return std::accumulate(
+        graph.vertices().begin(),
+        graph.vertices().end(),
+        static_cast<v_workw_t<Graph_t>>(0),
+        [&](const v_workw_t<Graph_t> sum, const vertex_idx_t<Graph_t> &v) { return sum + graph.vertex_work_weight(v); });
 }
 
-template<typename Graph_t>
-v_workw_t<Graph_t> sumOfVerticesWorkWeights(const std::initializer_list<vertex_idx_t<Graph_t>> vertices_,
-                                            const Graph_t &graph) {
+template <typename Graph_t>
+v_workw_t<Graph_t> sumOfVerticesWorkWeights(const std::initializer_list<vertex_idx_t<Graph_t>> vertices_, const Graph_t &graph) {
     return sumOfVerticesWorkWeights(vertices_.begin(), vertices_.end(), graph);
 }
 
-template<typename VertexIterator, typename Graph_t>
+template <typename VertexIterator, typename Graph_t>
 v_commw_t<Graph_t> sumOfVerticesCommunicationWeights(VertexIterator begin, VertexIterator end, const Graph_t &graph) {
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
-    return std::accumulate(begin, end, 0, [&](const auto sum, const vertex_idx_t<Graph_t> &v) {
-        return sum + graph.vertex_comm_weight(v);
-    });
+    return std::accumulate(
+        begin, end, 0, [&](const auto sum, const vertex_idx_t<Graph_t> &v) { return sum + graph.vertex_comm_weight(v); });
 }
 
 /**
@@ -94,12 +91,12 @@ v_commw_t<Graph_t> sumOfVerticesCommunicationWeights(VertexIterator begin, Verte
  * @tparam Instance_t The type of the instance object (e.g., BspInstance) used for compatibility checks.
  * @tparam VertexIterator An iterator over vertex indices of the subgraph.
  */
-template<typename SubGraph_t, typename Instance_t, typename VertexIterator>
-v_workw_t<SubGraph_t> sumOfCompatibleWorkWeights(VertexIterator begin, VertexIterator end, const SubGraph_t &graph,
-                                                 const Instance_t& main_instance, unsigned processorType) {
+template <typename SubGraph_t, typename Instance_t, typename VertexIterator>
+v_workw_t<SubGraph_t> sumOfCompatibleWorkWeights(
+    VertexIterator begin, VertexIterator end, const SubGraph_t &graph, const Instance_t &main_instance, unsigned processorType) {
     static_assert(has_vertex_weights_v<SubGraph_t>, "SubGraph_t must have vertex weights");
-    return std::accumulate(begin, end, static_cast<v_workw_t<SubGraph_t>>(0),
-        [&](const v_workw_t<SubGraph_t> sum, const vertex_idx_t<SubGraph_t> &v) {
+    return std::accumulate(
+        begin, end, static_cast<v_workw_t<SubGraph_t>>(0), [&](const v_workw_t<SubGraph_t> sum, const vertex_idx_t<SubGraph_t> &v) {
             if (main_instance.isCompatibleType(graph.vertex_type(v), processorType)) {
                 return sum + graph.vertex_work_weight(v);
             }
@@ -110,44 +107,42 @@ v_workw_t<SubGraph_t> sumOfCompatibleWorkWeights(VertexIterator begin, VertexIte
 /**
  * @brief Overload to calculate compatible work weight for all vertices in a graph.
  */
-template<typename SubGraph_t, typename Instance_t>
-v_workw_t<SubGraph_t> sumOfCompatibleWorkWeights(const SubGraph_t &graph, const Instance_t& main_instance,
-                                                 unsigned processorType) {
+template <typename SubGraph_t, typename Instance_t>
+v_workw_t<SubGraph_t> sumOfCompatibleWorkWeights(const SubGraph_t &graph, const Instance_t &main_instance, unsigned processorType) {
     return sumOfCompatibleWorkWeights(graph.vertices().begin(), graph.vertices().end(), graph, main_instance, processorType);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_commw_t<Graph_t> sumOfVerticesCommunicationWeights(const Graph_t &graph) {
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
 
-    return std::accumulate(graph.vertices().begin(), graph.vertices().end(), static_cast<v_commw_t<Graph_t>>(0), [&](const v_commw_t<Graph_t> sum, const vertex_idx_t<Graph_t> &v) {
-        return sum + graph.vertex_comm_weight(v);
-    });
+    return std::accumulate(
+        graph.vertices().begin(),
+        graph.vertices().end(),
+        static_cast<v_commw_t<Graph_t>>(0),
+        [&](const v_commw_t<Graph_t> sum, const vertex_idx_t<Graph_t> &v) { return sum + graph.vertex_comm_weight(v); });
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_commw_t<Graph_t> sumOfVerticesCommunicationWeights(const std::initializer_list<vertex_idx_t<Graph_t>> &vertices_,
                                                      const Graph_t &graph) {
     return sumOfVerticesCommunicationWeights(vertices_.begin(), vertices_.end(), graph);
 }
 
-template<typename EdgeIterator, typename Graph_t>
+template <typename EdgeIterator, typename Graph_t>
 e_commw_t<Graph_t> sumOfEdgesCommunicationWeights(EdgeIterator begin, EdgeIterator end, const Graph_t &graph) {
-
     static_assert(has_edge_weights_v<Graph_t>, "Graph_t must have edge weights");
     return std::accumulate(
         begin, end, 0, [&](const auto sum, const edge_desc_t<Graph_t> &e) { return sum + graph.edge_comm_weight(e); });
 }
 
-template<typename Graph_t>
-e_commw_t<Graph_t> sumOfEdgesCommunicationWeights(const std::initializer_list<edge_desc_t<Graph_t>> &edges_,
-                                                  const Graph_t &graph) {
+template <typename Graph_t>
+e_commw_t<Graph_t> sumOfEdgesCommunicationWeights(const std::initializer_list<edge_desc_t<Graph_t>> &edges_, const Graph_t &graph) {
     return sumOfEdgesCommunicationWeights(edges_.begin(), edges_.end(), graph);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 v_workw_t<Graph_t> critical_path_weight(const Graph_t &graph) {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     static_assert(has_vertex_weights_v<Graph_t>, "Graph_t must have vertex weights");
 
@@ -160,7 +155,6 @@ v_workw_t<Graph_t> critical_path_weight(const Graph_t &graph) {
 
     // calculating lenght of longest path
     for (const auto &node : GetTopOrder(graph)) {
-
         v_workw_t<Graph_t> max_temp = 0;
         for (const auto &parent : graph.parents(node)) {
             max_temp = std::max(max_temp, top_length[parent]);
@@ -169,7 +163,6 @@ v_workw_t<Graph_t> critical_path_weight(const Graph_t &graph) {
         top_length[node] = max_temp + graph.vertex_work_weight(node);
 
         if (top_length[node] > critical_path_weight) {
-
             critical_path_weight = top_length[node];
         }
     }
@@ -177,4 +170,4 @@ v_workw_t<Graph_t> critical_path_weight(const Graph_t &graph) {
     return critical_path_weight;
 }
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/cuthill_mckee.hpp b/include/osp/graph_algorithms/cuthill_mckee.hpp
index f99fc19a..848330d4 100644
--- a/include/osp/graph_algorithms/cuthill_mckee.hpp
+++ b/include/osp/graph_algorithms/cuthill_mckee.hpp
@@ -23,13 +23,13 @@ limitations under the License.
 #include <vector>
 
 #include "osp/concepts/computational_dag_concept.hpp"
-#include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_algorithms/directed_graph_path_util.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
+#include "osp/graph_algorithms/directed_graph_util.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct cm_vertex {
     using VertexType = vertex_idx_t<Graph_t>;
     VertexType vertex;
@@ -39,19 +39,18 @@ struct cm_vertex {
     VertexType degree;
 
     cm_vertex() : vertex(0), parent_position(0), degree(0) {}
+
     cm_vertex(VertexType vertex_, VertexType degree_, VertexType parent_position_)
         : vertex(vertex_), parent_position(parent_position_), degree(degree_) {}
 
     bool operator<(cm_vertex const &rhs) const {
-        return (parent_position < rhs.parent_position) ||
-               (parent_position == rhs.parent_position and degree < rhs.degree) ||
-               (parent_position == rhs.parent_position and degree == rhs.degree and vertex < rhs.vertex);
+        return (parent_position < rhs.parent_position) || (parent_position == rhs.parent_position and degree < rhs.degree)
+               || (parent_position == rhs.parent_position and degree == rhs.degree and vertex < rhs.vertex);
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_wavefront(const Graph_t &dag, bool permutation = false) {
-
     using VertexType = vertex_idx_t<Graph_t>;
     using cm_vertex = cm_vertex<Graph_t>;
 
@@ -67,29 +66,25 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_wavefront(const Graph_t &dag, b
     std::vector<cm_vertex> new_wavefront;
     VertexType node_counter = 0;
     while (node_counter < dag.num_vertices()) {
-
         new_wavefront.clear();
         std::sort(current_wavefront.begin(), current_wavefront.end());
 
         if (permutation) {
             for (VertexType i = 0; i < static_cast<VertexType>(current_wavefront.size()); i++) {
-
                 result[current_wavefront[i].vertex] = node_counter + i;
             }
         } else {
             for (size_t i = 0; i < current_wavefront.size(); i++) {
-
                 result[node_counter + i] = current_wavefront[i].vertex;
             }
         }
 
-        if (node_counter + static_cast<VertexType>(current_wavefront.size()) == dag.num_vertices())
+        if (node_counter + static_cast<VertexType>(current_wavefront.size()) == dag.num_vertices()) {
             break;
+        }
 
         for (VertexType i = 0; i < static_cast<VertexType>(current_wavefront.size()); i++) {
-
             for (const auto &child : dag.children(current_wavefront[i].vertex)) {
-
                 predecessors_count[child]++;
                 predecessors_position[child] = std::min(predecessors_position[child], node_counter + i);
 
@@ -107,10 +102,8 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_wavefront(const Graph_t &dag, b
     return result;
 }
 
-
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag, bool start_at_sink, bool perm = false) {
-
     using VertexType = vertex_idx_t<Graph_t>;
     using cm_vertex = cm_vertex<Graph_t>;
 
@@ -125,7 +118,6 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
         const std::vector<unsigned> top_node_distance = get_top_node_distance(dag);
         for (const auto &i : dag.vertices()) {
             if (is_sink(i, dag)) {
-
                 max_node_distances[i] = top_node_distance[i];
 
                 if (top_node_distance[i] > max_distance) {
@@ -139,7 +131,6 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
         const std::vector<unsigned> bottom_node_distance = get_bottom_node_distance(dag);
         for (const auto &i : dag.vertices()) {
             if (is_source(i, dag)) {
-
                 max_node_distances[i] = bottom_node_distance[i];
 
                 if (bottom_node_distance[i] > max_distance) {
@@ -174,7 +165,6 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
 
     VertexType node_counter = 1;
     while (node_counter < dag.num_vertices()) {
-
         std::sort(current_level.begin(), current_level.end());
 
         if (perm) {
@@ -191,14 +181,11 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
             break;
         }
 
-        std::unordered_map<VertexType , VertexType> node_priority;
+        std::unordered_map<VertexType, VertexType> node_priority;
 
         for (VertexType i = 0; i < current_level.size(); i++) {
-
             for (const auto &child : dag.children(current_level[i].vertex)) {
-
                 if (visited.find(child) == visited.end()) {
-
                     if (node_priority.find(child) == node_priority.end()) {
                         node_priority[child] = node_counter + i;
                     } else {
@@ -208,7 +195,6 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
             }
 
             for (const auto &parent : dag.parents(current_level[i].vertex)) {
-
                 if (visited.find(parent) == visited.end()) {
                     if (node_priority.find(parent) == node_priority.end()) {
                         node_priority[parent] = node_counter + i;
@@ -221,11 +207,10 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
 
         node_counter += current_level.size();
 
-        if (node_priority.empty()) { // the dag has more than one connected components
+        if (node_priority.empty()) {    // the dag has more than one connected component
 
             unsigned max_distance = 0;
             for (const auto [node, distance] : max_node_distances) {
-
                 if (visited.find(node) == visited.end() and distance > max_distance) {
                     max_distance = distance;
                     first_node = node;
@@ -243,30 +228,23 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
             current_level.reserve(dag.in_degree(first_node) + dag.out_degree(first_node));
 
             for (const auto &child : dag.children(first_node)) {
-
-                current_level.push_back(
-                    cm_vertex(child, dag.in_degree(child) + dag.out_degree(child), node_counter));
+                current_level.push_back(cm_vertex(child, dag.in_degree(child) + dag.out_degree(child), node_counter));
                 visited.insert(child);
             }
 
             for (const auto &parent : dag.parents(first_node)) {
-
-                current_level.push_back(
-                    cm_vertex(parent, dag.in_degree(parent) + dag.out_degree(parent), node_counter));
+                current_level.push_back(cm_vertex(parent, dag.in_degree(parent) + dag.out_degree(parent), node_counter));
                 visited.insert(parent);
             }
 
             node_counter++;
 
         } else {
-
             current_level.clear();
             current_level.reserve(node_priority.size());
 
             for (const auto &[node, priority] : node_priority) {
-
-                current_level.push_back(
-                    cm_vertex(node, dag.in_degree(node) + dag.out_degree(node), priority));
+                current_level.push_back(cm_vertex(node, dag.in_degree(node) + dag.out_degree(node), priority));
                 visited.insert(node);
             }
         }
@@ -276,7 +254,7 @@ std::vector<vertex_idx_t<Graph_t>> cuthill_mckee_undirected(const Graph_t &dag,
 }
 
 // Cuthill-McKee Wavefront
-template<typename Graph_t>
+template <typename Graph_t>
 inline std::vector<vertex_idx_t<Graph_t>> GetTopOrderCuthillMcKeeWavefront(const Graph_t &dag) {
     std::vector<vertex_idx_t<Graph_t>> order;
     if (dag.num_vertices() > 0) {
@@ -290,7 +268,7 @@ inline std::vector<vertex_idx_t<Graph_t>> GetTopOrderCuthillMcKeeWavefront(const
 }
 
 // Cuthill-McKee Undirected
-template<typename Graph_t>
+template <typename Graph_t>
 inline std::vector<vertex_idx_t<Graph_t>> GetTopOrderCuthillMcKeeUndirected(const Graph_t &dag) {
     std::vector<vertex_idx_t<Graph_t>> order;
     if (dag.num_vertices() > 0) {
@@ -303,5 +281,4 @@ inline std::vector<vertex_idx_t<Graph_t>> GetTopOrderCuthillMcKeeUndirected(cons
     return order;
 }
 
-
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp b/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp
index c8eb6975..845cc27d 100644
--- a/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp
+++ b/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp
@@ -25,17 +25,16 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "osp/auxiliary/Balanced_Coin_Flips.hpp"
-#include "osp/concepts/directed_graph_concept.hpp"
 #include "directed_graph_top_sort.hpp"
 #include "directed_graph_util.hpp"
+#include "osp/auxiliary/Balanced_Coin_Flips.hpp"
+#include "osp/concepts/directed_graph_concept.hpp"
 
 namespace osp {
 
-
-
-template<typename Graph_t>
-std::vector<edge_desc_t<Graph_t>> get_contractable_edges_from_poset_int_map(const std::vector<int> &poset_int_map, const Graph_t &graph) {
+template <typename Graph_t>
+std::vector<edge_desc_t<Graph_t>> get_contractable_edges_from_poset_int_map(const std::vector<int> &poset_int_map,
+                                                                            const Graph_t &graph) {
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph_edge_desc concept");
 
     std::vector<edge_desc_t<Graph_t>> output;
@@ -43,13 +42,13 @@ std::vector<edge_desc_t<Graph_t>> get_contractable_edges_from_poset_int_map(cons
     for (const auto &edge : edges(graph)) {
         vertex_idx_t<Graph_t> src = source(edge, graph);
         vertex_idx_t<Graph_t> tgt = target(edge, graph);
-        
+
         if (poset_int_map[tgt] == poset_int_map[src] + 1) {
             output.emplace_back(edge);
         }
     }
-    
+
     return output;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp b/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp
index 286fcc59..ff0ff192 100644
--- a/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp
+++ b/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp
@@ -22,14 +22,14 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "osp/concepts/directed_graph_concept.hpp"
+#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
-std::pair<edge_desc_t<Graph_t>, bool> edge_desc(const vertex_idx_t<Graph_t> &src, const vertex_idx_t<Graph_t> &dest,
+template <typename Graph_t>
+std::pair<edge_desc_t<Graph_t>, bool> edge_desc(const vertex_idx_t<Graph_t> &src,
+                                                const vertex_idx_t<Graph_t> &dest,
                                                 const Graph_t &graph) {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph edge desc concept");
 
     for (const auto &edge : out_edges(src, graph)) {
@@ -40,16 +40,14 @@ std::pair<edge_desc_t<Graph_t>, bool> edge_desc(const vertex_idx_t<Graph_t> &src
     return {edge_desc_t<Graph_t>(), false};
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles(const Graph_t &graph) {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph edge desc concept");
     static_assert(has_hashable_edge_desc_v<Graph_t>, "Graph_t must satisfy the has_hashable_edge_desc concept");
 
     std::unordered_set<edge_desc_t<Graph_t>> long_edges;
 
     for (const auto &vertex : graph.vertices()) {
-
         std::unordered_set<vertex_idx_t<Graph_t>> children_set;
 
         for (const auto &v : graph.children(vertex)) {
@@ -57,11 +55,9 @@ std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles(const Graph_t &
         }
 
         for (const auto &edge : out_edges(vertex, graph)) {
-
             const auto &child = target(edge, graph);
 
             for (const auto &parent : graph.parents(child)) {
-
                 if (children_set.find(parent) != children_set.cend()) {
                     long_edges.emplace(edge);
                     break;
@@ -73,4 +69,4 @@ std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles(const Graph_t &
     return long_edges;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp b/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp
index b05e54c4..af73869b 100644
--- a/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp
+++ b/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp
@@ -19,18 +19,18 @@ limitations under the License.
 #pragma once
 
 #include <omp.h>
+
 #include <queue>
 #include <unordered_set>
 #include <vector>
 
-#include "osp/concepts/directed_graph_concept.hpp"
 #include "directed_graph_edge_desc_util.hpp"
+#include "osp/concepts/directed_graph_concept.hpp"
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles_parallel(const Graph_t &graph) {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph edge desc concept");
     static_assert(has_hashable_edge_desc_v<Graph_t>, "Graph_t must satisfy the has_hashable_edge_desc concept");
 
@@ -43,7 +43,7 @@ std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles_parallel(const
 
 #pragma omp parallel for schedule(dynamic, 4)
     for (vertex_idx_t<Graph_t> vertex = 0; vertex < graph.num_vertices(); ++vertex) {
-    // for (const auto &vertex : graph.vertices()) {
+        // for (const auto &vertex : graph.vertices()) {
 
         const unsigned int proc = static_cast<unsigned>(omp_get_thread_num());
 
@@ -53,11 +53,9 @@ std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles_parallel(const
         }
 
         for (const auto &edge : out_edges(vertex, graph)) {
-
             const auto &child = target(edge, graph);
 
             for (const auto &parent : graph.parents(child)) {
-
                 if (children_set.find(parent) != children_set.cend()) {
                     deleted_edges_thread[proc].emplace_back(edge);
                     break;
@@ -75,4 +73,4 @@ std::unordered_set<edge_desc_t<Graph_t>> long_edges_in_triangles_parallel(const
     return long_edges;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_edge_view.hpp b/include/osp/graph_algorithms/directed_graph_edge_view.hpp
index cf2829a5..e66cfa71 100644
--- a/include/osp/graph_algorithms/directed_graph_edge_view.hpp
+++ b/include/osp/graph_algorithms/directed_graph_edge_view.hpp
@@ -17,9 +17,10 @@ limitations under the License.
 */
 
 #pragma once
-#include "osp/concepts/directed_graph_concept.hpp"
 #include <type_traits>
 
+#include "osp/concepts/directed_graph_concept.hpp"
+
 namespace osp {
 
 /**
@@ -31,14 +32,14 @@ namespace osp {
  *
  * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph_v` concept.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class edge_view {
   private:
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph_;
 
-    template<typename child_iterator_t>
+    template <typename child_iterator_t>
     class DirectedEdgeIterator {
       public:
         using iterator_category = std::forward_iterator_tag;
@@ -49,14 +50,15 @@ class edge_view {
 
         struct arrow_proxy {
             value_type value;
+
             const value_type *operator->() const noexcept { return &value; }
         };
 
       private:
-        const Graph_t *graph_;                 // Pointer to the graph
-        vertex_idx_t<Graph_t> currentVertex_;  // Current source vertex
-        child_iterator_t currentChild_;        // Iterator to the current target vertex in current_vertex's adjacency list
-        vertex_idx_t<Graph_t> currentEdgeIdx_; // Global index of the current edge in the traversal order
+        const Graph_t *graph_;                    // Pointer to the graph
+        vertex_idx_t<Graph_t> currentVertex_;     // Current source vertex
+        child_iterator_t currentChild_;           // Iterator to the current target vertex in currentVertex_'s adjacency list
+        vertex_idx_t<Graph_t> currentEdgeIdx_;    // Global index of the current edge in the traversal order
 
         void advanceToValid() {
             while (currentVertex_ != graph_->num_vertices()) {
@@ -83,7 +85,6 @@ class edge_view {
 
         DirectedEdgeIterator(const vertex_idx_t<Graph_t> edge_idx, const Graph_t &graph)
             : graph_(&graph), currentVertex_(0), currentEdgeIdx_(edge_idx) {
-
             if (currentEdgeIdx_ >= graph_->num_edges()) {
                 currentEdgeIdx_ = graph_->num_edges();
                 currentVertex_ = graph_->num_vertices();
@@ -110,6 +111,7 @@ class edge_view {
         }
 
         [[nodiscard]] value_type operator*() const { return {currentVertex_, *currentChild_}; }
+
         [[nodiscard]] arrow_proxy operator->() const { return {operator*()}; }
 
         DirectedEdgeIterator &operator++() {
@@ -137,16 +139,19 @@ class edge_view {
     };
 
   public:
-    using DirEdgeIterator = DirectedEdgeIterator<decltype(std::declval<Graph_t>().children(std::declval<vertex_idx_t<Graph_t>>()).begin())>;
+    using DirEdgeIterator
+        = DirectedEdgeIterator<decltype(std::declval<Graph_t>().children(std::declval<vertex_idx_t<Graph_t>>()).begin())>;
     using iterator = DirEdgeIterator;
     using constIterator = DirEdgeIterator;
 
     explicit edge_view(const Graph_t &graph) : graph_(graph) {}
 
     [[nodiscard]] auto begin() const { return DirEdgeIterator(graph_); }
+
     [[nodiscard]] auto cbegin() const { return DirEdgeIterator(graph_); }
 
     [[nodiscard]] auto end() const { return DirEdgeIterator(graph_.num_edges(), graph_); }
+
     [[nodiscard]] auto cend() const { return DirEdgeIterator(graph_.num_edges(), graph_); }
 
     [[nodiscard]] auto size() const { return graph_.num_edges(); }
@@ -163,7 +168,7 @@ class edge_view {
  * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph_v` concept.
  * @tparam IsOutgoing If true, iterates over outgoing edges; otherwise, incoming edges.
  */
-template<typename Graph_t, bool IsOutgoing>
+template <typename Graph_t, bool IsOutgoing>
 class IncidentEdgeView {
   private:
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
@@ -171,7 +176,7 @@ class IncidentEdgeView {
     const Graph_t &graph_;
     vertex_idx_t<Graph_t> anchorVertex_;
 
-    template<typename child_iterator_t>
+    template <typename child_iterator_t>
     class IncidentEdgeIterator {
       public:
         using iterator_category = typename std::iterator_traits<child_iterator_t>::iterator_category;
@@ -182,6 +187,7 @@ class IncidentEdgeView {
 
         struct arrow_proxy {
             value_type value;
+
             const value_type *operator->() const noexcept { return &value; }
         };
 
@@ -191,6 +197,7 @@ class IncidentEdgeView {
 
       public:
         IncidentEdgeIterator() = default;
+
         IncidentEdgeIterator(vertex_idx_t<Graph_t> u, child_iterator_t it) : anchorVertex_(u), currentIt_(it) {}
 
         [[nodiscard]] value_type operator*() const {
@@ -200,6 +207,7 @@ class IncidentEdgeView {
                 return {*currentIt_, anchorVertex_};
             }
         }
+
         [[nodiscard]] arrow_proxy operator->() const { return {operator*()}; }
 
         IncidentEdgeIterator &operator++() {
@@ -224,17 +232,16 @@ class IncidentEdgeView {
             return temp;
         }
 
-        [[nodiscard]] bool operator==(const IncidentEdgeIterator &other) const noexcept {
-            return currentIt_ == other.currentIt_;
-        }
+        [[nodiscard]] bool operator==(const IncidentEdgeIterator &other) const noexcept { return currentIt_ == other.currentIt_; }
 
         [[nodiscard]] bool operator!=(const IncidentEdgeIterator &other) const noexcept { return !(*this == other); }
     };
 
     // Helper to deduce iterator type based on direction
-    using base_iterator_type =
-        std::conditional_t<IsOutgoing, decltype(std::declval<Graph_t>().children(std::declval<vertex_idx_t<Graph_t>>()).begin()),
-                           decltype(std::declval<Graph_t>().parents(std::declval<vertex_idx_t<Graph_t>>()).begin())>;
+    using base_iterator_type
+        = std::conditional_t<IsOutgoing,
+                             decltype(std::declval<Graph_t>().children(std::declval<vertex_idx_t<Graph_t>>()).begin()),
+                             decltype(std::declval<Graph_t>().parents(std::declval<vertex_idx_t<Graph_t>>()).begin())>;
 
   public:
     using iterator = IncidentEdgeIterator<base_iterator_type>;
@@ -249,6 +256,7 @@ class IncidentEdgeView {
             return iterator(anchorVertex_, graph_.parents(anchorVertex_).begin());
         }
     }
+
     [[nodiscard]] auto cbegin() const { return begin(); }
 
     [[nodiscard]] auto end() const {
@@ -258,6 +266,7 @@ class IncidentEdgeView {
             return iterator(anchorVertex_, graph_.parents(anchorVertex_).end());
         }
     }
+
     [[nodiscard]] auto cend() const { return end(); }
 
     [[nodiscard]] auto size() const {
@@ -267,6 +276,7 @@ class IncidentEdgeView {
             return graph_.in_degree(anchorVertex_);
         }
     }
+
     [[nodiscard]] bool empty() const {
         if constexpr (IsOutgoing) {
             return graph_.out_degree(anchorVertex_) == 0;
@@ -279,13 +289,13 @@ class IncidentEdgeView {
 /**
  * @brief A view over the outgoing edges of a specific vertex in a directed graph.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 using OutEdgeView = IncidentEdgeView<Graph_t, true>;
 
 /**
  * @brief A view over the incoming edges of a specific vertex in a directed graph.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 using InEdgeView = IncidentEdgeView<Graph_t, false>;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_path_util.hpp b/include/osp/graph_algorithms/directed_graph_path_util.hpp
index ee6e972d..a1675964 100644
--- a/include/osp/graph_algorithms/directed_graph_path_util.hpp
+++ b/include/osp/graph_algorithms/directed_graph_path_util.hpp
@@ -26,12 +26,13 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "osp/auxiliary/Balanced_Coin_Flips.hpp"
-#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "directed_graph_top_sort.hpp"
 #include "directed_graph_util.hpp"
+#include "osp/auxiliary/Balanced_Coin_Flips.hpp"
+#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 
 namespace osp {
+
 /**
  * @brief Checks if a path exists between two vertices in a directed graph.
  *
@@ -44,9 +45,8 @@ namespace osp {
  * @param graph The graph to search in.
  * @return true if a path exists from src to dest, false otherwise.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 bool has_path(const vertex_idx_t<Graph_t> src, const vertex_idx_t<Graph_t> dest, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     for (const auto &child : bfs_view(graph, src)) {
@@ -58,9 +58,8 @@ bool has_path(const vertex_idx_t<Graph_t> src, const vertex_idx_t<Graph_t> dest,
     return false;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::size_t longestPath(const std::set<vertex_idx_t<Graph_t>> &vertices, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
@@ -71,9 +70,11 @@ std::size_t longestPath(const std::set<vertex_idx_t<Graph_t>> &vertices, const G
     // Find source nodes
     for (const VertexType &node : vertices) {
         unsigned indeg = 0;
-        for (const VertexType &parent : graph.parents(node))
-            if (vertices.count(parent) == 1)
+        for (const VertexType &parent : graph.parents(node)) {
+            if (vertices.count(parent) == 1) {
                 ++indeg;
+            }
+        }
 
         if (indeg == 0) {
             bfs_queue.push(node);
@@ -89,8 +90,9 @@ std::size_t longestPath(const std::set<vertex_idx_t<Graph_t>> &vertices, const G
         bfs_queue.pop();
 
         for (const VertexType &child : graph.children(current)) {
-            if (vertices.count(child) == 0)
+            if (vertices.count(child) == 0) {
                 continue;
+            }
 
             ++visit_counter[child];
             if (visit_counter[child] == in_degrees[child]) {
@@ -100,13 +102,13 @@ std::size_t longestPath(const std::set<vertex_idx_t<Graph_t>> &vertices, const G
         }
     }
 
-    return std::accumulate(vertices.cbegin(), vertices.cend(), 0u,
-                           [&](const std::size_t mx, const VertexType &node) { return std::max(mx, distances[node]); });
+    return std::accumulate(vertices.cbegin(), vertices.cend(), 0u, [&](const std::size_t mx, const VertexType &node) {
+        return std::max(mx, distances[node]);
+    });
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::size_t longestPath(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
@@ -126,7 +128,6 @@ std::size_t longestPath(const Graph_t &graph) {
         bfs_queue.pop();
 
         for (const VertexType &child : graph.children(current)) {
-
             ++visit_counter[child];
             if (visit_counter[child] == graph.in_degree(child)) {
                 bfs_queue.push(child);
@@ -139,9 +140,8 @@ std::size_t longestPath(const Graph_t &graph) {
     return max_edgecount;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> longestChain(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
@@ -159,7 +159,6 @@ std::vector<vertex_idx_t<Graph_t>> longestChain(const Graph_t &graph) {
 
     // calculating lenght of longest path
     for (const VertexType &node : top_sort_view(graph)) {
-
         unsigned max_temp = 0;
         for (const auto &parent : graph.parents(node)) {
             max_temp = std::max(max_temp, top_length[parent]);
@@ -175,7 +174,6 @@ std::vector<vertex_idx_t<Graph_t>> longestChain(const Graph_t &graph) {
     // reconstructing longest path
     chain.push_back(end_longest_chain);
     while (graph.in_degree(end_longest_chain) != 0) {
-
         for (const VertexType &in_node : graph.parents(end_longest_chain)) {
             if (top_length[in_node] != top_length[end_longest_chain] - 1) {
                 continue;
@@ -191,7 +189,7 @@ std::vector<vertex_idx_t<Graph_t>> longestChain(const Graph_t &graph) {
     return chain;
 }
 
-template<typename Graph_t, typename T = unsigned>
+template <typename Graph_t, typename T = unsigned>
 std::vector<T> get_bottom_node_distance(const Graph_t &graph) {
     static_assert(std::is_integral_v<T>, "T must be of integral type");
 
@@ -201,7 +199,6 @@ std::vector<T> get_bottom_node_distance(const Graph_t &graph) {
 
     const auto top_order = GetTopOrder(graph);
     for (std::size_t i = top_order.size() - 1; i < top_order.size(); i--) {
-
         T max_temp = 0;
         for (const auto &j : graph.children(top_order[i])) {
             max_temp = std::max(max_temp, bottom_distance[j]);
@@ -211,7 +208,7 @@ std::vector<T> get_bottom_node_distance(const Graph_t &graph) {
     return bottom_distance;
 }
 
-template<typename Graph_t, typename T = unsigned>
+template <typename Graph_t, typename T = unsigned>
 std::vector<T> get_top_node_distance(const Graph_t &graph) {
     static_assert(std::is_integral_v<T>, "T must be of integral type");
 
@@ -229,9 +226,8 @@ std::vector<T> get_top_node_distance(const Graph_t &graph) {
     return top_distance;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<std::vector<vertex_idx_t<Graph_t>>> compute_wavefronts(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     std::vector<std::vector<vertex_idx_t<Graph_t>>> wavefronts;
@@ -239,7 +235,6 @@ std::vector<std::vector<vertex_idx_t<Graph_t>>> compute_wavefronts(const Graph_t
 
     wavefronts.push_back(std::vector<vertex_idx_t<Graph_t>>());
     for (const auto &vertex : graph.vertices()) {
-
         if (graph.in_degree(vertex) == 0) {
             wavefronts.back().push_back(vertex);
         } else {
@@ -250,12 +245,9 @@ std::vector<std::vector<vertex_idx_t<Graph_t>>> compute_wavefronts(const Graph_t
     vertex_idx_t<Graph_t> counter = static_cast<vertex_idx_t<Graph_t>>(wavefronts.back().size());
 
     while (counter < graph.num_vertices()) {
-
         std::vector<vertex_idx_t<Graph_t>> next_wavefront;
         for (const auto &v_prev_wavefront : wavefronts.back()) {
-
             for (const auto &child : graph.children(v_prev_wavefront)) {
-
                 parents_visited[child]--;
                 if (parents_visited[child] == 0) {
                     next_wavefront.push_back(child);
@@ -270,9 +262,8 @@ std::vector<std::vector<vertex_idx_t<Graph_t>>> compute_wavefronts(const Graph_t
     return wavefronts;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<int> get_strict_poset_integer_map(unsigned const noise, double const poisson_param, const Graph_t &graph) {
-
     static_assert(is_directed_graph_edge_desc_v<Graph_t>, "Graph_t must satisfy the directed_graph_edge_desc concept");
 
     if (noise > static_cast<unsigned>(std::numeric_limits<int>::max())) {
@@ -307,32 +298,27 @@ std::vector<int> get_strict_poset_integer_map(unsigned const noise, double const
     }
 
     for (const auto &source : source_vertices_view(graph)) {
-
-        if (max_path - bot_distance[source] + 1U + 2U * noise >
-            static_cast<unsigned>(std::numeric_limits<int>::max())) {
+        if (max_path - bot_distance[source] + 1U + 2U * noise > static_cast<unsigned>(std::numeric_limits<int>::max())) {
             throw std::overflow_error("Overflow in get_strict_poset_integer_map");
         }
-        new_top[source] =
-            randInt(static_cast<int>(max_path - bot_distance[source] + 1 + 2 * noise)) - static_cast<int>(noise);
+        new_top[source] = randInt(static_cast<int>(max_path - bot_distance[source] + 1 + 2 * noise)) - static_cast<int>(noise);
     }
 
     for (const auto &sink : sink_vertices_view(graph)) {
         if (max_path - top_distance[sink] + 1U + 2U * noise > static_cast<unsigned>(std::numeric_limits<int>::max())) {
             throw std::overflow_error("Overflow in get_strict_poset_integer_map");
         }
-        new_bot[sink] =
-            randInt(static_cast<int>(max_path - top_distance[sink] + 1U + 2U * noise)) - static_cast<int>(noise);
+        new_bot[sink] = randInt(static_cast<int>(max_path - top_distance[sink] + 1U + 2U * noise)) - static_cast<int>(noise);
     }
 
     for (const auto &vertex : top_order) {
-
-        if (is_source(vertex, graph))
+        if (is_source(vertex, graph)) {
             continue;
+        }
 
         int max_temp = std::numeric_limits<int>::min();
 
         for (const auto &edge : in_edges(vertex, graph)) {
-
             int temp = new_top[source(edge, graph)];
             if (up_or_down.at(edge)) {
                 if (poisson_param <= 0.0) {
@@ -347,9 +333,9 @@ std::vector<int> get_strict_poset_integer_map(unsigned const noise, double const
     }
 
     for (std::reverse_iterator iter = top_order.crbegin(); iter != top_order.crend(); ++iter) {
-
-        if (is_sink(*iter, graph))
+        if (is_sink(*iter, graph)) {
             continue;
+        }
 
         int max_temp = std::numeric_limits<int>::min();
 
@@ -370,4 +356,4 @@ std::vector<int> get_strict_poset_integer_map(unsigned const noise, double const
     return output;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_top_sort.hpp b/include/osp/graph_algorithms/directed_graph_top_sort.hpp
index 492bc37e..f8d24124 100644
--- a/include/osp/graph_algorithms/directed_graph_top_sort.hpp
+++ b/include/osp/graph_algorithms/directed_graph_top_sort.hpp
@@ -23,10 +23,10 @@ limitations under the License.
 #include <random>
 #include <vector>
 
+#include "directed_graph_util.hpp"
 #include "osp/auxiliary/math/math_helper.hpp"
 #include "osp/auxiliary/misc.hpp"
 #include "osp/concepts/directed_graph_concept.hpp"
-#include "directed_graph_util.hpp"
 
 /**
  * @file directed_graph_top_sort.hpp
@@ -55,9 +55,8 @@ namespace osp {
  * @param graph The graph to check.
  * @return true if the vertices are in topological order, false otherwise.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 bool checkNodesInTopologicalOrder(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     for (const auto &node : graph.vertices()) {
@@ -71,17 +70,14 @@ bool checkNodesInTopologicalOrder(const Graph_t &graph) {
     return true;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetTopOrder(const Graph_t &graph) {
-
     if constexpr (has_vertices_in_top_order_v<Graph_t>) {
-
         std::vector<vertex_idx_t<Graph_t>> topOrd(graph.num_vertices());
         std::iota(topOrd.begin(), topOrd.end(), static_cast<vertex_idx_t<Graph_t>>(0));
         return topOrd;
 
     } else {
-
         using VertexType = vertex_idx_t<Graph_t>;
 
         std::vector<VertexType> predecessors_count(graph.num_vertices(), 0);
@@ -91,8 +87,9 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrder(const Graph_t &graph) {
         std::queue<VertexType> next;
 
         // Find source nodes
-        for (const VertexType &v : source_vertices_view(graph))
+        for (const VertexType &v : source_vertices_view(graph)) {
             next.push(v);
+        }
 
         // Execute BFS
         while (!next.empty()) {
@@ -102,31 +99,30 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrder(const Graph_t &graph) {
 
             for (const VertexType &current : graph.children(node)) {
                 ++predecessors_count[current];
-                if (predecessors_count[current] == graph.in_degree(current))
+                if (predecessors_count[current] == graph.in_degree(current)) {
                     next.push(current);
+                }
             }
         }
 
-        if (static_cast<VertexType>(TopOrder.size()) != graph.num_vertices())
-            throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" +
-                                     std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) +
-                                     "]");
+        if (static_cast<VertexType>(TopOrder.size()) != graph.num_vertices()) {
+            throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() ["
+                                     + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+        }
 
         return TopOrder;
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetTopOrderReverse(const Graph_t &graph) {
-
     std::vector<vertex_idx_t<Graph_t>> TopOrder = GetTopOrder(graph);
     std::reverse(TopOrder.begin(), TopOrder.end());
     return TopOrder;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetTopOrderGorder(const Graph_t &graph) {
-
     // Generating modified Gorder topological order cf. "Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey
     // Xu Yu, Can Lu, and Xuemin Lin
 
@@ -143,10 +139,9 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrderGorder(const Graph_t &graph) {
     std::vector<double> priorities(graph.num_vertices(), 0.0);
 
     auto v_cmp = [&priorities, &graph](const VertexType &lhs, const VertexType &rhs) {
-        return (priorities[lhs] < priorities[rhs]) ||
-               ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) < graph.out_degree(rhs))) ||
-               ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) == graph.out_degree(rhs)) &&
-                (lhs > rhs));
+        return (priorities[lhs] < priorities[rhs])
+               || ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) < graph.out_degree(rhs)))
+               || ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) == graph.out_degree(rhs)) && (lhs > rhs));
     };
 
     std::priority_queue<VertexType, std::vector<VertexType>, decltype(v_cmp)> ready_q(v_cmp);
@@ -187,22 +182,24 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrderGorder(const Graph_t &graph) {
         }
     }
 
-    if (TopOrder.size() != graph.num_vertices())
-        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" +
-                                 std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    if (TopOrder.size() != graph.num_vertices()) {
+        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() ["
+                                 + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    }
 
     return TopOrder;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetFilteredTopOrder(const std::vector<bool> &valid, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     std::vector<vertex_idx_t<Graph_t>> filteredOrder;
-    for (const auto &node : GetTopOrder(graph))
-        if (valid[node])
+    for (const auto &node : GetTopOrder(graph)) {
+        if (valid[node]) {
             filteredOrder.push_back(node);
+        }
+    }
 
     return filteredOrder;
 }
@@ -216,26 +213,27 @@ std::vector<vertex_idx_t<Graph_t>> GetFilteredTopOrder(const std::vector<bool> &
  * @tparam T The type of the container wrapper.
  * @tparam Graph_t The type of the graph.
  */
-template<typename T, typename Graph_t>
+template <typename T, typename Graph_t>
 struct is_container_wrapper {
   private:
-    template<typename U>
+    template <typename U>
     static auto test(int) -> decltype(std::declval<U>().push(std::declval<vertex_idx_t<Graph_t>>()),
-                                      std::declval<U>().pop_next(), std::declval<U>().empty(), std::true_type());
+                                      std::declval<U>().pop_next(),
+                                      std::declval<U>().empty(),
+                                      std::true_type());
 
-    template<typename>
+    template <typename>
     static std::false_type test(...);
 
   public:
     static constexpr bool value = decltype(test<T>(0))::value;
 };
 
-template<typename T, typename Graph_t>
+template <typename T, typename Graph_t>
 inline constexpr bool is_container_wrapper_v = is_container_wrapper<T, Graph_t>::value;
 
-template<typename Graph_t, typename container_wrapper>
+template <typename Graph_t, typename container_wrapper>
 struct top_sort_iterator {
-
     static_assert(is_container_wrapper_v<container_wrapper, Graph_t>,
                   "container_wrapper must satisfy the container wrapper concept");
 
@@ -255,7 +253,6 @@ struct top_sort_iterator {
 
     top_sort_iterator(const Graph_t &graph_, container_wrapper &next_, vertex_idx_t<Graph_t> start)
         : graph(graph_), next(next_), current_vertex(start), predecessors_count(graph_.num_vertices(), 0) {
-
         if (current_vertex == graph.num_vertices()) {
             return;
         }
@@ -264,7 +261,7 @@ struct top_sort_iterator {
             if (is_source(v, graph)) {
                 next.push(v);
             } else {
-                predecessors_count[v] = static_cast<vertex_idx_t<Graph_t>>( graph.in_degree(v) );
+                predecessors_count[v] = static_cast<vertex_idx_t<Graph_t>>(graph.in_degree(v));
             }
         }
         current_vertex = next.pop_next();
@@ -281,7 +278,6 @@ struct top_sort_iterator {
 
     // Prefix increment
     top_sort_iterator &operator++() {
-
         if (next.empty()) {
             current_vertex = graph.num_vertices();
             return *this;
@@ -308,6 +304,7 @@ struct top_sort_iterator {
     friend bool operator==(const top_sort_iterator &one, const top_sort_iterator &other) {
         return one.current_vertex == other.current_vertex;
     };
+
     friend bool operator!=(const top_sort_iterator &one, const top_sort_iterator &other) {
         return one.current_vertex != other.current_vertex;
     };
@@ -325,9 +322,8 @@ struct top_sort_iterator {
  * @tparam Graph_t The type of the directed graph. Must satisfy the `is_directed_graph` concept.
  *
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -365,9 +361,8 @@ class top_sort_view {
  * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph` concept.
  *
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class dfs_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -393,9 +388,8 @@ class dfs_top_sort_view {
  * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph` concept.
  *
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class bfs_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -411,9 +405,8 @@ class bfs_top_sort_view {
     auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> bfs_top_sort(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     std::vector<vertex_idx_t<Graph_t>> top_sort;
 
@@ -423,9 +416,8 @@ std::vector<vertex_idx_t<Graph_t>> bfs_top_sort(const Graph_t &graph) {
     return top_sort;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> dfs_top_sort(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     std::vector<vertex_idx_t<Graph_t>> top_sort;
 
@@ -435,18 +427,17 @@ std::vector<vertex_idx_t<Graph_t>> dfs_top_sort(const Graph_t &graph) {
     return top_sort;
 }
 
-template<typename Graph_t, typename priority_eval_f, typename T>
+template <typename Graph_t, typename priority_eval_f, typename T>
 struct priority_queue_wrapper {
-
     priority_eval_f prio_f;
 
     struct heap_node {
-
         vertex_idx_t<Graph_t> node;
 
         T priority;
 
         heap_node() : node(0), priority(0) {}
+
         heap_node(vertex_idx_t<Graph_t> n, T p) : node(n), priority(p) {}
 
         bool operator<(heap_node const &rhs) const {
@@ -457,7 +448,7 @@ struct priority_queue_wrapper {
     std::vector<heap_node> heap;
 
   public:
-    template<typename... Args>
+    template <typename... Args>
     priority_queue_wrapper(Args &&...args) : prio_f(std::forward<Args>(args)...) {}
 
     void push(const vertex_idx_t<Graph_t> &v) {
@@ -475,9 +466,8 @@ struct priority_queue_wrapper {
     bool empty() const { return heap.empty(); }
 };
 
-template<typename Graph_t, typename priority_eval_f, typename T>
+template <typename Graph_t, typename priority_eval_f, typename T>
 class priority_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -487,7 +477,7 @@ class priority_top_sort_view {
     using ts_iterator = top_sort_iterator<Graph_t, container>;
 
   public:
-    template<typename... Args>
+    template <typename... Args>
     priority_top_sort_view(const Graph_t &graph_, Args &&...args)
         : graph(graph_), vertex_container(std::forward<Args>(args)...) {}
 
@@ -496,9 +486,8 @@ class priority_top_sort_view {
     auto end() const { return ts_iterator(graph, vertex_container, graph.num_vertices()); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 class locality_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -519,9 +508,8 @@ class locality_top_sort_view {
     auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetTopOrderMinIndex(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
@@ -533,22 +521,21 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrderMinIndex(const Graph_t &graph) {
         TopOrder.push_back(vert);
     }
 
-    if (TopOrder.size() != graph.num_vertices())
-        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" +
-                                 std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    if (TopOrder.size() != graph.num_vertices()) {
+        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() ["
+                                 + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    }
 
     return TopOrder;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 class max_children_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
 
     struct max_children_eval_f {
-
         const Graph_t &graph;
 
         max_children_eval_f(const Graph_t &g) : graph(g) {}
@@ -558,8 +545,7 @@ class max_children_top_sort_view {
 
     priority_queue_wrapper<Graph_t, max_children_eval_f, vertex_idx_t<Graph_t>> vertex_container;
 
-    using ts_iterator =
-        top_sort_iterator<Graph_t, priority_queue_wrapper<Graph_t, max_children_eval_f, vertex_idx_t<Graph_t>>>;
+    using ts_iterator = top_sort_iterator<Graph_t, priority_queue_wrapper<Graph_t, max_children_eval_f, vertex_idx_t<Graph_t>>>;
 
   public:
     max_children_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container(graph_) {}
@@ -569,9 +555,8 @@ class max_children_top_sort_view {
     auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetTopOrderMaxChildren(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
@@ -583,26 +568,24 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrderMaxChildren(const Graph_t &graph)
         TopOrder.push_back(vert);
     }
 
-    if (TopOrder.size() != graph.num_vertices())
-        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" +
-                                 std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    if (TopOrder.size() != graph.num_vertices()) {
+        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() ["
+                                 + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    }
 
     return TopOrder;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 class random_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
 
     struct random_eval_f {
-
         std::vector<vertex_idx_t<Graph_t>> priority;
 
         random_eval_f(const std::size_t num) : priority(num, 0) {
-
             std::iota(priority.begin(), priority.end(), 0);
             std::random_device rd;
             std::mt19937 g(rd());
@@ -614,8 +597,7 @@ class random_top_sort_view {
 
     priority_queue_wrapper<Graph_t, random_eval_f, vertex_idx_t<Graph_t>> vertex_container;
 
-    using ts_iterator =
-        top_sort_iterator<Graph_t, priority_queue_wrapper<Graph_t, random_eval_f, vertex_idx_t<Graph_t>>>;
+    using ts_iterator = top_sort_iterator<Graph_t, priority_queue_wrapper<Graph_t, random_eval_f, vertex_idx_t<Graph_t>>>;
 
   public:
     random_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container(graph.num_vertices()) {}
@@ -625,9 +607,8 @@ class random_top_sort_view {
     auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> GetTopOrderRandom(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
@@ -639,22 +620,21 @@ std::vector<vertex_idx_t<Graph_t>> GetTopOrderRandom(const Graph_t &graph) {
         TopOrder.push_back(vert);
     }
 
-    if (TopOrder.size() != graph.num_vertices())
-        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" +
-                                 std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    if (TopOrder.size() != graph.num_vertices()) {
+        throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() ["
+                                 + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]");
+    }
 
     return TopOrder;
 }
 
-template<typename Graph_t, typename prio_t>
+template <typename Graph_t, typename prio_t>
 class priority_vec_top_sort_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
 
     struct priority_eval_f {
-
         const std::vector<prio_t> &priority;
 
         priority_eval_f(const std::vector<prio_t> &p) : priority(p) {}
@@ -675,4 +655,4 @@ class priority_vec_top_sort_view {
     auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/directed_graph_util.hpp b/include/osp/graph_algorithms/directed_graph_util.hpp
index ffd6b482..8e373acd 100644
--- a/include/osp/graph_algorithms/directed_graph_util.hpp
+++ b/include/osp/graph_algorithms/directed_graph_util.hpp
@@ -18,6 +18,7 @@ limitations under the License.
 
 #pragma once
 
+#include <limits>
 #include <queue>
 #include <unordered_set>
 #include <vector>
@@ -45,9 +46,8 @@ namespace osp {
  * @param graph The graph to check.
  * @return true if there is an edge from src to dest, false otherwise.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 bool edge(const vertex_idx_t<Graph_t> &src, const vertex_idx_t<Graph_t> &dest, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     for (const auto &child : graph.children(src)) {
         if (child == dest) {
@@ -65,7 +65,7 @@ bool edge(const vertex_idx_t<Graph_t> &src, const vertex_idx_t<Graph_t> &dest, c
  * @param graph The graph to check.
  * @return true if the vertex is a sink, false otherwise.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 bool is_sink(const vertex_idx_t<Graph_t> &v, const Graph_t &graph) {
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     return graph.out_degree(v) == 0u;
@@ -79,7 +79,7 @@ bool is_sink(const vertex_idx_t<Graph_t> &v, const Graph_t &graph) {
  * @param graph The graph to check.
  * @return true if the vertex is a source, false otherwise.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 bool is_source(const vertex_idx_t<Graph_t> &v, const Graph_t &graph) {
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     return graph.in_degree(v) == 0u;
@@ -92,9 +92,8 @@ bool is_source(const vertex_idx_t<Graph_t> &v, const Graph_t &graph) {
  * It is used to create views for source and sink vertices in a directed graph.
  *
  */
-template<typename cond_eval, typename Graph_t, typename iterator_t>
+template <typename cond_eval, typename Graph_t, typename iterator_t>
 struct vertex_cond_iterator {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     // TODO static_assert(is_callabl_v<cond_eval>;
 
@@ -110,7 +109,6 @@ struct vertex_cond_iterator {
     using reference = const value_type &;
 
     vertex_cond_iterator(const Graph_t &graph_, const iterator_t &start) : graph(graph_), current_vertex(start) {
-
         while (current_vertex != graph.vertices().end()) {
             // if (cond.eval(graph, *current_vertex)) {
             if (cond(graph, *current_vertex)) {
@@ -144,6 +142,7 @@ struct vertex_cond_iterator {
     }
 
     inline bool operator==(const vertex_cond_iterator &other) { return current_vertex == other.current_vertex; };
+
     inline bool operator!=(const vertex_cond_iterator &other) { return current_vertex != other.current_vertex; };
 };
 
@@ -153,12 +152,12 @@ struct vertex_cond_iterator {
  * These classes provide iterators to traverse the source and sink vertices
  * of a directed graph.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class source_vertices_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
+
     struct source_eval {
         // static bool eval(const Graph_t &graph, const vertex_idx_t<Graph_t> &v) { return graph.in_degree(v) == 0; }
         bool operator()(const Graph_t &graph, const vertex_idx_t<Graph_t> &v) const { return graph.in_degree(v) == 0; }
@@ -182,12 +181,12 @@ class source_vertices_view {
  * These classes provide iterators to traverse the source and sink vertices
  * of a directed graph.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class sink_vertices_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
+
     struct sink_eval {
         // static bool eval(const Graph_t &graph, const vertex_idx_t<Graph_t> &v) { return graph.out_degree(v) == 0; }
         bool operator()(const Graph_t &graph, const vertex_idx_t<Graph_t> &v) { return graph.out_degree(v) == 0; }
@@ -212,9 +211,8 @@ class sink_vertices_view {
  * @param graph The graph to check.
  * @return A vector containing the indices of the source vertices.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> source_vertices(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     std::vector<vertex_idx_t<Graph_t>> vec;
     for (const auto &source : source_vertices_view(graph)) {
@@ -230,9 +228,8 @@ std::vector<vertex_idx_t<Graph_t>> source_vertices(const Graph_t &graph) {
  * @param graph The graph to check.
  * @return A vector containing the indices of the sink vertices.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> sink_vertices(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     std::vector<vertex_idx_t<Graph_t>> vec;
 
@@ -249,9 +246,8 @@ std::vector<vertex_idx_t<Graph_t>> sink_vertices(const Graph_t &graph) {
  * It uses a container wrapper to manage the traversal order.
  * The adj_iterator can be used to setup the traversal along children or parents.
  */
-template<typename Graph_t, typename container_wrapper, typename adj_iterator>
+template <typename Graph_t, typename container_wrapper, typename adj_iterator>
 struct traversal_iterator {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -272,7 +268,6 @@ struct traversal_iterator {
 
     traversal_iterator(const Graph_t &graph_, const vertex_idx_t<Graph_t> &start)
         : graph(graph_), adj_iter(graph_), current_vertex(start) {
-
         if (graph.num_vertices() == start) {
             return;
         }
@@ -289,7 +284,6 @@ struct traversal_iterator {
 
     // Prefix increment
     traversal_iterator &operator++() {
-
         if (vertex_container.empty()) {
             current_vertex = graph.num_vertices();
             return *this;
@@ -315,10 +309,11 @@ struct traversal_iterator {
     }
 
     inline bool operator==(const traversal_iterator &other) { return current_vertex == other.current_vertex; };
+
     inline bool operator!=(const traversal_iterator &other) { return current_vertex != other.current_vertex; };
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct child_iterator {
     const Graph_t &graph;
 
@@ -327,7 +322,7 @@ struct child_iterator {
     inline auto iterate(const vertex_idx_t<Graph_t> &v) const { return graph.children(v); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct bfs_queue_wrapper {
     std::queue<vertex_idx_t<Graph_t>> queue;
 
@@ -348,9 +343,8 @@ struct bfs_queue_wrapper {
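- * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex
+ * These classes provide iterators to traverse the vertices of a directed graph starting from a given vertex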
  * using breadth-first search (BFS).
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class bfs_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -368,7 +362,7 @@ class bfs_view {
     auto size() const { return graph.num_vertices(); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct dfs_stack_wrapper {
     std::vector<vertex_idx_t<Graph_t>> stack;
 
@@ -389,9 +383,8 @@ struct dfs_stack_wrapper {
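- * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex
+ * These classes provide iterators to traverse the vertices of a directed graph starting from a given vertex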
  * using depth-first search (DFS).
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class dfs_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -409,7 +402,7 @@ class dfs_view {
     auto size() const { return graph.num_vertices(); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 struct parents_iterator {
     const Graph_t &graph;
 
@@ -424,9 +417,8 @@ struct parents_iterator {
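- * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex
+ * These classes provide iterators to traverse the vertices of a directed graph starting from a given vertex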
  * using breadth-first search (BFS) in reverse order.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class bfs_reverse_view {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     const Graph_t &graph;
@@ -452,9 +444,8 @@ class bfs_reverse_view {
  * @param graph The graph to check.
  * @return A vector containing the indices of the successors of the vertex.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> successors(const vertex_idx_t<Graph_t> &v, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     std::vector<vertex_idx_t<Graph_t>> vec;
     for (const auto &suc : bfs_view(graph, v)) {
@@ -471,9 +462,8 @@ std::vector<vertex_idx_t<Graph_t>> successors(const vertex_idx_t<Graph_t> &v, co
  * @param graph The graph to check.
  * @return A vector containing the indices of the ancestors of the vertex.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 std::vector<vertex_idx_t<Graph_t>> ancestors(const vertex_idx_t<Graph_t> &v, const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     std::vector<vertex_idx_t<Graph_t>> vec;
     for (const auto &anc : bfs_reverse_view(graph, v)) {
@@ -482,23 +472,24 @@ std::vector<vertex_idx_t<Graph_t>> ancestors(const vertex_idx_t<Graph_t> &v, con
     return vec;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool is_acyclic(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
 
-    if (graph.num_vertices() < 2)
+    if (graph.num_vertices() < 2) {
         return true;
+    }
 
     std::vector<VertexType> predecessors_count(graph.num_vertices(), 0);
 
     std::queue<VertexType> next;
 
     // Find source nodes
-    for (const VertexType &v : source_vertices_view(graph))
+    for (const VertexType &v : source_vertices_view(graph)) {
         next.push(v);
+    }
 
     VertexType node_count = 0;
     while (!next.empty()) {
@@ -508,23 +499,24 @@ bool is_acyclic(const Graph_t &graph) {
 
         for (const VertexType &current : graph.children(node)) {
             ++predecessors_count[current];
-            if (predecessors_count[current] == graph.in_degree(current))
+            if (predecessors_count[current] == graph.in_degree(current)) {
                 next.push(current);
+            }
         }
     }
 
     return node_count == graph.num_vertices();
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool is_connected(const Graph_t &graph) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
     using VertexType = vertex_idx_t<Graph_t>;
 
-    if (graph.num_vertices() < 2)
+    if (graph.num_vertices() < 2) {
         return true;
+    }
 
     std::unordered_set<VertexType> visited;
 
@@ -549,19 +541,19 @@ bool is_connected(const Graph_t &graph) {
     return node_count == graph.num_vertices();
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::size_t num_common_parents(const Graph_t &graph, vertex_idx_t<Graph_t> v1, vertex_idx_t<Graph_t> v2) {
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
-    
-    std::unordered_set<vertex_idx_t<Graph_t>>  parents;
+
+    std::unordered_set<vertex_idx_t<Graph_t>> parents;
     parents.reserve(graph.in_degree(v1));
     for (const auto &par : graph.parents(v1)) {
         parents.emplace(par);
     }
-    
+
     std::size_t num = 0;
     for (const auto &par : graph.parents(v2)) {
-        if(parents.find(par) != parents.end()) {
+        if (parents.find(par) != parents.end()) {
             ++num;
         }
     }
@@ -569,7 +561,7 @@ std::size_t num_common_parents(const Graph_t &graph, vertex_idx_t<Graph_t> v1, v
     return num;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 std::size_t num_common_children(const Graph_t &graph, vertex_idx_t<Graph_t> v1, vertex_idx_t<Graph_t> v2) {
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
@@ -578,10 +570,10 @@ std::size_t num_common_children(const Graph_t &graph, vertex_idx_t<Graph_t> v1,
     for (const auto &chld : graph.children(v1)) {
         childrn.emplace(chld);
     }
-    
+
     std::size_t num = 0;
     for (const auto &chld : graph.children(v2)) {
-        if(childrn.find(chld) != childrn.end()) {
+        if (childrn.find(chld) != childrn.end()) {
             ++num;
         }
     }
@@ -601,8 +593,8 @@ std::size_t num_common_children(const Graph_t &graph, vertex_idx_t<Graph_t> v1,
  * @param[out] components A vector where `components[i]` will be the component ID for vertex `i`.
  * @return The total number of weakly connected components.
  */
-template<typename Graph_t>
-std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vector<vertex_idx_t<Graph_t>>& components) {
+template <typename Graph_t>
+std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vector<vertex_idx_t<Graph_t>> &components) {
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
     using VertexType = vertex_idx_t<Graph_t>;
 
@@ -614,22 +606,22 @@ std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vecto
     components.assign(graph.num_vertices(), std::numeric_limits<VertexType>::max());
     VertexType component_id = 0;
 
-    for (const auto& v : graph.vertices()) {
+    for (const auto &v : graph.vertices()) {
         if (components[v] == std::numeric_limits<VertexType>::max()) {
             std::vector<VertexType> q;
             q.push_back(v);
             components[v] = component_id;
             size_t head = 0;
 
-            while(head < q.size()) {
+            while (head < q.size()) {
                 VertexType u = q[head++];
-                for (const auto& neighbor : graph.parents(u)) {
+                for (const auto &neighbor : graph.parents(u)) {
                     if (components[neighbor] == std::numeric_limits<VertexType>::max()) {
                         components[neighbor] = component_id;
                         q.push_back(neighbor);
                     }
                 }
-                for (const auto& neighbor : graph.children(u)) {
+                for (const auto &neighbor : graph.children(u)) {
                     if (components[neighbor] == std::numeric_limits<VertexType>::max()) {
                         components[neighbor] = component_id;
                         q.push_back(neighbor);
@@ -647,10 +639,10 @@ std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vecto
  * @param graph The input directed graph.
  * @return The number of weakly connected components.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 std::size_t count_weakly_connected_components(const Graph_t &graph) {
     std::vector<vertex_idx_t<Graph_t>> components;
     return compute_weakly_connected_components(graph, components);
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/strongly_connected_components.hpp b/include/osp/graph_algorithms/strongly_connected_components.hpp
index 78c43817..7de49e29 100644
--- a/include/osp/graph_algorithms/strongly_connected_components.hpp
+++ b/include/osp/graph_algorithms/strongly_connected_components.hpp
@@ -18,12 +18,13 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/concepts/directed_graph_concept.hpp"
 #include <algorithm>
 #include <limits>
 #include <stack>
 #include <vector>
 
+#include "osp/concepts/directed_graph_concept.hpp"
+
 namespace osp {
 
 /**
@@ -76,8 +77,7 @@ std::vector<std::vector<vertex_idx_t<Graph_t>>> strongly_connected_components(co
                     ++child_iter;
 
                     if (ids[to] == unvisited) {
-                        dfs_stack.emplace_back(
-                            to, std::make_pair(graph.children(to).begin(), graph.children(to).end()));
+                        dfs_stack.emplace_back(to, std::make_pair(graph.children(to).begin(), graph.children(to).end()));
                         s.push(to);
                         on_stack[to] = true;
                         ids[to] = low[to] = id_counter++;
@@ -92,8 +92,9 @@ std::vector<std::vector<vertex_idx_t<Graph_t>>> strongly_connected_components(co
                             s.pop();
                             on_stack[node] = false;
                             scc.push_back(node);
-                            if (node == at)
+                            if (node == at) {
                                 break;
+                            }
                         }
                         sccs.emplace_back(std::move(scc));
                     }
@@ -112,4 +113,4 @@ std::vector<std::vector<vertex_idx_t<Graph_t>>> strongly_connected_components(co
     return sccs;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_algorithms/subgraph_algorithms.hpp b/include/osp/graph_algorithms/subgraph_algorithms.hpp
index 193dcaa1..44c0a3a8 100644
--- a/include/osp/graph_algorithms/subgraph_algorithms.hpp
+++ b/include/osp/graph_algorithms/subgraph_algorithms.hpp
@@ -18,27 +18,26 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/concepts/constructable_computational_dag_concept.hpp"
-#include "osp/concepts/directed_graph_concept.hpp"
 #include <map>
 #include <set>
 #include <vector>
 
+#include "osp/concepts/constructable_computational_dag_concept.hpp"
+#include "osp/concepts/directed_graph_concept.hpp"
+
 namespace osp {
 
-template<typename Graph_t_in, typename Graph_t_out>
-void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out,
+template <typename Graph_t_in, typename Graph_t_out>
+void create_induced_subgraph(const Graph_t_in &dag,
+                             Graph_t_out &dag_out,
                              const std::set<vertex_idx_t<Graph_t_in>> &selected_nodes,
                              const std::set<vertex_idx_t<Graph_t_in>> &extra_sources = {}) {
-
     static_assert(std::is_same_v<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_out>>,
                   "Graph_t_in and out must have the same vertex_idx types");
 
-    static_assert(is_constructable_cdag_vertex_v<Graph_t_out>,
-                  "Graph_t_out must satisfy the constructable_cdag_vertex concept");
+    static_assert(is_constructable_cdag_vertex_v<Graph_t_out>, "Graph_t_out must satisfy the constructable_cdag_vertex concept");
 
-    static_assert(is_constructable_cdag_edge_v<Graph_t_out>,
-                  "Graph_t_out must satisfy the constructable_cdag_edge concept");
+    static_assert(is_constructable_cdag_edge_v<Graph_t_out>, "Graph_t_out must satisfy the constructable_cdag_edge concept");
 
     assert(dag_out.num_vertices() == 0);
 
@@ -60,8 +59,8 @@ void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out,
 
         if constexpr (is_constructable_cdag_typed_vertex_v<Graph_t_out> and has_typed_vertices_v<Graph_t_in>) {
             // add vertex with type
-            dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node),
-                               dag.vertex_type(node));
+            dag_out.add_vertex(
+                dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node));
         } else {
             // add vertex without type
             dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node));
@@ -69,85 +68,90 @@ void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out,
     }
 
     if constexpr (has_edge_weights_v<Graph_t_in> and has_edge_weights_v<Graph_t_out>) {
-
         // add edges with edge comm weights
-        for (const auto &node : selected_nodes)
+        for (const auto &node : selected_nodes) {
             for (const auto &in_edge : in_edges(node, dag)) {
                 const auto &pred = source(in_edge, dag);
-                if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end())
+                if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end()) {
                     dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge));
+                }
             }
+        }
 
     } else {
-
         // add edges without edge comm weights
-        for (const auto &node : selected_nodes)
+        for (const auto &node : selected_nodes) {
             for (const auto &pred : dag.parents(node)) {
-
-                if (selected_nodes.find(pred) != selected_nodes.end() ||
-                    extra_sources.find(pred) != extra_sources.end())
+                if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end()) {
                     dag_out.add_edge(local_idx[pred], local_idx[node]);
+                }
             }
+        }
     }
 }
 
-
-template<typename Graph_t_in, typename Graph_t_out>
-void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out,
+template <typename Graph_t_in, typename Graph_t_out>
+void create_induced_subgraph(const Graph_t_in &dag,
+                             Graph_t_out &dag_out,
                              const std::vector<vertex_idx_t<Graph_t_in>> &selected_nodes) {
     return create_induced_subgraph(dag, dag_out, std::set<vertex_idx_t<Graph_t_in>>(selected_nodes.begin(), selected_nodes.end()));
 }
 
-
-template<typename Graph_t>
+template <typename Graph_t>
 bool checkOrderedIsomorphism(const Graph_t &first, const Graph_t &second) {
-
     static_assert(is_directed_graph_v<Graph_t>, "Graph_t must satisfy the directed_graph concept");
 
-    if (first.num_vertices() != second.num_vertices() || first.num_edges() != second.num_edges())
+    if (first.num_vertices() != second.num_vertices() || first.num_edges() != second.num_edges()) {
         return false;
+    }
 
     for (const auto &node : first.vertices()) {
-        if (first.vertex_work_weight(node) != second.vertex_work_weight(node) ||
-            first.vertex_mem_weight(node) != second.vertex_mem_weight(node) ||
-            first.vertex_comm_weight(node) != second.vertex_comm_weight(node) ||
-            first.vertex_type(node) != second.vertex_type(node))
+        if (first.vertex_work_weight(node) != second.vertex_work_weight(node)
+            || first.vertex_mem_weight(node) != second.vertex_mem_weight(node)
+            || first.vertex_comm_weight(node) != second.vertex_comm_weight(node)
+            || first.vertex_type(node) != second.vertex_type(node)) {
             return false;
+        }
 
-        if (first.in_degree(node) != second.in_degree(node) || first.out_degree(node) != second.out_degree(node))
+        if (first.in_degree(node) != second.in_degree(node) || first.out_degree(node) != second.out_degree(node)) {
             return false;
+        }
 
         if constexpr (has_edge_weights_v<Graph_t>) {
-
             std::set<std::pair<vertex_idx_t<Graph_t>, e_commw_t<Graph_t>>> first_children, second_children;
 
-            for (const auto &out_edge : out_edges(node, first))
+            for (const auto &out_edge : out_edges(node, first)) {
                 first_children.emplace(target(out_edge, first), first.edge_comm_weight(out_edge));
+            }
 
-            for (const auto &out_edge : out_edges(node, second))
+            for (const auto &out_edge : out_edges(node, second)) {
                 second_children.emplace(target(out_edge, second), second.edge_comm_weight(out_edge));
+            }
 
             auto itr = first_children.begin(), second_itr = second_children.begin();
             for (; itr != first_children.end() && second_itr != second_children.end(); ++itr) {
-                if (*itr != *second_itr)
+                if (*itr != *second_itr) {
                     return false;
+                }
                 ++second_itr;
             }
 
         } else {
-
             std::set<vertex_idx_t<Graph_t>> first_children, second_children;
 
-            for (const auto &child : first.children(node))
+            for (const auto &child : first.children(node)) {
                 first_children.emplace(child);
+            }
 
-            for (const auto &child : second.children(node))
+            for (const auto &child : second.children(node)) {
                 second_children.emplace(child);
+            }
 
             auto itr = first_children.begin(), second_itr = second_children.begin();
             for (; itr != first_children.end() && second_itr != second_children.end(); ++itr) {
-                if (*itr != *second_itr)
+                if (*itr != *second_itr) {
                     return false;
+                }
                 ++second_itr;
             }
         }
@@ -156,25 +160,21 @@ bool checkOrderedIsomorphism(const Graph_t &first, const Graph_t &second) {
     return true;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::vector<Graph_t_out> create_induced_subgraphs(const Graph_t_in &dag_in,
-                                                  const std::vector<unsigned> &partition_IDs) {
+template <typename Graph_t_in, typename Graph_t_out>
+std::vector<Graph_t_out> create_induced_subgraphs(const Graph_t_in &dag_in, const std::vector<unsigned> &partition_IDs) {
     // assumes that input partition IDs are consecutive and starting from 0
 
     static_assert(std::is_same_v<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_out>>,
                   "Graph_t_in and out must have the same vertex_idx types");
 
-    static_assert(is_constructable_cdag_vertex_v<Graph_t_out>,
-                  "Graph_t_out must satisfy the constructable_cdag_vertex concept");
-
-    static_assert(is_constructable_cdag_edge_v<Graph_t_out>,
-                  "Graph_t_out must satisfy the constructable_cdag_edge concept");
-
+    static_assert(is_constructable_cdag_vertex_v<Graph_t_out>, "Graph_t_out must satisfy the constructable_cdag_vertex concept");
 
+    static_assert(is_constructable_cdag_edge_v<Graph_t_out>, "Graph_t_out must satisfy the constructable_cdag_edge concept");
 
     unsigned number_of_parts = 0;
-    for (const auto id : partition_IDs)
+    for (const auto id : partition_IDs) {
         number_of_parts = std::max(number_of_parts, id + 1);
+    }
 
     std::vector<Graph_t_out> split_dags(number_of_parts);
 
@@ -184,33 +184,32 @@ std::vector<Graph_t_out> create_induced_subgraphs(const Graph_t_in &dag_in,
         local_idx[node] = split_dags[partition_IDs[node]].num_vertices();
 
         if constexpr (is_constructable_cdag_typed_vertex_v<Graph_t_out> and has_typed_vertices_v<Graph_t_in>) {
-
-            split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node),
-                                                       dag_in.vertex_mem_weight(node), dag_in.vertex_type(node));
+            split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node),
+                                                       dag_in.vertex_comm_weight(node),
+                                                       dag_in.vertex_mem_weight(node),
+                                                       dag_in.vertex_type(node));
         } else {
-            split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node),
-                                                       dag_in.vertex_mem_weight(node));
+            split_dags[partition_IDs[node]].add_vertex(
+                dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node), dag_in.vertex_mem_weight(node));
         }
     }
 
     if constexpr (has_edge_weights_v<Graph_t_in> and has_edge_weights_v<Graph_t_out>) {
-
         for (const auto node : dag_in.vertices()) {
             for (const auto &out_edge : out_edges(node, dag_in)) {
-
                 auto succ = target(out_edge, dag_in);
 
-                if (partition_IDs[node] == partition_IDs[succ])
-                    split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[succ],
-                                                            dag_in.edge_comm_weight(out_edge));
+                if (partition_IDs[node] == partition_IDs[succ]) {
+                    split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[succ], dag_in.edge_comm_weight(out_edge));
+                }
             }
         }
     } else {
-
         for (const auto node : dag_in.vertices()) {
             for (const auto &child : dag_in.children(node)) {
-                if (partition_IDs[node] == partition_IDs[child])
+                if (partition_IDs[node] == partition_IDs[child]) {
                     split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[child]);
+                }
             }
         }
     }
@@ -218,18 +217,15 @@ std::vector<Graph_t_out> create_induced_subgraphs(const Graph_t_in &dag_in,
     return split_dags;
 }
 
-template<typename Graph_t_in, typename Graph_t_out>
-std::unordered_map<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_in>> create_induced_subgraph_map(const Graph_t_in &dag, Graph_t_out &dag_out,
-                             const std::vector<vertex_idx_t<Graph_t_in>> &selected_nodes) {
-
+template <typename Graph_t_in, typename Graph_t_out>
+std::unordered_map<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_in>> create_induced_subgraph_map(
+    const Graph_t_in &dag, Graph_t_out &dag_out, const std::vector<vertex_idx_t<Graph_t_in>> &selected_nodes) {
     static_assert(std::is_same_v<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_out>>,
                   "Graph_t_in and out must have the same vertex_idx types");
 
-    static_assert(is_constructable_cdag_vertex_v<Graph_t_out>,
-                  "Graph_t_out must satisfy the constructable_cdag_vertex concept");
+    static_assert(is_constructable_cdag_vertex_v<Graph_t_out>, "Graph_t_out must satisfy the constructable_cdag_vertex concept");
 
-    static_assert(is_constructable_cdag_edge_v<Graph_t_out>,
-                  "Graph_t_out must satisfy the constructable_cdag_edge concept");
+    static_assert(is_constructable_cdag_edge_v<Graph_t_out>, "Graph_t_out must satisfy the constructable_cdag_edge concept");
 
     assert(dag_out.num_vertices() == 0);
 
@@ -241,8 +237,8 @@ std::unordered_map<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_in>> create_in
 
         if constexpr (is_constructable_cdag_typed_vertex_v<Graph_t_out> and has_typed_vertices_v<Graph_t_in>) {
             // add vertex with type
-            dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node),
-                               dag.vertex_type(node));
+            dag_out.add_vertex(
+                dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node));
         } else {
             // add vertex without type
             dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node));
@@ -250,26 +246,28 @@ std::unordered_map<vertex_idx_t<Graph_t_in>, vertex_idx_t<Graph_t_in>> create_in
     }
 
     if constexpr (has_edge_weights_v<Graph_t_in> and has_edge_weights_v<Graph_t_out>) {
-
         // add edges with edge comm weights
-        for (const auto &node : selected_nodes)
+        for (const auto &node : selected_nodes) {
             for (const auto &in_edge : in_edges(node, dag)) {
                 const auto &pred = source(in_edge, dag);
-                if (local_idx.count(pred))
+                if (local_idx.count(pred)) {
                     dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge));
+                }
             }
+        }
 
     } else {
-
         // add edges without edge comm weights
-        for (const auto &node : selected_nodes)
+        for (const auto &node : selected_nodes) {
             for (const auto &pred : dag.parents(node)) {
-                if (local_idx.count(pred))
+                if (local_idx.count(pred)) {
                     dag_out.add_edge(local_idx[pred], local_idx[node]);
+                }
             }
+        }
     }
 
     return local_idx;
 }
 
-} // end namespace osp
+}    // end namespace osp
diff --git a/include/osp/graph_algorithms/transitive_reduction.hpp b/include/osp/graph_algorithms/transitive_reduction.hpp
index d6b5767a..a5ec6772 100644
--- a/include/osp/graph_algorithms/transitive_reduction.hpp
+++ b/include/osp/graph_algorithms/transitive_reduction.hpp
@@ -18,12 +18,13 @@ limitations under the License.
 
 #pragma once
 
+#include <vector>
+
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
 #include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "osp/graph_algorithms/directed_graph_path_util.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
 
-#include <vector>
 namespace osp {
 
 /**
@@ -58,11 +59,13 @@ void transitive_reduction_sparse(const Graph_t_in &graph_in, Graph_t_out &graph_
     // 1. Copy vertices and their properties from graph_in to graph_out.
     for (const auto &v_idx : graph_in.vertices()) {
         if constexpr (has_typed_vertices_v<Graph_t_in> && is_constructable_cdag_typed_vertex_v<Graph_t_out>) {
-            graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx),
-                                 graph_in.vertex_mem_weight(v_idx), graph_in.vertex_type(v_idx));
+            graph_out.add_vertex(graph_in.vertex_work_weight(v_idx),
+                                 graph_in.vertex_comm_weight(v_idx),
+                                 graph_in.vertex_mem_weight(v_idx),
+                                 graph_in.vertex_type(v_idx));
         } else {
-            graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx),
-                                 graph_in.vertex_mem_weight(v_idx));
+            graph_out.add_vertex(
+                graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), graph_in.vertex_mem_weight(v_idx));
         }
     }
 
@@ -121,11 +124,13 @@ void transitive_reduction_dense(const Graph_t_in &graph_in, Graph_t_out &graph_o
     // 1. Copy vertices and their properties from graph_in to graph_out.
     for (const auto &v_idx : graph_in.vertices()) {
         if constexpr (has_typed_vertices_v<Graph_t_in> && is_constructable_cdag_typed_vertex_v<Graph_t_out>) {
-            graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx),
-                                 graph_in.vertex_mem_weight(v_idx), graph_in.vertex_type(v_idx));
+            graph_out.add_vertex(graph_in.vertex_work_weight(v_idx),
+                                 graph_in.vertex_comm_weight(v_idx),
+                                 graph_in.vertex_mem_weight(v_idx),
+                                 graph_in.vertex_type(v_idx));
         } else {
-            graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx),
-                                 graph_in.vertex_mem_weight(v_idx));
+            graph_out.add_vertex(
+                graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), graph_in.vertex_mem_weight(v_idx));
         }
     }
 
@@ -169,4 +174,4 @@ void transitive_reduction_dense(const Graph_t_in &graph_in, Graph_t_out &graph_o
     }
 }
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp
index 616aea6b..fd950cc3 100644
--- a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp
@@ -17,7 +17,7 @@ limitations under the License.
 */
 #pragma once
 
-#include <cstddef> // for std::size_t
+#include <cstddef>    // for std::size_t
 
 namespace osp {
 
@@ -33,9 +33,8 @@ namespace osp {
  * @tparam memw_t Type for memory weights.
  * @tparam vertex_type_t Type for vertex types.
  */
-template<typename vertex_idx_t, typename workw_t, typename commw_t, typename memw_t, typename vertex_type_t>
+template <typename vertex_idx_t, typename workw_t, typename commw_t, typename memw_t, typename vertex_type_t>
 struct cdag_vertex_impl {
-
     using vertex_idx_type = vertex_idx_t;
     using work_weight_type = workw_t;
     using comm_weight_type = commw_t;
@@ -58,10 +57,8 @@ struct cdag_vertex_impl {
      * @param mem_w The memory weight.
      * @param vertex_t The type of the vertex.
      */
-    cdag_vertex_impl(vertex_idx_t vertex_idx_, workw_t work_w, commw_t comm_w, memw_t mem_w,
-                     vertex_type_t vertex_t)
-        : id(vertex_idx_), work_weight(work_w), comm_weight(comm_w), mem_weight(mem_w),
-          vertex_type(vertex_t) {}
+    cdag_vertex_impl(vertex_idx_t vertex_idx_, workw_t work_w, commw_t comm_w, memw_t mem_w, vertex_type_t vertex_t)
+        : id(vertex_idx_), work_weight(work_w), comm_weight(comm_w), mem_weight(mem_w), vertex_type(vertex_t) {}
 
     vertex_idx_t id = 0;
 
@@ -86,4 +83,4 @@ using cdag_vertex_impl_int = cdag_vertex_impl<std::size_t, int, int, int, unsign
  */
 using cdag_vertex_impl_unsigned = cdag_vertex_impl<std::size_t, unsigned, unsigned, unsigned, unsigned>;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp
index 6ef6007c..f933a7b6 100644
--- a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp
@@ -18,6 +18,7 @@ limitations under the License.
 #pragma once
 
 #include <algorithm>
+#include <cassert>
 #include <iterator>
 #include <limits>
 #include <numeric>
@@ -27,862 +28,1041 @@ limitations under the License.
 
 #include "osp/auxiliary/math/math_helper.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
-#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
+#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "osp/concepts/specific_graph_impl.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_view.hpp"
 #include "osp/graph_implementations/integral_range.hpp"
 
 namespace osp {
 
-template<bool keep_vertex_order, bool use_work_weights = false, bool use_comm_weights = false, bool use_mem_weights = false, bool use_vert_types = false, typename vert_t = std::size_t, typename edge_t = std::size_t, typename work_weight_type = unsigned, typename comm_weight_type = unsigned, typename mem_weight_type = unsigned, typename vertex_type_template_type = unsigned>
+template <bool keep_vertex_order,
+          bool use_work_weights = false,
+          bool use_comm_weights = false,
+          bool use_mem_weights = false,
+          bool use_vert_types = false,
+          typename vert_t = std::size_t,
+          typename edge_t = std::size_t,
+          typename work_weight_type = unsigned,
+          typename comm_weight_type = unsigned,
+          typename mem_weight_type = unsigned,
+          typename vertex_type_template_type = unsigned>
 class Compact_Sparse_Graph {
-    static_assert(std::is_integral<vert_t>::value && std::is_integral<edge_t>::value, "Vertex and edge type must be of integral nature.");
+    static_assert(std::is_integral<vert_t>::value && std::is_integral<edge_t>::value,
+                  "Vertex and edge type must be of integral nature.");
     static_assert(std::is_arithmetic_v<work_weight_type> && "Work weight must be of arithmetic type.");
     static_assert(std::is_arithmetic_v<comm_weight_type> && "Communication weight must be of arithmetic type.");
     static_assert(std::is_arithmetic_v<mem_weight_type> && "Memory weight must be of arithmetic type.");
     static_assert(std::is_integral_v<vertex_type_template_type> && "Vertex type type must be of integral type.");
 
-    public:
-        using vertex_idx = vert_t;
-
-        using vertex_work_weight_type   = std::conditional_t<use_work_weights, work_weight_type, edge_t>;
-        using vertex_comm_weight_type   = comm_weight_type;
-        using vertex_mem_weight_type    = mem_weight_type;
-        using vertex_type_type          = vertex_type_template_type;
-
-        static bool constexpr vertices_in_top_order     = true;
-        static bool constexpr children_in_top_order     = true;
-        static bool constexpr children_in_vertex_order  = true;
-        static bool constexpr parents_in_top_order      = true;
-        static bool constexpr parents_in_vertex_order   = true;
-    
-    private:
-        using ThisT = Compact_Sparse_Graph<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>;
-
-    protected:    
-        class Compact_Parent_Edges {
-            private:
-                // Compressed Sparse Row (CSR)
-                std::vector<vertex_idx> csr_edge_parents;
-                std::vector<edge_t> csr_target_ptr;
-
-            public:
-                Compact_Parent_Edges() = default;
-                Compact_Parent_Edges(const Compact_Parent_Edges &other) = default;
-                Compact_Parent_Edges(Compact_Parent_Edges &&other) = default;
-                Compact_Parent_Edges &operator=(const Compact_Parent_Edges &other) = default;
-                Compact_Parent_Edges &operator=(Compact_Parent_Edges &&other) = default;
-                virtual ~Compact_Parent_Edges() = default;
-
-                Compact_Parent_Edges(const std::vector<vertex_idx> &csr_edge_parents_, const std::vector<edge_t> &csr_target_ptr_) : csr_edge_parents(csr_edge_parents_), csr_target_ptr(csr_target_ptr_) {};
-                Compact_Parent_Edges(std::vector<vertex_idx> &&csr_edge_parents_, std::vector<edge_t> &&csr_target_ptr_) : csr_edge_parents(std::move(csr_edge_parents_)), csr_target_ptr(std::move(csr_target_ptr_)) {};
-
-                inline edge_t number_of_parents(const vertex_idx v) const {
-                    return csr_target_ptr[v + 1] - csr_target_ptr[v];
-                }
+  public:
+    using vertex_idx = vert_t;
+
+    using vertex_work_weight_type = std::conditional_t<use_work_weights, work_weight_type, edge_t>;
+    using vertex_comm_weight_type = comm_weight_type;
+    using vertex_mem_weight_type = mem_weight_type;
+    using vertex_type_type = vertex_type_template_type;
+
+    static bool constexpr vertices_in_top_order = true;
+    static bool constexpr children_in_top_order = true;
+    static bool constexpr children_in_vertex_order = true;
+    static bool constexpr parents_in_top_order = true;
+    static bool constexpr parents_in_vertex_order = true;
+
+  private:
+    using ThisT = Compact_Sparse_Graph<keep_vertex_order,
+                                       use_work_weights,
+                                       use_comm_weights,
+                                       use_mem_weights,
+                                       use_vert_types,
+                                       vert_t,
+                                       edge_t,
+                                       work_weight_type,
+                                       comm_weight_type,
+                                       mem_weight_type,
+                                       vertex_type_template_type>;
+
+  protected:
+    class Compact_Parent_Edges {
+      private:
+        // Compressed Sparse Row (CSR)
+        std::vector<vertex_idx> csr_edge_parents;
+        std::vector<edge_t> csr_target_ptr;
+
+      public:
+        Compact_Parent_Edges() = default;
+        Compact_Parent_Edges(const Compact_Parent_Edges &other) = default;
+        Compact_Parent_Edges(Compact_Parent_Edges &&other) = default;
+        Compact_Parent_Edges &operator=(const Compact_Parent_Edges &other) = default;
+        Compact_Parent_Edges &operator=(Compact_Parent_Edges &&other) = default;
+        virtual ~Compact_Parent_Edges() = default;
+
+        Compact_Parent_Edges(const std::vector<vertex_idx> &csr_edge_parents_, const std::vector<edge_t> &csr_target_ptr_)
+            : csr_edge_parents(csr_edge_parents_), csr_target_ptr(csr_target_ptr_) {};
+        Compact_Parent_Edges(std::vector<vertex_idx> &&csr_edge_parents_, std::vector<edge_t> &&csr_target_ptr_)
+            : csr_edge_parents(std::move(csr_edge_parents_)), csr_target_ptr(std::move(csr_target_ptr_)) {};
+
+        inline edge_t number_of_parents(const vertex_idx v) const { return csr_target_ptr[v + 1] - csr_target_ptr[v]; }
+
+        class Parent_range {
+          private:
+            const std::vector<vertex_idx> &_csr_edge_parents;
+            const std::vector<edge_t> &_csr_target_ptr;
+            const vertex_idx _vert;
+
+          public:
+            Parent_range(const std::vector<vertex_idx> &csr_edge_parents,
+                         const std::vector<edge_t> &csr_target_ptr,
+                         const vertex_idx vert)
+                : _csr_edge_parents(csr_edge_parents), _csr_target_ptr(csr_target_ptr), _vert(vert) {};
+
+            inline auto cbegin() const {
+                auto it = _csr_edge_parents.cbegin();
+                std::advance(it, _csr_target_ptr[_vert]);
+                return it;
+            }
+
+            inline auto cend() const {
+                auto it = _csr_edge_parents.cbegin();
+                std::advance(it, _csr_target_ptr[_vert + 1]);
+                return it;
+            }
 
-                class Parent_range {
-                    private:
-                        const std::vector<vertex_idx> &_csr_edge_parents;
-                        const std::vector<edge_t> &_csr_target_ptr;
-                        const vertex_idx _vert;
+            inline auto begin() const { return cbegin(); }
 
-                    public:
-                        Parent_range (const std::vector<vertex_idx> &csr_edge_parents, const std::vector<edge_t> &csr_target_ptr, const vertex_idx vert) : _csr_edge_parents(csr_edge_parents), _csr_target_ptr(csr_target_ptr), _vert(vert) {};
+            inline auto end() const { return cend(); }
 
-                        inline auto cbegin() const { auto it = _csr_edge_parents.cbegin(); std::advance(it, _csr_target_ptr[_vert]); return it; }
-                        inline auto cend() const { auto it = _csr_edge_parents.cbegin(); std::advance(it, _csr_target_ptr[_vert + 1]); return it; }
-                        
-                        inline auto begin() const { return cbegin(); }
-                        inline auto end() const { return cend(); }
+            inline auto crbegin() const {
+                auto it = _csr_edge_parents.crbegin();
+                std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert + 1]);
+                return it;
+            };
 
-                        inline auto crbegin() const { auto it = _csr_edge_parents.crbegin(); std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert + 1]); return it; };
-                        inline auto crend() const { auto it = _csr_edge_parents.crbegin(); std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert]); return it; };
+            inline auto crend() const {
+                auto it = _csr_edge_parents.crbegin();
+                std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert]);
+                return it;
+            };
 
-                        inline auto rbegin() const { return crbegin(); };
-                        inline auto rend() const { return crend(); };
-                };
+            inline auto rbegin() const { return crbegin(); };
 
-                inline Parent_range parents(const vertex_idx vert) const { return Parent_range(csr_edge_parents, csr_target_ptr, vert); }
+            inline auto rend() const { return crend(); };
         };
 
-        class Compact_Children_Edges {
-            private:
-                // Compressed Sparse Column (CSC)
-                std::vector<vertex_idx> csc_edge_children;
-                std::vector<edge_t> csc_source_ptr;
-
-            public:
-                Compact_Children_Edges() = default;
-                Compact_Children_Edges(const Compact_Children_Edges &other) = default;
-                Compact_Children_Edges(Compact_Children_Edges &&other) = default;
-                Compact_Children_Edges &operator=(const Compact_Children_Edges &other) = default;
-                Compact_Children_Edges &operator=(Compact_Children_Edges &&other) = default;
-                virtual ~Compact_Children_Edges() = default;
-
-                Compact_Children_Edges(const std::vector<vertex_idx> &csc_edge_children_, const std::vector<edge_t> &csc_source_ptr_) : csc_edge_children(csc_edge_children_), csc_source_ptr(csc_source_ptr_) {};
-                Compact_Children_Edges(std::vector<vertex_idx> &&csc_edge_children_, std::vector<edge_t> &&csc_source_ptr_) : csc_edge_children(std::move(csc_edge_children_)), csc_source_ptr(std::move(csc_source_ptr_)) {};
-
-                inline edge_t number_of_children(const vertex_idx v) const {
-                    return csc_source_ptr[v + 1] - csc_source_ptr[v];
-                }
+        inline Parent_range parents(const vertex_idx vert) const { return Parent_range(csr_edge_parents, csr_target_ptr, vert); }
+    };
+
+    class Compact_Children_Edges {
+      private:
+        // Compressed Sparse Column (CSC)
+        std::vector<vertex_idx> csc_edge_children;
+        std::vector<edge_t> csc_source_ptr;
+
+      public:
+        Compact_Children_Edges() = default;
+        Compact_Children_Edges(const Compact_Children_Edges &other) = default;
+        Compact_Children_Edges(Compact_Children_Edges &&other) = default;
+        Compact_Children_Edges &operator=(const Compact_Children_Edges &other) = default;
+        Compact_Children_Edges &operator=(Compact_Children_Edges &&other) = default;
+        virtual ~Compact_Children_Edges() = default;
+
+        Compact_Children_Edges(const std::vector<vertex_idx> &csc_edge_children_, const std::vector<edge_t> &csc_source_ptr_)
+            : csc_edge_children(csc_edge_children_), csc_source_ptr(csc_source_ptr_) {};
+        Compact_Children_Edges(std::vector<vertex_idx> &&csc_edge_children_, std::vector<edge_t> &&csc_source_ptr_)
+            : csc_edge_children(std::move(csc_edge_children_)), csc_source_ptr(std::move(csc_source_ptr_)) {};
+
+        inline edge_t number_of_children(const vertex_idx v) const { return csc_source_ptr[v + 1] - csc_source_ptr[v]; }
+
+        inline vertex_idx source(const edge_t &indx) const {
+            auto it = std::upper_bound(csc_source_ptr.cbegin(), csc_source_ptr.cend(), indx);
+            vertex_idx src = static_cast<vertex_idx>(std::distance(csc_source_ptr.cbegin(), it) - 1);
+            return src;
+        };
+
+        inline vertex_idx target(const edge_t &indx) const { return csc_edge_children[indx]; };
 
-                inline vertex_idx source(const edge_t &indx) const { auto it = std::upper_bound(csc_source_ptr.cbegin(), csc_source_ptr.cend(), indx); vertex_idx src = static_cast<vertex_idx>(std::distance(csc_source_ptr.cbegin(), it) - 1); return src;};
-                inline vertex_idx target(const edge_t &indx) const { return csc_edge_children[indx]; };
+        inline edge_t children_indx_begin(const vertex_idx &vert) const { return csc_source_ptr[vert]; };
 
-                inline edge_t children_indx_begin(const vertex_idx &vert) const { return csc_source_ptr[vert]; };
+        class Children_range {
+          private:
+            const std::vector<vertex_idx> &_csc_edge_children;
+            const std::vector<edge_t> &_csc_source_ptr;
+            const vertex_idx _vert;
 
-                class Children_range {
-                    private:
-                        const std::vector<vertex_idx> &_csc_edge_children;
-                        const std::vector<edge_t> &_csc_source_ptr;
-                        const vertex_idx _vert;
+          public:
+            Children_range(const std::vector<vertex_idx> &csc_edge_children,
+                           const std::vector<edge_t> &csc_source_ptr,
+                           const vertex_idx vert)
+                : _csc_edge_children(csc_edge_children), _csc_source_ptr(csc_source_ptr), _vert(vert) {};
 
-                    public:
-                        Children_range (const std::vector<vertex_idx> &csc_edge_children, const std::vector<edge_t> &csc_source_ptr, const vertex_idx vert) : _csc_edge_children(csc_edge_children), _csc_source_ptr(csc_source_ptr), _vert(vert) {};
+            inline auto cbegin() const {
+                auto it = _csc_edge_children.cbegin();
+                std::advance(it, _csc_source_ptr[_vert]);
+                return it;
+            };
 
-                        inline auto cbegin() const { auto it = _csc_edge_children.cbegin(); std::advance(it, _csc_source_ptr[_vert]); return it; };
-                        inline auto cend() const { auto it = _csc_edge_children.cbegin(); std::advance(it, _csc_source_ptr[_vert + 1]); return it; };
+            inline auto cend() const {
+                auto it = _csc_edge_children.cbegin();
+                std::advance(it, _csc_source_ptr[_vert + 1]);
+                return it;
+            };
 
-                        inline auto begin() const { return cbegin(); };
-                        inline auto end() const { return cend(); };
+            inline auto begin() const { return cbegin(); };
 
-                        inline auto crbegin() const { auto it = _csc_edge_children.crbegin(); std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert + 1]); return it; };
-                        inline auto crend() const { auto it = _csc_edge_children.crbegin(); std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert]); return it; };
+            inline auto end() const { return cend(); };
 
-                        inline auto rbegin() const { return crbegin(); };
-                        inline auto rend() const { return crend(); };
-                };
+            inline auto crbegin() const {
+                auto it = _csc_edge_children.crbegin();
+                std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert + 1]);
+                return it;
+            };
 
-                inline Children_range children(const vertex_idx vert) const { return Children_range(csc_edge_children, csc_source_ptr, vert); }
+            inline auto crend() const {
+                auto it = _csc_edge_children.crbegin();
+                std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert]);
+                return it;
+            };
+
+            inline auto rbegin() const { return crbegin(); };
+
+            inline auto rend() const { return crend(); };
         };
 
+        inline Children_range children(const vertex_idx vert) const {
+            return Children_range(csc_edge_children, csc_source_ptr, vert);
+        }
+    };
 
+    vertex_idx number_of_vertices = static_cast<vert_t>(0);
+    edge_t number_of_edges = static_cast<edge_t>(0);
 
-        vertex_idx number_of_vertices = static_cast<vert_t>(0);
-        edge_t number_of_edges = static_cast<edge_t>(0);
+    Compact_Parent_Edges csr_in_edges;
+    Compact_Children_Edges csc_out_edges;
 
-        Compact_Parent_Edges csr_in_edges;
-        Compact_Children_Edges csc_out_edges;
+    vertex_type_type number_of_vertex_types = static_cast<vertex_type_type>(1);
 
-        vertex_type_type number_of_vertex_types = static_cast<vertex_type_type>(1);
+    std::vector<vertex_work_weight_type> vert_work_weights;
+    std::vector<vertex_comm_weight_type> vert_comm_weights;
+    std::vector<vertex_mem_weight_type> vert_mem_weights;
+    std::vector<vertex_type_type> vert_types;
 
-        std::vector<vertex_work_weight_type> vert_work_weights;
-        std::vector<vertex_comm_weight_type> vert_comm_weights;
-        std::vector<vertex_mem_weight_type> vert_mem_weights;
-        std::vector<vertex_type_type> vert_types;
+    std::vector<vertex_idx> vertex_permutation_from_internal_to_original;
+    std::vector<vertex_idx> vertex_permutation_from_original_to_internal;
 
-        std::vector<vertex_idx> vertex_permutation_from_internal_to_original;
-        std::vector<vertex_idx> vertex_permutation_from_original_to_internal;
+    template <typename RetT = void>
+    std::enable_if_t<not use_vert_types, RetT> _update_num_vertex_types() {
+        number_of_vertex_types = static_cast<vertex_type_type>(1);
+    }
 
-        template<typename RetT = void>
-        std::enable_if_t<not use_vert_types, RetT> _update_num_vertex_types() {
-            number_of_vertex_types = static_cast<vertex_type_type>(1);
+    template <typename RetT = void>
+    std::enable_if_t<use_vert_types, RetT> _update_num_vertex_types() {
+        number_of_vertex_types = static_cast<vertex_type_type>(1);
+        for (const auto vt : vert_types) {
+            number_of_vertex_types = std::max(number_of_vertex_types, vt);
+        }
+    }
+
+  public:
+    Compact_Sparse_Graph() = default;
+    Compact_Sparse_Graph(const Compact_Sparse_Graph &other) = default;
+    Compact_Sparse_Graph(Compact_Sparse_Graph &&other) = default;
+    Compact_Sparse_Graph &operator=(const Compact_Sparse_Graph &other) = default;
+    Compact_Sparse_Graph &operator=(Compact_Sparse_Graph &&other) = default;
+    virtual ~Compact_Sparse_Graph() = default;
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type &edges)
+        : number_of_vertices(num_vertices_), number_of_edges(static_cast<edge_t>(edges.size())) {
+        static_assert(is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value
+                      || is_edge_list_type<edge_list_type, vertex_idx, edge_t>::value);
+
+        assert((0 <= num_vertices_) && "Number of vertices must be non-negative.");
+        assert((edges.size() < static_cast<size_t>(std::numeric_limits<edge_t>::max()))
+               && "Number of edges must be strictly smaller than the maximally representable number.");
+
+        if constexpr (is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value) {
+            assert(std::all_of(edges.begin(),
+                               edges.end(),
+                               [num_vertices_](const auto &edge) {
+                                   return (0 <= edge.first) && (edge.first < num_vertices_) && (0 <= edge.second)
+                                          && (edge.second < num_vertices_);
+                               })
+                   && "Source and target of edges must be non-negative and less than the number of vertices.");
+        }
+
+        if constexpr (is_edge_list_type_v<edge_list_type, vertex_idx, edge_t>) {
+            assert(std::all_of(edges.begin(),
+                               edges.end(),
+                               [num_vertices_](const auto &edge) {
+                                   return (0 <= edge.source) && (edge.source < num_vertices_) && (0 <= edge.target)
+                                          && (edge.target < num_vertices_);
+                               })
+                   && "Source and target of edges must be non-negative and less than the number of vertices.");
         }
 
-        template<typename RetT = void>
-        std::enable_if_t<use_vert_types, RetT> _update_num_vertex_types() {
-            number_of_vertex_types = static_cast<vertex_type_type>(1);
-            for (const auto vt : vert_types) {
-                number_of_vertex_types = std::max(number_of_vertex_types, vt);
+        if constexpr (keep_vertex_order) {
+            if constexpr (is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value) {
+                assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.first < edge.second; })
+                       && "Vertex order must be a topological order.");
+            }
+            if constexpr (is_edge_list_type_v<edge_list_type, vertex_idx, edge_t>) {
+                assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.source < edge.target; })
+                       && "Vertex order must be a topological order.");
             }
         }
-    
 
-    public:
-        Compact_Sparse_Graph() = default;
-        Compact_Sparse_Graph(const Compact_Sparse_Graph &other) = default;
-        Compact_Sparse_Graph(Compact_Sparse_Graph &&other) = default;
-        Compact_Sparse_Graph &operator=(const Compact_Sparse_Graph &other) = default;
-        Compact_Sparse_Graph &operator=(Compact_Sparse_Graph &&other) = default;
-        virtual ~Compact_Sparse_Graph() = default;
+        if constexpr (use_work_weights) {
+            vert_work_weights = std::vector<vertex_work_weight_type>(num_vertices(), 1);
+        }
+        if constexpr (use_comm_weights) {
+            vert_comm_weights = std::vector<vertex_comm_weight_type>(num_vertices(), 0);
+        }
+        if constexpr (use_mem_weights) {
+            vert_mem_weights = std::vector<vertex_mem_weight_type>(num_vertices(), 0);
+        }
+        if constexpr (use_vert_types) {
+            number_of_vertex_types = 1;
+            vert_types = std::vector<vertex_type_type>(num_vertices(), 0);
+        }
+        if constexpr (!keep_vertex_order) {
+            vertex_permutation_from_internal_to_original.reserve(num_vertices());
+            vertex_permutation_from_original_to_internal.reserve(num_vertices());
+        }
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges) : number_of_vertices(num_vertices_), number_of_edges(static_cast<edge_t>(edges.size())) {
-            static_assert( is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value
-                        || is_edge_list_type<edge_list_type, vertex_idx, edge_t>::value);
-            
-            assert((0 <= num_vertices_) && "Number of vertices must be non-negative.");
-            assert((edges.size() < static_cast<size_t>(std::numeric_limits<edge_t>::max())) && "Number of edges must be strictly smaller than the maximally representable number.");
-            
-            if constexpr ( is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value ) {
-                assert(std::all_of(edges.begin(), edges.end(), [num_vertices_](const auto &edge) { return (0 <= edge.first) && (edge.first < num_vertices_) && (0 <= edge.second) && (edge.second < num_vertices_); } ) && "Source and target of edges must be non-negative and less than the number of vertices.");
-            }
+        // Construction
+        std::vector<std::vector<vertex_idx>> children_tmp(num_vertices());
+        std::vector<edge_t> num_parents_tmp(num_vertices(), 0);
 
-            if constexpr ( is_edge_list_type_v<edge_list_type, vertex_idx, edge_t> ) {
-                assert(std::all_of(edges.begin(), edges.end(), [num_vertices_](const auto &edge) { return (0 <= edge.source) && (edge.source < num_vertices_) && (0 <= edge.target) && (edge.target < num_vertices_); } ) && "Source and target of edges must be non-negative and less than the number of vertices.");
+        if constexpr (is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value) {
+            for (const auto &edge : edges) {
+                children_tmp[edge.first].push_back(edge.second);
+                num_parents_tmp[edge.second]++;
+            }
+        }
+        if constexpr (is_edge_list_type_v<edge_list_type, vertex_idx, edge_t>) {
+            for (const auto &edge : edges) {
+                children_tmp[edge.source].push_back(edge.target);
+                num_parents_tmp[edge.target]++;
             }
+        }
 
-            if constexpr (keep_vertex_order) {
-                if constexpr ( is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value ) {
-                    assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.first < edge.second; } ) && "Vertex order must be a topological order.");
-                }
-                if constexpr ( is_edge_list_type_v<edge_list_type, vertex_idx, edge_t> ) {
-                    assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.source < edge.target; } ) && "Vertex order must be a topological order.");
+        std::vector<vertex_idx> csc_edge_children;
+        csc_edge_children.reserve(num_edges());
+        std::vector<edge_t> csc_source_ptr(num_vertices() + 1);
+        std::vector<vertex_idx> csr_edge_parents(num_edges());
+        std::vector<edge_t> csr_target_ptr;
+        csr_target_ptr.reserve(num_vertices() + 1);
+
+        if constexpr (keep_vertex_order) {
+            for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
+                csc_source_ptr[vert] = static_cast<edge_t>(csc_edge_children.size());
+
+                std::sort(children_tmp[vert].begin(), children_tmp[vert].end());
+                for (const auto &chld : children_tmp[vert]) {
+                    csc_edge_children.emplace_back(chld);
                 }
             }
+            csc_source_ptr[num_vertices()] = static_cast<edge_t>(csc_edge_children.size());
 
-            if constexpr (use_work_weights) {
-                vert_work_weights = std::vector<vertex_work_weight_type>(num_vertices(), 1);
-            }
-            if constexpr (use_comm_weights) {
-                vert_comm_weights = std::vector<vertex_comm_weight_type>(num_vertices(), 0);
+            csr_target_ptr = std::vector<edge_t>(num_vertices() + 1, 0);
+            for (std::size_t i = 0U; i < num_parents_tmp.size(); ++i) {
+                csr_target_ptr[i + 1] = csr_target_ptr[i] + num_parents_tmp[i];
             }
-            if constexpr (use_mem_weights) {
-                vert_mem_weights = std::vector<vertex_mem_weight_type>(num_vertices(), 0);
-            }
-            if constexpr (use_vert_types) {
-                number_of_vertex_types = 1;
-                vert_types = std::vector<vertex_type_type>(num_vertices(), 0);
-            }
-            if constexpr (!keep_vertex_order) {
-                vertex_permutation_from_internal_to_original.reserve(num_vertices());
-                vertex_permutation_from_original_to_internal.reserve(num_vertices());
+
+            std::vector<edge_t> offset = csr_target_ptr;
+            for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
+                for (const auto &chld : children_tmp[vert]) {
+                    csr_edge_parents[offset[chld]++] = vert;
+                }
             }
 
-            // Construction
-            std::vector<std::vector<vertex_idx>> children_tmp(num_vertices());
-            std::vector<edge_t> num_parents_tmp(num_vertices(), 0);
+        } else {
+            std::vector<std::vector<vertex_idx>> parents_tmp(num_vertices());
 
-            if constexpr ( is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value ) {
+            if constexpr (is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value) {
                 for (const auto &edge : edges) {
-                    children_tmp[edge.first].push_back(edge.second);
-                    num_parents_tmp[edge.second]++;
+                    parents_tmp[edge.second].push_back(edge.first);
                 }
             }
-            if constexpr ( is_edge_list_type_v<edge_list_type, vertex_idx, edge_t> ) {
+            if constexpr (is_edge_list_type_v<edge_list_type, vertex_idx, edge_t>) {
                 for (const auto &edge : edges) {
-                    children_tmp[edge.source].push_back(edge.target);
-                    num_parents_tmp[edge.target]++;
+                    parents_tmp[edge.target].push_back(edge.source);
                 }
             }
 
-            std::vector<vertex_idx> csc_edge_children;
-            csc_edge_children.reserve(num_edges());
-            std::vector<edge_t> csc_source_ptr(num_vertices() + 1);
-            std::vector<vertex_idx> csr_edge_parents(num_edges());
-            std::vector<edge_t> csr_target_ptr;
-            csr_target_ptr.reserve(num_vertices() + 1);
-
-            if constexpr (keep_vertex_order) {
-                for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
-                    csc_source_ptr[vert] = static_cast<edge_t>( csc_edge_children.size() );
-                    
-                    std::sort(children_tmp[vert].begin(), children_tmp[vert].end());
-                    for (const auto &chld : children_tmp[vert]) {
-                        csc_edge_children.emplace_back(chld);
-                    }
-                }
-                csc_source_ptr[num_vertices()] = static_cast<edge_t>( csc_edge_children.size() );
+            // Generating modified Gorder topological order cf. "Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey
+            // Xu Yu, Can Lu, and Xuemin Lin
+            const double decay = 8.0;
 
-                csr_target_ptr = std::vector<edge_t>(num_vertices() + 1, 0);
-                for (std::size_t i = 0U; i < num_parents_tmp.size(); ++i) {
-                    csr_target_ptr[i + 1] = csr_target_ptr[i] + num_parents_tmp[i];
-                }
+            std::vector<edge_t> prec_remaining = num_parents_tmp;
+            std::vector<double> priorities(num_vertices(), 0.0);
 
-                std::vector<edge_t> offset = csr_target_ptr;
-                for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
-                    for (const auto &chld : children_tmp[vert]) {
-                        csr_edge_parents[offset[chld]++] = vert;
-                    }
-                }
-                
-            } else {
-                std::vector<std::vector<vertex_idx>> parents_tmp(num_vertices());
+            auto v_cmp = [&priorities, &children_tmp](const vertex_idx &lhs, const vertex_idx &rhs) {
+                return (priorities[lhs] < priorities[rhs])
+                       || ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() < children_tmp[rhs].size()))
+                       || ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() == children_tmp[rhs].size())
+                           && (lhs > rhs));
+            };
 
-                if constexpr ( is_container_of<edge_list_type, std::pair<vertex_idx, vertex_idx>>::value ) {
-                    for (const auto &edge : edges) {
-                        parents_tmp[edge.second].push_back(edge.first);
-                    }
-                }
-                if constexpr ( is_edge_list_type_v<edge_list_type, vertex_idx, edge_t> ) {
-                    for (const auto &edge : edges) {
-                        parents_tmp[edge.target].push_back(edge.source);
-                    }
+            std::priority_queue<vertex_idx, std::vector<vertex_idx>, decltype(v_cmp)> ready_q(v_cmp);
+            for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
+                if (prec_remaining[vert] == 0) {
+                    ready_q.push(vert);
                 }
+            }
 
-                // Generating modified Gorder topological order cf. "Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey Xu Yu, Can Lu, and Xuemin Lin
-                const double decay = 8.0;
+            while (!ready_q.empty()) {
+                vertex_idx vert = ready_q.top();
+                ready_q.pop();
 
-                std::vector<edge_t> prec_remaining = num_parents_tmp;
-                std::vector<double> priorities(num_vertices(), 0.0);
+                double pos = static_cast<double>(vertex_permutation_from_internal_to_original.size());
+                pos /= decay;
 
-                auto v_cmp = [&priorities, &children_tmp] (const vertex_idx &lhs, const vertex_idx &rhs) {
-                    return  (priorities[lhs] < priorities[rhs]) ||
-                            ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() < children_tmp[rhs].size())) ||
-                            ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() == children_tmp[rhs].size()) && (lhs > rhs));
-                };
+                vertex_permutation_from_internal_to_original.push_back(vert);
 
-                std::priority_queue<vertex_idx, std::vector<vertex_idx>, decltype(v_cmp)> ready_q(v_cmp);
-                for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
-                    if (prec_remaining[vert] == 0) {
-                        ready_q.push(vert);
-                    } 
+                // update priorities
+                for (vertex_idx chld : children_tmp[vert]) {
+                    priorities[chld] = log_sum_exp(priorities[chld], pos);
                 }
-
-                while (!ready_q.empty()) {
-                    vertex_idx vert = ready_q.top();
-                    ready_q.pop();
-
-                    double pos = static_cast<double>(vertex_permutation_from_internal_to_original.size());
-                    pos /= decay;
-
-                    vertex_permutation_from_internal_to_original.push_back(vert);
-
-                    // update priorities
-                    for (vertex_idx chld : children_tmp[vert]) {
-                        priorities[chld] = log_sum_exp(priorities[chld], pos);
+                for (vertex_idx par : parents_tmp[vert]) {
+                    for (vertex_idx sibling : children_tmp[par]) {
+                        priorities[sibling] = log_sum_exp(priorities[sibling], pos);
                     }
-                    for (vertex_idx par : parents_tmp[vert]) {
-                        for (vertex_idx sibling : children_tmp[par]) {
-                            priorities[sibling] = log_sum_exp(priorities[sibling], pos);
-                        }
-                    }
-                    for (vertex_idx chld : children_tmp[vert]) {
-                        for (vertex_idx couple : parents_tmp[chld]) {
-                            priorities[couple] = log_sum_exp(priorities[couple], pos);
-                        }
-                    }
-
-                    // update constraints and push to queue
-                    for (vertex_idx chld : children_tmp[vert]) {
-                        --prec_remaining[chld];
-                        if (prec_remaining[chld] == 0) {
-                            ready_q.push(chld);
-                        }
-                    }
-                }
-
-                assert(vertex_permutation_from_internal_to_original.size() == static_cast<size_t>(num_vertices()));
-
-
-                // constructing the csr and csc
-                vertex_permutation_from_original_to_internal = std::vector<vertex_idx>(num_vertices(), 0);
-                for (vertex_idx new_pos = 0; new_pos < num_vertices(); ++new_pos) {
-                    vertex_permutation_from_original_to_internal[vertex_permutation_from_internal_to_original[new_pos]] = new_pos;
                 }
-
-                for (vertex_idx vert_new_pos = 0; vert_new_pos < num_vertices(); ++vert_new_pos) {
-                    csc_source_ptr[vert_new_pos] = static_cast<edge_t>( csc_edge_children.size() );
-
-                    vertex_idx vert_old_name = vertex_permutation_from_internal_to_original[vert_new_pos];
-
-                    std::vector<vertex_idx> children_new_name;
-                    children_new_name.reserve( children_tmp[vert_old_name].size() );
-
-                    for (vertex_idx chld_old_name : children_tmp[vert_old_name]) {
-                        children_new_name.push_back( vertex_permutation_from_original_to_internal[chld_old_name] );
+                for (vertex_idx chld : children_tmp[vert]) {
+                    for (vertex_idx couple : parents_tmp[chld]) {
+                        priorities[couple] = log_sum_exp(priorities[couple], pos);
                     }
-                    
-                    
-                    std::sort(children_new_name.begin(), children_new_name.end());
-                    for (const auto &chld : children_new_name) {
-                        csc_edge_children.emplace_back(chld);
-                    }
-                }
-                csc_source_ptr[num_vertices()] = static_cast<edge_t>( csc_edge_children.size() );
-
-                edge_t acc = 0;
-                for (vertex_idx vert_old_name : vertex_permutation_from_internal_to_original) {
-                    csr_target_ptr.push_back(acc);
-                    acc += num_parents_tmp[vert_old_name];
                 }
-                csr_target_ptr.push_back(acc);
 
-                std::vector<edge_t> offset = csr_target_ptr;
-                for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
-                    for (edge_t indx = csc_source_ptr[vert]; indx < csc_source_ptr[vert + 1]; ++indx) {
-                        const vertex_idx chld = csc_edge_children[indx];
-                        csr_edge_parents[offset[chld]++] = vert;
+                // update constraints and push to queue
+                for (vertex_idx chld : children_tmp[vert]) {
+                    --prec_remaining[chld];
+                    if (prec_remaining[chld] == 0) {
+                        ready_q.push(chld);
                     }
                 }
             }
 
-            csc_out_edges = Compact_Children_Edges(std::move(csc_edge_children), std::move(csc_source_ptr));
-            csr_in_edges = Compact_Parent_Edges(std::move(csr_edge_parents), std::move(csr_target_ptr));
-        }
+            assert(vertex_permutation_from_internal_to_original.size() == static_cast<size_t>(num_vertices()));
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = ww;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
+            // constructing the csr and csc
+            vertex_permutation_from_original_to_internal = std::vector<vertex_idx>(num_vertices(), 0);
+            for (vertex_idx new_pos = 0; new_pos < num_vertices(); ++new_pos) {
+                vertex_permutation_from_original_to_internal[vertex_permutation_from_internal_to_original[new_pos]] = new_pos;
             }
-        }
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, edge_list_type & edges, const std::vector<vertex_work_weight_type> &&ww) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
+            for (vertex_idx vert_new_pos = 0; vert_new_pos < num_vertices(); ++vert_new_pos) {
+                csc_source_ptr[vert_new_pos] = static_cast<edge_t>(csc_edge_children.size());
 
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = std::move(ww);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
-            }
-        }
+                vertex_idx vert_old_name = vertex_permutation_from_internal_to_original[vert_new_pos];
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww, const std::vector<vertex_comm_weight_type> &cw) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            assert((cw.size() == static_cast<std::size_t>(num_vertices())) && "Communication weights vector must have the same length as the number of vertices.");
+                std::vector<vertex_idx> children_new_name;
+                children_new_name.reserve(children_tmp[vert_old_name].size());
 
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = ww;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
+                for (vertex_idx chld_old_name : children_tmp[vert_old_name]) {
+                    children_new_name.push_back(vertex_permutation_from_original_to_internal[chld_old_name]);
                 }
-            }
 
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights = cw;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
+                std::sort(children_new_name.begin(), children_new_name.end());
+                for (const auto &chld : children_new_name) {
+                    csc_edge_children.emplace_back(chld);
                 }
             }
-        }
-
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, std::vector<vertex_work_weight_type> &&ww, std::vector<vertex_comm_weight_type> &&cw) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            assert((cw.size() == static_cast<std::size_t>(num_vertices())) && "Communication weights vector must have the same length as the number of vertices.");
+            csc_source_ptr[num_vertices()] = static_cast<edge_t>(csc_edge_children.size());
 
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = std::move(ww);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
+            edge_t acc = 0;
+            for (vertex_idx vert_old_name : vertex_permutation_from_internal_to_original) {
+                csr_target_ptr.push_back(acc);
+                acc += num_parents_tmp[vert_old_name];
             }
+            csr_target_ptr.push_back(acc);
 
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights = std::move(cw);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
+            std::vector<edge_t> offset = csr_target_ptr;
+            for (vertex_idx vert = 0; vert < num_vertices(); ++vert) {
+                for (edge_t indx = csc_source_ptr[vert]; indx < csc_source_ptr[vert + 1]; ++indx) {
+                    const vertex_idx chld = csc_edge_children[indx];
+                    csr_edge_parents[offset[chld]++] = vert;
                 }
             }
         }
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww, const std::vector<vertex_comm_weight_type> &cw, const std::vector<vertex_mem_weight_type> &mw) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
-            static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            assert((cw.size() == static_cast<std::size_t>(num_vertices())) && "Communication weights vector must have the same length as the number of vertices.");
-            assert((mw.size() == static_cast<std::size_t>(num_vertices())) && "Memory weights vector must have the same length as the number of vertices.");
-
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = ww;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
-            }
-
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights = cw;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
-                }
-            }
-
-            if constexpr (keep_vertex_order) {
-                vert_mem_weights = mw;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
-                }
+        csc_out_edges = Compact_Children_Edges(std::move(csc_edge_children), std::move(csc_source_ptr));
+        csr_in_edges = Compact_Parent_Edges(std::move(csr_edge_parents), std::move(csr_target_ptr));
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type &edges, const std::vector<vertex_work_weight_type> &ww)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = ww;
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
         }
-
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, std::vector<vertex_work_weight_type> &&ww, std::vector<vertex_comm_weight_type> &&cw, std::vector<vertex_mem_weight_type> &&mw) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
-            static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            assert((cw.size() == static_cast<std::size_t>(num_vertices())) && "Communication weights vector must have the same length as the number of vertices.");
-            assert((mw.size() == static_cast<std::size_t>(num_vertices())) && "Memory weights vector must have the same length as the number of vertices.");
-
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = std::move(ww);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
-            }
-
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights = std::move(cw);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
-                }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_, edge_list_type &edges, const std::vector<vertex_work_weight_type> &&ww)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = std::move(ww);
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
-
-            if constexpr (keep_vertex_order) {
-                vert_mem_weights = std::move(mw);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
-                }
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_,
+                         const edge_list_type &edges,
+                         const std::vector<vertex_work_weight_type> &ww,
+                         const std::vector<vertex_comm_weight_type> &cw)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+        assert((cw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Communication weights vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = ww;
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
         }
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww, const std::vector<vertex_comm_weight_type> &cw, const std::vector<vertex_mem_weight_type> &mw, const std::vector<vertex_type_type> &vt) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
-            static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
-            static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            assert((cw.size() == static_cast<std::size_t>(num_vertices())) && "Communication weights vector must have the same length as the number of vertices.");
-            assert((mw.size() == static_cast<std::size_t>(num_vertices())) && "Memory weights vector must have the same length as the number of vertices.");
-            assert((vt.size() == static_cast<std::size_t>(num_vertices())) && "Vertex type vector must have the same length as the number of vertices.");
-
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = ww;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights = cw;
+        } else {
+            for (auto vert : vertices()) {
+                vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
             }
-
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights = cw;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
-                }
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_,
+                         const edge_list_type &edges,
+                         std::vector<vertex_work_weight_type> &&ww,
+                         std::vector<vertex_comm_weight_type> &&cw)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+        assert((cw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Communication weights vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = std::move(ww);
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
+        }
 
-            if constexpr (keep_vertex_order) {
-                vert_mem_weights = mw;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
-                }
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights = std::move(cw);
+        } else {
+            for (auto vert : vertices()) {
+                vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
             }
-
-            if constexpr (keep_vertex_order) {
-                vert_types = vt;
-            } else {
-                for (auto vert : vertices()) {
-                    vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]];
-                }
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_,
+                         const edge_list_type &edges,
+                         const std::vector<vertex_work_weight_type> &ww,
+                         const std::vector<vertex_comm_weight_type> &cw,
+                         const std::vector<vertex_mem_weight_type> &mw)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
+        static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+        assert((cw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Communication weights vector must have the same length as the number of vertices.");
+        assert((mw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Memory weights vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = ww;
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
         }
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, std::vector<vertex_work_weight_type> &&ww, std::vector<vertex_comm_weight_type> &&cw, std::vector<vertex_mem_weight_type> &&mw, std::vector<vertex_type_type> &&vt) : Compact_Sparse_Graph(num_vertices_, edges) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-            static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
-            static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
-            static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types.");
-            assert((ww.size() == static_cast<std::size_t>(num_vertices())) && "Work weights vector must have the same length as the number of vertices.");
-            assert((cw.size() == static_cast<std::size_t>(num_vertices())) && "Communication weights vector must have the same length as the number of vertices.");
-            assert((mw.size() == static_cast<std::size_t>(num_vertices())) && "Memory weights vector must have the same length as the number of vertices.");
-            assert((vt.size() == static_cast<std::size_t>(num_vertices())) && "Vertex type vector must have the same length as the number of vertices.");
-
-            if constexpr (keep_vertex_order) {
-                vert_work_weights = std::move(ww);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
-                }
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights = cw;
+        } else {
+            for (auto vert : vertices()) {
+                vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
             }
+        }
 
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights = std::move(cw);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
-                }
+        if constexpr (keep_vertex_order) {
+            vert_mem_weights = mw;
+        } else {
+            for (auto vert : vertices()) {
+                vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
             }
-
-            if constexpr (keep_vertex_order) {
-                vert_mem_weights = std::move(mw);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
-                }
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_,
+                         const edge_list_type &edges,
+                         std::vector<vertex_work_weight_type> &&ww,
+                         std::vector<vertex_comm_weight_type> &&cw,
+                         std::vector<vertex_mem_weight_type> &&mw)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
+        static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+        assert((cw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Communication weights vector must have the same length as the number of vertices.");
+        assert((mw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Memory weights vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = std::move(ww);
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
+        }
 
-            if constexpr (keep_vertex_order) {
-                vert_types = std::move(vt);
-            } else {
-                for (auto vert : vertices()) {
-                    vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]];
-                }
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights = std::move(cw);
+        } else {
+            for (auto vert : vertices()) {
+                vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
             }
         }
 
-        template <typename Graph_type>
-        Compact_Sparse_Graph(const Graph_type  & graph) : Compact_Sparse_Graph(graph.num_vertices(), edge_view(graph)) {
-            static_assert(is_directed_graph_v<Graph_type>);
-
-            if constexpr (is_computational_dag_v<Graph_type> && use_work_weights) {
-                for (const auto &vert : graph.vertices()) {
-                    set_vertex_work_weight(vert, graph.vertex_work_weight(vert));
-                }
+        if constexpr (keep_vertex_order) {
+            vert_mem_weights = std::move(mw);
+        } else {
+            for (auto vert : vertices()) {
+                vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
             }
-
-            if constexpr (is_computational_dag_v<Graph_type> && use_comm_weights) {
-                for (const auto &vert : graph.vertices()) {
-                    set_vertex_comm_weight(vert, graph.vertex_comm_weight(vert));
-                }
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_,
+                         const edge_list_type &edges,
+                         const std::vector<vertex_work_weight_type> &ww,
+                         const std::vector<vertex_comm_weight_type> &cw,
+                         const std::vector<vertex_mem_weight_type> &mw,
+                         const std::vector<vertex_type_type> &vt)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
+        static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
+        static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+        assert((cw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Communication weights vector must have the same length as the number of vertices.");
+        assert((mw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Memory weights vector must have the same length as the number of vertices.");
+        assert((vt.size() == static_cast<std::size_t>(num_vertices()))
+               && "Vertex type vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = ww;
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
             }
+        }
 
-            if constexpr (is_computational_dag_v<Graph_type> && use_mem_weights) {
-                for (const auto &vert : graph.vertices()) {
-                    set_vertex_mem_weight(vert, graph.vertex_mem_weight(vert));
-                }
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights = cw;
+        } else {
+            for (auto vert : vertices()) {
+                vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
             }
+        }
 
-            if constexpr (is_computational_dag_typed_vertices_v<Graph_type> && use_vert_types) {
-                for (const auto &vert : graph.vertices()) {
-                    set_vertex_type(vert, graph.vertex_type(vert));
-                }
+        if constexpr (keep_vertex_order) {
+            vert_mem_weights = mw;
+        } else {
+            for (auto vert : vertices()) {
+                vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
             }
         }
 
-        inline auto vertices() const { return integral_range<vertex_idx>(number_of_vertices); };
-
-        inline vert_t num_vertices() const { return number_of_vertices; };
-        inline edge_t num_edges() const { return number_of_edges; }
-
-        inline auto parents(const vertex_idx &v) const { return csr_in_edges.parents(v); };
-        inline auto children(const vertex_idx &v) const { return csc_out_edges.children(v); };
-
-        inline edge_t in_degree(const vertex_idx &v) const {
-            return csr_in_edges.number_of_parents(v);
-        };
-        inline edge_t out_degree(const vertex_idx &v) const {
-            return csc_out_edges.number_of_children(v);
-        };
-
-        template<typename RetT = vertex_work_weight_type>
-        inline std::enable_if_t<use_work_weights, RetT> vertex_work_weight(const vertex_idx &v) const {
-            return vert_work_weights[v];
+        if constexpr (keep_vertex_order) {
+            vert_types = vt;
+        } else {
+            for (auto vert : vertices()) {
+                vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]];
+            }
         }
-        template<typename RetT = vertex_work_weight_type>
-        inline std::enable_if_t<not use_work_weights, RetT> vertex_work_weight(const vertex_idx &v) const {
-            return static_cast<RetT>(1) + in_degree(v);
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph(vertex_idx num_vertices_,
+                         const edge_list_type &edges,
+                         std::vector<vertex_work_weight_type> &&ww,
+                         std::vector<vertex_comm_weight_type> &&cw,
+                         std::vector<vertex_mem_weight_type> &&mw,
+                         std::vector<vertex_type_type> &&vt)
+        : Compact_Sparse_Graph(num_vertices_, edges) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+        static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights.");
+        static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights.");
+        static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types.");
+        assert((ww.size() == static_cast<std::size_t>(num_vertices()))
+               && "Work weights vector must have the same length as the number of vertices.");
+        assert((cw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Communication weights vector must have the same length as the number of vertices.");
+        assert((mw.size() == static_cast<std::size_t>(num_vertices()))
+               && "Memory weights vector must have the same length as the number of vertices.");
+        assert((vt.size() == static_cast<std::size_t>(num_vertices()))
+               && "Vertex type vector must have the same length as the number of vertices.");
+
+        if constexpr (keep_vertex_order) {
+            vert_work_weights = std::move(ww);
+        } else {
+            for (auto vert : vertices()) {
+                vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]];
+            }
         }
 
-        template<typename RetT = vertex_comm_weight_type>
-        inline std::enable_if_t<use_comm_weights, RetT> vertex_comm_weight(const vertex_idx &v) const {
-            return vert_comm_weights[v];
-        }
-        template<typename RetT = vertex_comm_weight_type>
-        inline std::enable_if_t<not use_comm_weights, RetT> vertex_comm_weight(const vertex_idx) const {
-            return static_cast<RetT>(0);
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights = std::move(cw);
+        } else {
+            for (auto vert : vertices()) {
+                vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]];
+            }
         }
 
-        template<typename RetT = vertex_mem_weight_type>
-        inline std::enable_if_t<use_mem_weights, RetT> vertex_mem_weight(const vertex_idx &v) const {
-            return vert_mem_weights[v];
-        }
-        template<typename RetT = vertex_mem_weight_type>
-        inline std::enable_if_t<not use_mem_weights, RetT> vertex_mem_weight(const vertex_idx) const {
-            return static_cast<RetT>(0);
+        if constexpr (keep_vertex_order) {
+            vert_mem_weights = std::move(mw);
+        } else {
+            for (auto vert : vertices()) {
+                vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]];
+            }
         }
 
-        template<typename RetT = vertex_type_type>
-        inline std::enable_if_t<use_vert_types, RetT> vertex_type(const vertex_idx &v) const {
-            return vert_types[v];
-        }
-        template<typename RetT = vertex_type_type>
-        inline std::enable_if_t<not use_vert_types, RetT> vertex_type(const vertex_idx) const {
-            return static_cast<RetT>(0);
+        if constexpr (keep_vertex_order) {
+            vert_types = std::move(vt);
+        } else {
+            for (auto vert : vertices()) {
+                vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]];
+            }
         }
+    }
 
-        inline vertex_type_type num_vertex_types() const { return number_of_vertex_types; };
+    template <typename Graph_type>
+    Compact_Sparse_Graph(const Graph_type &graph) : Compact_Sparse_Graph(graph.num_vertices(), edge_view(graph)) {
+        static_assert(is_directed_graph_v<Graph_type>);
 
-        template<typename RetT = void>
-        inline std::enable_if_t<use_work_weights, RetT> set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) {
-            if constexpr (keep_vertex_order) {
-                vert_work_weights[v] = work_weight;
-            } else {
-                vert_work_weights[vertex_permutation_from_original_to_internal[v]] = work_weight;
+        if constexpr (is_computational_dag_v<Graph_type> && use_work_weights) {
+            for (const auto &vert : graph.vertices()) {
+                set_vertex_work_weight(vert, graph.vertex_work_weight(vert));
             }
         }
-        template<typename RetT = void>
-        inline std::enable_if_t<not use_work_weights, RetT> set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) {
-            static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
-        }
 
-        template<typename RetT = void>
-        inline std::enable_if_t<use_comm_weights, RetT> set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) {
-            if constexpr (keep_vertex_order) {
-                vert_comm_weights[v] = comm_weight;
-            } else {
-                vert_comm_weights[vertex_permutation_from_original_to_internal[v]] = comm_weight;
+        if constexpr (is_computational_dag_v<Graph_type> && use_comm_weights) {
+            for (const auto &vert : graph.vertices()) {
+                set_vertex_comm_weight(vert, graph.vertex_comm_weight(vert));
             }
         }
-        template<typename RetT = void>
-        inline std::enable_if_t<not use_comm_weights, RetT> set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) {
-            static_assert(use_comm_weights, "To set comm weight, graph type must allow comm weights.");
-        }
-        
-        template<typename RetT = void>
-        inline std::enable_if_t<use_mem_weights, RetT> set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type mem_weight) {
-            if constexpr (keep_vertex_order) {
-                vert_mem_weights[v] = mem_weight;
-            } else {
-                vert_mem_weights[vertex_permutation_from_original_to_internal[v]] = mem_weight;
+
+        if constexpr (is_computational_dag_v<Graph_type> && use_mem_weights) {
+            for (const auto &vert : graph.vertices()) {
+                set_vertex_mem_weight(vert, graph.vertex_mem_weight(vert));
             }
         }
-        template<typename RetT = void>
-        inline std::enable_if_t<not use_mem_weights, RetT> set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type mem_weight) {
-            static_assert(use_mem_weights, "To set mem weight, graph type must allow mem weights.");
-        }
-        
-        template<typename RetT = void>
-        inline std::enable_if_t<use_vert_types, RetT> set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) {
-            if constexpr (keep_vertex_order) {
-                vert_types[v] = vertex_type_;
-            } else {
-                vert_types[vertex_permutation_from_original_to_internal[v]] = vertex_type_;
+
+        if constexpr (is_computational_dag_typed_vertices_v<Graph_type> && use_vert_types) {
+            for (const auto &vert : graph.vertices()) {
+                set_vertex_type(vert, graph.vertex_type(vert));
             }
-            number_of_vertex_types = std::max(number_of_vertex_types, vertex_type_);
-        }
-        template<typename RetT = void>
-        inline std::enable_if_t<not use_vert_types, RetT> set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) {
-            static_assert(use_vert_types, "To set vert type, graph type must allow vertex types.");
         }
+    }
 
-        template<typename RetT = const std::vector<vertex_idx> &>
-        inline std::enable_if_t<keep_vertex_order, RetT> get_pullback_permutation() const {
-            static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function.");
-            return {};
-        }
+    inline auto vertices() const { return integral_range<vertex_idx>(number_of_vertices); };
 
-        template<typename RetT = const std::vector<vertex_idx> &>
-        inline std::enable_if_t<not keep_vertex_order, RetT> get_pullback_permutation() const {
-            return vertex_permutation_from_internal_to_original;
-        }
+    inline vert_t num_vertices() const { return number_of_vertices; };
 
-        template<typename RetT = const std::vector<vertex_idx> &>
-        inline std::enable_if_t<keep_vertex_order, RetT> get_pushforward_permutation() const {
-            static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function.");
-            return {};
-        }
+    inline edge_t num_edges() const { return number_of_edges; }
 
-        template<typename RetT = const std::vector<vertex_idx> &>
-        inline std::enable_if_t<not keep_vertex_order, RetT> get_pushforward_permutation() const {
-            return vertex_permutation_from_original_to_internal;
-        }
-};
+    inline auto parents(const vertex_idx &v) const { return csr_in_edges.parents(v); };
+
+    inline auto children(const vertex_idx &v) const { return csc_out_edges.children(v); };
 
+    inline edge_t in_degree(const vertex_idx &v) const { return csr_in_edges.number_of_parents(v); };
 
+    inline edge_t out_degree(const vertex_idx &v) const { return csc_out_edges.number_of_children(v); };
 
+    template <typename RetT = vertex_work_weight_type>
+    inline std::enable_if_t<use_work_weights, RetT> vertex_work_weight(const vertex_idx &v) const {
+        return vert_work_weights[v];
+    }
 
+    template <typename RetT = vertex_work_weight_type>
+    inline std::enable_if_t<not use_work_weights, RetT> vertex_work_weight(const vertex_idx &v) const {
+        return static_cast<RetT>(1) + in_degree(v);
+    }
 
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename vertex_type_template_type>
-struct is_Compact_Sparse_Graph<Compact_Sparse_Graph<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>, void> : std::true_type {};
+    template <typename RetT = vertex_comm_weight_type>
+    inline std::enable_if_t<use_comm_weights, RetT> vertex_comm_weight(const vertex_idx &v) const {
+        return vert_comm_weights[v];
+    }
 
-template<bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename vertex_type_template_type>
-struct is_Compact_Sparse_Graph_reorder<Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>, void> : std::true_type {};
+    template <typename RetT = vertex_comm_weight_type>
+    inline std::enable_if_t<not use_comm_weights, RetT> vertex_comm_weight(const vertex_idx) const {
+        return static_cast<RetT>(0);
+    }
 
+    template <typename RetT = vertex_mem_weight_type>
+    inline std::enable_if_t<use_mem_weights, RetT> vertex_mem_weight(const vertex_idx &v) const {
+        return vert_mem_weights[v];
+    }
 
+    template <typename RetT = vertex_mem_weight_type>
+    inline std::enable_if_t<not use_mem_weights, RetT> vertex_mem_weight(const vertex_idx) const {
+        return static_cast<RetT>(0);
+    }
 
+    template <typename RetT = vertex_type_type>
+    inline std::enable_if_t<use_vert_types, RetT> vertex_type(const vertex_idx &v) const {
+        return vert_types[v];
+    }
 
+    template <typename RetT = vertex_type_type>
+    inline std::enable_if_t<not use_vert_types, RetT> vertex_type(const vertex_idx) const {
+        return static_cast<RetT>(0);
+    }
 
+    inline vertex_type_type num_vertex_types() const { return number_of_vertex_types; };
+
+    template <typename RetT = void>
+    inline std::enable_if_t<use_work_weights, RetT> set_vertex_work_weight(const vertex_idx &v,
+                                                                           const vertex_work_weight_type work_weight) {
+        if constexpr (keep_vertex_order) {
+            vert_work_weights[v] = work_weight;
+        } else {
+            vert_work_weights[vertex_permutation_from_original_to_internal[v]] = work_weight;
+        }
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<not use_work_weights, RetT> set_vertex_work_weight(const vertex_idx &v,
+                                                                               const vertex_work_weight_type work_weight) {
+        static_assert(use_work_weights, "To set work weight, graph type must allow work weights.");
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<use_comm_weights, RetT> set_vertex_comm_weight(const vertex_idx &v,
+                                                                           const vertex_comm_weight_type comm_weight) {
+        if constexpr (keep_vertex_order) {
+            vert_comm_weights[v] = comm_weight;
+        } else {
+            vert_comm_weights[vertex_permutation_from_original_to_internal[v]] = comm_weight;
+        }
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<not use_comm_weights, RetT> set_vertex_comm_weight(const vertex_idx &v,
+                                                                               const vertex_comm_weight_type comm_weight) {
+        static_assert(use_comm_weights, "To set comm weight, graph type must allow comm weights.");
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<use_mem_weights, RetT> set_vertex_mem_weight(const vertex_idx &v,
+                                                                         const vertex_mem_weight_type mem_weight) {
+        if constexpr (keep_vertex_order) {
+            vert_mem_weights[v] = mem_weight;
+        } else {
+            vert_mem_weights[vertex_permutation_from_original_to_internal[v]] = mem_weight;
+        }
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<not use_mem_weights, RetT> set_vertex_mem_weight(const vertex_idx &v,
+                                                                             const vertex_mem_weight_type mem_weight) {
+        static_assert(use_mem_weights, "To set mem weight, graph type must allow mem weights.");
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<use_vert_types, RetT> set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) {
+        if constexpr (keep_vertex_order) {
+            vert_types[v] = vertex_type_;
+        } else {
+            vert_types[vertex_permutation_from_original_to_internal[v]] = vertex_type_;
+        }
+        number_of_vertex_types = std::max(number_of_vertex_types, vertex_type_);
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<not use_vert_types, RetT> set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) {
+        static_assert(use_vert_types, "To set vert type, graph type must allow vertex types.");
+    }
+
+    template <typename RetT = const std::vector<vertex_idx> &>
+    inline std::enable_if_t<keep_vertex_order, RetT> get_pullback_permutation() const {
+        static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function.");
+        return {};
+    }
+
+    template <typename RetT = const std::vector<vertex_idx> &>
+    inline std::enable_if_t<not keep_vertex_order, RetT> get_pullback_permutation() const {
+        return vertex_permutation_from_internal_to_original;
+    }
+
+    template <typename RetT = const std::vector<vertex_idx> &>
+    inline std::enable_if_t<keep_vertex_order, RetT> get_pushforward_permutation() const {
+        static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function.");
+        return {};
+    }
+
+    template <typename RetT = const std::vector<vertex_idx> &>
+    inline std::enable_if_t<not keep_vertex_order, RetT> get_pushforward_permutation() const {
+        return vertex_permutation_from_original_to_internal;
+    }
+};
 
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename vertex_type_template_type>
+struct is_Compact_Sparse_Graph<Compact_Sparse_Graph<keep_vertex_order,
+                                                    use_work_weights,
+                                                    use_comm_weights,
+                                                    use_mem_weights,
+                                                    use_vert_types,
+                                                    vert_t,
+                                                    edge_t,
+                                                    work_weight_type,
+                                                    comm_weight_type,
+                                                    mem_weight_type,
+                                                    vertex_type_template_type>,
+                               void> : std::true_type {};
+
+template <bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename vertex_type_template_type>
+struct is_Compact_Sparse_Graph_reorder<Compact_Sparse_Graph<false,
+                                                            use_work_weights,
+                                                            use_comm_weights,
+                                                            use_mem_weights,
+                                                            use_vert_types,
+                                                            vert_t,
+                                                            edge_t,
+                                                            work_weight_type,
+                                                            comm_weight_type,
+                                                            mem_weight_type,
+                                                            vertex_type_template_type>,
+                                       void> : std::true_type {};
 
 static_assert(is_Compact_Sparse_Graph_v<Compact_Sparse_Graph<true>>);
 static_assert(is_Compact_Sparse_Graph_v<Compact_Sparse_Graph<false>>);
 static_assert(!is_Compact_Sparse_Graph_reorder_v<Compact_Sparse_Graph<true>>);
 static_assert(is_Compact_Sparse_Graph_reorder_v<Compact_Sparse_Graph<false>>);
 
+static_assert(has_vertex_weights_v<Compact_Sparse_Graph<true, true>>,
+              "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
 
+static_assert(has_vertex_weights_v<Compact_Sparse_Graph<false, true>>,
+              "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
 
+static_assert(is_directed_graph_v<Compact_Sparse_Graph<false, false, false, false, false>>,
+              "Compact_Sparse_Graph must satisfy the directed_graph concept");
 
-static_assert(has_vertex_weights_v<Compact_Sparse_Graph<true, true>>, 
-    "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph<false, true, true, true, true>>,
+              "Compact_Sparse_Graph must satisfy the directed_graph concept");
 
-static_assert(has_vertex_weights_v<Compact_Sparse_Graph<false, true>>, 
-    "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph<true, false, false, false, false>>,
+              "Compact_Sparse_Graph must satisfy the directed_graph concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph<false, false, false, false, false>>, 
-    "Compact_Sparse_Graph must satisfy the directed_graph concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph<true, true, true, true, true>>,
+              "Compact_Sparse_Graph must satisfy the directed_graph concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph<false, true, true, true, true>>, 
-    "Compact_Sparse_Graph must satisfy the directed_graph concept");
+static_assert(is_computational_dag_v<Compact_Sparse_Graph<false, true, true, true, false>>,
+              "Compact_Sparse_Graph must satisfy the is_computational_dag concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph<true, false, false, false, false>>, 
-    "Compact_Sparse_Graph must satisfy the directed_graph concept");
-
-static_assert(is_directed_graph_v<Compact_Sparse_Graph<true, true, true, true, true>>, 
-    "Compact_Sparse_Graph must satisfy the directed_graph concept");
-
-static_assert(is_computational_dag_v<Compact_Sparse_Graph<false, true, true, true, false>>, 
-    "Compact_Sparse_Graph must satisfy the is_computation_dag concept");
-
-static_assert(is_computational_dag_v<Compact_Sparse_Graph<true, true, true, true, false>>, 
-    "Compact_Sparse_Graph must satisfy the is_computation_dag concept");
+static_assert(is_computational_dag_v<Compact_Sparse_Graph<true, true, true, true, false>>,
+              "Compact_Sparse_Graph must satisfy the is_computational_dag concept");
 
 static_assert(is_computational_dag_typed_vertices_v<Compact_Sparse_Graph<false, true, true, true, true>>,
-    "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept");
+              "Compact_Sparse_Graph must satisfy the is_computational_dag with types concept");
 
 static_assert(is_computational_dag_typed_vertices_v<Compact_Sparse_Graph<true, true, true, true, true>>,
-    "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept");
+              "Compact_Sparse_Graph must satisfy the is_computational_dag with types concept");
 
-static_assert(is_direct_constructable_cdag_v<Compact_Sparse_Graph<true, true>>, 
-    "Compact_Sparse_Graph must be directly constructable");
+static_assert(is_direct_constructable_cdag_v<Compact_Sparse_Graph<true, true>>,
+              "Compact_Sparse_Graph must be directly constructable");
 
-static_assert(is_direct_constructable_cdag_v<Compact_Sparse_Graph<false, true>>, 
-    "Compact_Sparse_Graph must be directly constructable");
+static_assert(is_direct_constructable_cdag_v<Compact_Sparse_Graph<false, true>>,
+              "Compact_Sparse_Graph must be directly constructable");
 
 using CSG = Compact_Sparse_Graph<false, true, true, true, true, std::size_t, std::size_t, unsigned, unsigned, unsigned, unsigned>;
 
-static_assert(is_directed_graph_edge_desc_v<CSG>,
-              "CSG must satisfy the directed_graph_edge_desc concept");
-
-
-
-
-
-
+static_assert(is_directed_graph_edge_desc_v<CSG>, "CSG must satisfy the directed_graph_edge_desc concept");
 
 // // Graph specific implementations
 
-// template<typename Graph_t_in, typename v_work_acc_method, typename v_comm_acc_method, typename v_mem_acc_method, typename e_comm_acc_method,
-//          bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_vert_types, typename vert_t, typename edge_t,
-//          typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename vertex_type_template_type>
+// template<typename Graph_t_in, typename v_work_acc_method, typename v_comm_acc_method, typename v_mem_acc_method, typename
+// e_comm_acc_method,
+//          bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_vert_types, typename vert_t, typename
+//          edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename
+//          vertex_type_template_type>
 // bool coarser_util::construct_coarse_dag(
 //             const Graph_t_in &dag_in,
-//             Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type> &coarsened_dag,
-//             std::vector<vertex_idx_t<Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>>> &vertex_contraction_map) {
+//             Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t,
+//             work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type> &coarsened_dag,
+//             std::vector<vertex_idx_t<Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights,
+//             use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>>>
+//             &vertex_contraction_map) {
 
-//     using Graph_out_type = Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>;
+//     using Graph_out_type = Compact_Sparse_Graph<false, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types,
+//     vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>;
 
-//     static_assert(is_directed_graph_v<Graph_t_in> && is_directed_graph_v<Graph_out_type>, "Graph types need to satisfy the is_directed_graph concept.");
-//     static_assert(is_computational_dag_v<Graph_t_in>, "Graph_t_in must be a computational DAG");
-//     static_assert(is_constructable_cdag_v<Graph_out_type> || is_direct_constructable_cdag_v<Graph_out_type>, "Graph_out_type must be a (direct) constructable computational DAG");
+//     static_assert(is_directed_graph_v<Graph_t_in> && is_directed_graph_v<Graph_out_type>, "Graph types need to satisfy the
+//     is_directed_graph concept."); static_assert(is_computational_dag_v<Graph_t_in>, "Graph_t_in must be a computational DAG");
+//     static_assert(is_constructable_cdag_v<Graph_out_type> || is_direct_constructable_cdag_v<Graph_out_type>, "Graph_out_type
+//     must be a (direct) constructable computational DAG");
 
 //     assert(check_valid_contraction_map<Graph_out_type>(vertex_contraction_map));
 
-
 //     const vertex_idx_t<Graph_out_type> num_vert_quotient =
 //         (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1;
 
@@ -906,9 +1086,10 @@ static_assert(is_directed_graph_edge_desc_v<CSG>,
 //     }
 
 //     if constexpr (has_vertex_weights_v<Graph_t_in> && is_modifiable_cdag_vertex_v<Graph_out_type>) {
-//         static_assert(std::is_same_v<v_workw_t<Graph_t_in>, v_workw_t<Graph_out_type>>, "Work weight types of in-graph and out-graph must be the same.");
-//         static_assert(std::is_same_v<v_commw_t<Graph_t_in>, v_commw_t<Graph_out_type>>, "Vertex communication types of in-graph and out-graph must be the same.");
-//         static_assert(std::is_same_v<v_memw_t<Graph_t_in>, v_memw_t<Graph_out_type>>, "Memory weight types of in-graph and out-graph must be the same.");
+//         static_assert(std::is_same_v<v_workw_t<Graph_t_in>, v_workw_t<Graph_out_type>>, "Work weight types of in-graph and
+//         out-graph must be the same."); static_assert(std::is_same_v<v_commw_t<Graph_t_in>, v_commw_t<Graph_out_type>>, "Vertex
+//         communication types of in-graph and out-graph must be the same."); static_assert(std::is_same_v<v_memw_t<Graph_t_in>,
+//         v_memw_t<Graph_out_type>>, "Memory weight types of in-graph and out-graph must be the same.");
 
 //         for (const vertex_idx_t<Graph_t_in> &vert : coarsened_dag.vertices()) {
 //             coarsened_dag.set_vertex_work_weight(vert, 0);
@@ -951,31 +1132,7 @@ static_assert(is_directed_graph_edge_desc_v<CSG>,
 
 //     std::cout << "Specific Template construct coarsen dag" << std::endl;
 
-    
 //     return true;
 // };
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp
index c455b582..68b8b967 100644
--- a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp
@@ -17,361 +17,698 @@ limitations under the License.
 */
 #pragma once
 
-#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/concepts/directed_graph_edge_desc_concept.hpp"
+#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 
 namespace osp {
 
-template<bool keep_vertex_order, bool use_work_weights = false, bool use_comm_weights = false, bool use_mem_weights = false, bool use_edge_comm_weights = false, bool use_vert_types = false, typename vert_t = std::size_t, typename edge_t = std::size_t, typename work_weight_type = unsigned, typename comm_weight_type = unsigned, typename mem_weight_type = unsigned, typename e_comm_weight_type = unsigned, typename vertex_type_template_type = unsigned>
-class Compact_Sparse_Graph_EdgeDesc : public Compact_Sparse_Graph<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type> {
-    private:
-        using ThisT = Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>;
-        using BaseT = Compact_Sparse_Graph<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, vertex_type_template_type>;
-    
-    public:
-        using vertex_idx = typename BaseT::vertex_idx;
-
-        using vertex_work_weight_type   = typename BaseT::vertex_work_weight_type;
-        using vertex_comm_weight_type   = typename BaseT::vertex_comm_weight_type;
-        using vertex_mem_weight_type    = typename BaseT::vertex_mem_weight_type;
-        using vertex_type_type          = typename BaseT::vertex_type_type;
-
-        using directed_edge_descriptor  = edge_t;
-        using edge_comm_weight_type     = e_comm_weight_type;
-
-    protected:
-        std::vector<edge_comm_weight_type> edge_comm_weights;
-
-        class In_Edges_range {
-            private:
-                const vertex_idx tgt_vert;
-                const typename BaseT::Compact_Parent_Edges::Parent_range par_range;
-                const typename BaseT::Compact_Children_Edges &csc_out_edges;
-
-                class In_Edges_iterator {
-                    public:
-                        using iterator_category = std::bidirectional_iterator_tag;
-                        using difference_type = std::ptrdiff_t;
-                        using value_type = edge_t;
-                        using pointer = vertex_idx *;
-                        using reference = edge_t &;
-
-                    private:
-                        const vertex_idx target_vert;
-                        const typename BaseT::Compact_Children_Edges &csc_out_edges;
-
-                        typename std::vector<vertex_idx>::const_iterator current;
-
-                    public:
-                        In_Edges_iterator(const vertex_idx &target_vert_, const typename BaseT::Compact_Children_Edges &csc_out_edges_, const typename std::vector<vertex_idx>::const_iterator start_) : target_vert(target_vert_), csc_out_edges(csc_out_edges_), current(start_) { };
-                        In_Edges_iterator(const In_Edges_iterator &other) : target_vert(other.target_vert), csc_out_edges(other.csc_out_edges), current(other.current) { };
-                        In_Edges_iterator &operator=(const In_Edges_iterator &other) {
-                            if (this != &other) {
-                                target_vert = other.target_vert;
-                                csc_out_edges = other.csc_out_edges;
-                                current = other.current;
-                            }
-                            return *this;
-                        };
-
-                        inline value_type operator*() const {
-                            const vertex_idx src_vert = *current;
-                            typename BaseT::Compact_Children_Edges::Children_range range = csc_out_edges.children(src_vert);
-
-                            assert(std::binary_search(range.cbegin(), range.cend(), target_vert));
-                            auto it = std::lower_bound(range.cbegin(), range.cend(), target_vert);
-                            
-                            edge_t diff = static_cast<edge_t>( std::distance(range.cbegin(), it) );
-                            edge_t edge_desc_val = csc_out_edges.children_indx_begin(src_vert) + diff;
-
-                            return edge_desc_val;
-                        };
-
-                        inline In_Edges_iterator &operator++() {
-                            ++current;
-                            return *this;
-                        };
-
-                        inline In_Edges_iterator operator++(int) {
-                            In_Edges_iterator temp = *this;
-                            ++(*this);
-                            return temp;
-                        };
-
-                        inline In_Edges_iterator &operator--() {
-                            --current;
-                            return *this;
-                        };
-
-                        inline In_Edges_iterator operator--(int) {
-                            In_Edges_iterator temp = *this;
-                            --(*this);
-                            return temp;
-                        };
-
-                        inline bool operator==(const In_Edges_iterator &other) const { return current == other.current; };
-                        inline bool operator!=(const In_Edges_iterator &other) const { return !(*this == other); };
-
-                        inline bool operator<=(const In_Edges_iterator &other) const { return current <= other.current; };
-                        inline bool operator<(const In_Edges_iterator &other) const { return (*this <= other) && (*this != other); };
-                        inline bool operator>=(const In_Edges_iterator &other) const { return (!(*this <= other)) || (*this == other); };
-                        inline bool operator>(const In_Edges_iterator &other) const { return !(*this <= other); };
-                };
-
-            public:
-                In_Edges_range() = default;
-                In_Edges_range(const In_Edges_range &other) = default;
-                In_Edges_range(In_Edges_range &&other) = default;
-                In_Edges_range &operator=(const In_Edges_range &other) = default;
-                In_Edges_range &operator=(In_Edges_range &&other) = default;
-                virtual ~In_Edges_range() = default;
-                
-                In_Edges_range(const vertex_idx &tgt_vert_, const ThisT &graph, const typename BaseT::Compact_Children_Edges &csc_out_edges_) : tgt_vert(tgt_vert_), par_range(graph.parents(tgt_vert_)), csc_out_edges(csc_out_edges_) { };
-
-                inline auto cbegin() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cbegin()); };
-                inline auto cend() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cend()); };
-
-                inline auto begin() const { return cbegin(); };
-                inline auto end() const { return cend(); };
+template <bool keep_vertex_order,
+          bool use_work_weights = false,
+          bool use_comm_weights = false,
+          bool use_mem_weights = false,
+          bool use_edge_comm_weights = false,
+          bool use_vert_types = false,
+          typename vert_t = std::size_t,
+          typename edge_t = std::size_t,
+          typename work_weight_type = unsigned,
+          typename comm_weight_type = unsigned,
+          typename mem_weight_type = unsigned,
+          typename e_comm_weight_type = unsigned,
+          typename vertex_type_template_type = unsigned>
+class Compact_Sparse_Graph_EdgeDesc : public Compact_Sparse_Graph<keep_vertex_order,
+                                                                  use_work_weights,
+                                                                  use_comm_weights,
+                                                                  use_mem_weights,
+                                                                  use_vert_types,
+                                                                  vert_t,
+                                                                  edge_t,
+                                                                  work_weight_type,
+                                                                  comm_weight_type,
+                                                                  mem_weight_type,
+                                                                  vertex_type_template_type> {
+  private:
+    using ThisT = Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                use_work_weights,
+                                                use_comm_weights,
+                                                use_mem_weights,
+                                                use_edge_comm_weights,
+                                                use_vert_types,
+                                                vert_t,
+                                                edge_t,
+                                                work_weight_type,
+                                                comm_weight_type,
+                                                mem_weight_type,
+                                                e_comm_weight_type,
+                                                vertex_type_template_type>;
+    using BaseT = Compact_Sparse_Graph<keep_vertex_order,
+                                       use_work_weights,
+                                       use_comm_weights,
+                                       use_mem_weights,
+                                       use_vert_types,
+                                       vert_t,
+                                       edge_t,
+                                       work_weight_type,
+                                       comm_weight_type,
+                                       mem_weight_type,
+                                       vertex_type_template_type>;
+
+  public:
+    using vertex_idx = typename BaseT::vertex_idx;
+
+    using vertex_work_weight_type = typename BaseT::vertex_work_weight_type;
+    using vertex_comm_weight_type = typename BaseT::vertex_comm_weight_type;
+    using vertex_mem_weight_type = typename BaseT::vertex_mem_weight_type;
+    using vertex_type_type = typename BaseT::vertex_type_type;
+
+    using directed_edge_descriptor = edge_t;
+    using edge_comm_weight_type = e_comm_weight_type;
+
+  protected:
+    std::vector<edge_comm_weight_type> edge_comm_weights;
+
+    class In_Edges_range {
+      private:
+        const vertex_idx tgt_vert;
+        const typename BaseT::Compact_Parent_Edges::Parent_range par_range;
+        const typename BaseT::Compact_Children_Edges &csc_out_edges;
+
+        /**
+         * Bidirectional iterator yielding the descriptors of the edges that enter target_vert.
+         * Each position refers to one source vertex; dereferencing looks target_vert up in that
+         * vertex's child range and returns the matching index into the children storage.
+         */
+        class In_Edges_iterator {
+          public:
+            using iterator_category = std::bidirectional_iterator_tag;
+            using difference_type = std::ptrdiff_t;
+            using value_type = edge_t;
+            using pointer = const edge_t *;
+            using reference = edge_t;    // operator* returns a computed value, not an lvalue
+
+          private:
+            vertex_idx target_vert;
+            // Non-owning; a pointer (not a reference) so that copy assignment can rebind it.
+            const typename BaseT::Compact_Children_Edges *csc_out_edges;
+
+            typename std::vector<vertex_idx>::const_iterator current;
+
+          public:
+            In_Edges_iterator(const vertex_idx &target_vert_,
+                              const typename BaseT::Compact_Children_Edges &csc_out_edges_,
+                              const typename std::vector<vertex_idx>::const_iterator start_)
+                : target_vert(target_vert_), csc_out_edges(&csc_out_edges_), current(start_) {}
+            // Defaulted member-wise copies; these require the members above to be assignable.
+            In_Edges_iterator(const In_Edges_iterator &other) = default;
+            In_Edges_iterator &operator=(const In_Edges_iterator &other) = default;
+
+            // Descriptor of the edge (*current -> target_vert); target_vert must be a child of *current.
+            inline value_type operator*() const {
+                const vertex_idx src_vert = *current;
+                typename BaseT::Compact_Children_Edges::Children_range range = csc_out_edges->children(src_vert);
+
+                assert(std::binary_search(range.cbegin(), range.cend(), target_vert));
+                auto it = std::lower_bound(range.cbegin(), range.cend(), target_vert);
+
+                edge_t diff = static_cast<edge_t>(std::distance(range.cbegin(), it));
+                edge_t edge_desc_val = csc_out_edges->children_indx_begin(src_vert) + diff;
+
+                return edge_desc_val;
+            }
+
+            inline In_Edges_iterator &operator++() {
+                ++current;
+                return *this;
+            }
+
+            inline In_Edges_iterator operator++(int) {
+                In_Edges_iterator temp = *this;
+                ++(*this);
+                return temp;
+            }
+
+            inline In_Edges_iterator &operator--() {
+                --current;
+                return *this;
+            }
+
+            inline In_Edges_iterator operator--(int) {
+                In_Edges_iterator temp = *this;
+                --(*this);
+                return temp;
+            }
+
+            inline bool operator==(const In_Edges_iterator &other) const { return current == other.current; }
+
+            inline bool operator!=(const In_Edges_iterator &other) const { return !(*this == other); }
+
+            // Ordering compares only the underlying positions; the target vertex is not consulted.
+            inline bool operator<=(const In_Edges_iterator &other) const { return current <= other.current; }
+
+            inline bool operator<(const In_Edges_iterator &other) const { return (*this <= other) && (*this != other); }
+
+            inline bool operator>=(const In_Edges_iterator &other) const { return (!(*this <= other)) || (*this == other); }
 
+            inline bool operator>(const In_Edges_iterator &other) const { return !(*this <= other); }
         };
 
-    public:
-        Compact_Sparse_Graph_EdgeDesc() = default;
-        Compact_Sparse_Graph_EdgeDesc(const Compact_Sparse_Graph_EdgeDesc &other) = default;
-        Compact_Sparse_Graph_EdgeDesc(Compact_Sparse_Graph_EdgeDesc &&other) = default;
-        Compact_Sparse_Graph_EdgeDesc &operator=(const Compact_Sparse_Graph_EdgeDesc &other) = default;
-        Compact_Sparse_Graph_EdgeDesc &operator=(Compact_Sparse_Graph_EdgeDesc &&other) = default;
-        virtual ~Compact_Sparse_Graph_EdgeDesc() = default;
-
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges) : BaseT(num_vertices_, edges) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
-        }
+      public:
+        In_Edges_range() = default;
+        In_Edges_range(const In_Edges_range &other) = default;
+        In_Edges_range(In_Edges_range &&other) = default;
+        In_Edges_range &operator=(const In_Edges_range &other) = default;
+        In_Edges_range &operator=(In_Edges_range &&other) = default;
+        virtual ~In_Edges_range() = default;
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww) : BaseT(num_vertices_, edges, ww) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
-        }
+        In_Edges_range(const vertex_idx &tgt_vert_, const ThisT &graph, const typename BaseT::Compact_Children_Edges &csc_out_edges_)
+            : tgt_vert(tgt_vert_), par_range(graph.parents(tgt_vert_)), csc_out_edges(csc_out_edges_) {};
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &&ww) : BaseT(num_vertices_, edges, std::move(ww)) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
-        }
+        inline auto cbegin() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cbegin()); };
 
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, std::vector<vertex_work_weight_type> &ww, std::vector<vertex_comm_weight_type> &cw) : BaseT(num_vertices_, edges, ww, cw) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+        inline auto cend() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cend()); };
+
+        inline auto begin() const { return cbegin(); };
+
+        inline auto end() const { return cend(); };
+    };
+
+  public:
+    Compact_Sparse_Graph_EdgeDesc() = default;
+    Compact_Sparse_Graph_EdgeDesc(const Compact_Sparse_Graph_EdgeDesc &other) = default;
+    Compact_Sparse_Graph_EdgeDesc(Compact_Sparse_Graph_EdgeDesc &&other) = default;
+    Compact_Sparse_Graph_EdgeDesc &operator=(const Compact_Sparse_Graph_EdgeDesc &other) = default;
+    Compact_Sparse_Graph_EdgeDesc &operator=(Compact_Sparse_Graph_EdgeDesc &&other) = default;
+    virtual ~Compact_Sparse_Graph_EdgeDesc() = default;
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type &edges) : BaseT(num_vertices_, edges) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
         }
-        
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, std::vector<vertex_work_weight_type> &&ww, std::vector<vertex_comm_weight_type> &&cw) : BaseT(num_vertices_, edges, std::move(ww), std::move(cw)) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  const std::vector<vertex_work_weight_type> &ww)
+        : BaseT(num_vertices_, edges, ww) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
         }
-
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww, const std::vector<vertex_comm_weight_type> &cw, const std::vector<vertex_mem_weight_type> &mw) : BaseT(num_vertices_, edges, ww, cw, mw) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  const std::vector<vertex_work_weight_type> &&ww)
+        : BaseT(num_vertices_, edges, std::move(ww)) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
         }
-        
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &&ww, const std::vector<vertex_comm_weight_type> &&cw, const std::vector<vertex_mem_weight_type> &&mw) : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw)) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  std::vector<vertex_work_weight_type> &ww,
+                                  std::vector<vertex_comm_weight_type> &cw)
+        : BaseT(num_vertices_, edges, ww, cw) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
         }
-
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &ww, const std::vector<vertex_comm_weight_type> &cw, const std::vector<vertex_mem_weight_type> &mw, const std::vector<vertex_type_type> &vt) : BaseT(num_vertices_, edges, ww, cw, mw, vt) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  std::vector<vertex_work_weight_type> &&ww,
+                                  std::vector<vertex_comm_weight_type> &&cw)
+        : BaseT(num_vertices_, edges, std::move(ww), std::move(cw)) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
         }
-        
-        template <typename edge_list_type>
-        Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector<vertex_work_weight_type> &&ww, const std::vector<vertex_comm_weight_type> &&cw, const std::vector<vertex_mem_weight_type> &&mw, const std::vector<vertex_type_type> &&vt) : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw), std::move(vt)) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  const std::vector<vertex_work_weight_type> &ww,
+                                  const std::vector<vertex_comm_weight_type> &cw,
+                                  const std::vector<vertex_mem_weight_type> &mw)
+        : BaseT(num_vertices_, edges, ww, cw, mw) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  const std::vector<vertex_work_weight_type> &&ww,
+                                  const std::vector<vertex_comm_weight_type> &&cw,
+                                  const std::vector<vertex_mem_weight_type> &&mw)
+        : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw)) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
+        }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  const std::vector<vertex_work_weight_type> &ww,
+                                  const std::vector<vertex_comm_weight_type> &cw,
+                                  const std::vector<vertex_mem_weight_type> &mw,
+                                  const std::vector<vertex_type_type> &vt)
+        : BaseT(num_vertices_, edges, ww, cw, mw, vt) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
         }
+    }
+
+    template <typename edge_list_type>
+    Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_,
+                                  const edge_list_type &edges,
+                                  const std::vector<vertex_work_weight_type> &&ww,
+                                  const std::vector<vertex_comm_weight_type> &&cw,
+                                  const std::vector<vertex_mem_weight_type> &&mw,
+                                  const std::vector<vertex_type_type> &&vt)
+        : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw), std::move(vt)) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
+        }
+    }
 
-        template <typename Graph_type>
-        Compact_Sparse_Graph_EdgeDesc(const Graph_type  & graph) : BaseT(graph) {
-            if constexpr (use_edge_comm_weights) {
-                edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
-            }
+    template <typename Graph_type>
+    Compact_Sparse_Graph_EdgeDesc(const Graph_type &graph) : BaseT(graph) {
+        if constexpr (use_edge_comm_weights) {
+            edge_comm_weights = std::vector<edge_comm_weight_type>(BaseT::num_edges(), static_cast<edge_comm_weight_type>(0));
+        }
 
-            if constexpr (has_edge_weights_v<Graph_type> && use_edge_comm_weights) {
-                for (const auto& edge : edges(graph)) {
-                    const auto src = source(edge, graph);
-                    const auto tgt = target(edge, graph);
-                    set_edge_comm_weight(src, tgt, graph.edge_comm_weight(edge));
-                }
+        if constexpr (has_edge_weights_v<Graph_type> && use_edge_comm_weights) {
+            for (const auto &edge : edges(graph)) {
+                const auto src = source(edge, graph);
+                const auto tgt = target(edge, graph);
+                set_edge_comm_weight(src, tgt, graph.edge_comm_weight(edge));
             }
         }
+    }
 
+    inline auto edges() const { return integral_range<directed_edge_descriptor>(BaseT::number_of_edges); };
 
+    inline directed_edge_descriptor edge(const vertex_idx &src, const vertex_idx &tgt) const {
+        typename BaseT::Compact_Children_Edges::Children_range range = BaseT::csc_out_edges.children(src);
 
-        inline auto edges() const { return integral_range<directed_edge_descriptor>(BaseT::number_of_edges); };
-        inline directed_edge_descriptor edge(const vertex_idx &src, const vertex_idx &tgt) const {
-            typename BaseT::Compact_Children_Edges::Children_range range = BaseT::csc_out_edges.children(src);
+        assert(std::binary_search(range.cbegin(), range.cend(), tgt));
+        auto it = std::lower_bound(range.cbegin(), range.cend(), tgt);
 
-            assert(std::binary_search(range.cbegin(), range.cend(), tgt));
-            auto it = std::lower_bound(range.cbegin(), range.cend(), tgt);
-            
-            directed_edge_descriptor diff = static_cast<directed_edge_descriptor>( std::distance(range.cbegin(), it) );
-            directed_edge_descriptor edge_desc_val = BaseT::csc_out_edges.children_indx_begin(src) + diff;
+        directed_edge_descriptor diff = static_cast<directed_edge_descriptor>(std::distance(range.cbegin(), it));
+        directed_edge_descriptor edge_desc_val = BaseT::csc_out_edges.children_indx_begin(src) + diff;
 
-            return edge_desc_val;
-        };
-        
-        inline vertex_idx source(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.source(edge); };
-        inline vertex_idx target(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.target(edge); };
-        
-        inline auto out_edges(const vertex_idx &vert) const { return integral_range<directed_edge_descriptor>(BaseT::csc_out_edges.children_indx_begin(vert), BaseT::csc_out_edges.children_indx_begin(vert + 1)); };
-        inline auto in_edges(const vertex_idx &vert) const { return In_Edges_range(vert, *this, BaseT::csc_out_edges); };
-
-        template<typename RetT = edge_comm_weight_type>
-        inline std::enable_if_t<use_edge_comm_weights, RetT> edge_comm_weight(const directed_edge_descriptor &edge) const {
-            return edge_comm_weights[edge];
-        }
-        template<typename RetT = edge_comm_weight_type>
-        inline std::enable_if_t<not use_edge_comm_weights, RetT> edge_comm_weight(const directed_edge_descriptor &edge) const {
-            return static_cast<RetT>(1);
-        }
+        return edge_desc_val;
+    };
 
-        template<typename RetT = void>
-        inline std::enable_if_t<use_edge_comm_weights, RetT> set_edge_comm_weight(const vertex_idx &src, const vertex_idx &tgt, const edge_comm_weight_type e_comm_weight) {
-            if constexpr (keep_vertex_order) {
-                edge_comm_weights[edge(src, tgt)] = e_comm_weight;
-            } else {
-                const vertex_idx internal_src = BaseT::vertex_permutation_from_original_to_internal[src];
-                const vertex_idx internal_tgt = BaseT::vertex_permutation_from_original_to_internal[tgt];
-                edge_comm_weights[edge(internal_src, internal_tgt)] = e_comm_weight;
-            }
-        }
-        template<typename RetT = void>
-        inline std::enable_if_t<not use_edge_comm_weights, RetT> set_edge_comm_weight(const vertex_idx &src, const vertex_idx &tgt, const edge_comm_weight_type e_comm_weight) {
-            static_assert(use_edge_comm_weights, "To set edge communication weight, graph type must allow edge communication weights.");
+    inline vertex_idx source(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.source(edge); };
+
+    inline vertex_idx target(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.target(edge); };
+
+    inline auto out_edges(const vertex_idx &vert) const {
+        return integral_range<directed_edge_descriptor>(BaseT::csc_out_edges.children_indx_begin(vert),
+                                                        BaseT::csc_out_edges.children_indx_begin(vert + 1));
+    };
+
+    inline auto in_edges(const vertex_idx &vert) const { return In_Edges_range(vert, *this, BaseT::csc_out_edges); };
+
+    template <typename RetT = edge_comm_weight_type>
+    inline std::enable_if_t<use_edge_comm_weights, RetT> edge_comm_weight(const directed_edge_descriptor &edge) const {
+        return edge_comm_weights[edge];
+    }
+
+    template <typename RetT = edge_comm_weight_type>
+    inline std::enable_if_t<not use_edge_comm_weights, RetT> edge_comm_weight(const directed_edge_descriptor &edge) const {
+        return static_cast<RetT>(1);
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<use_edge_comm_weights, RetT> set_edge_comm_weight(const vertex_idx &src,
+                                                                              const vertex_idx &tgt,
+                                                                              const edge_comm_weight_type e_comm_weight) {
+        if constexpr (keep_vertex_order) {
+            edge_comm_weights[edge(src, tgt)] = e_comm_weight;
+        } else {
+            const vertex_idx internal_src = BaseT::vertex_permutation_from_original_to_internal[src];
+            const vertex_idx internal_tgt = BaseT::vertex_permutation_from_original_to_internal[tgt];
+            edge_comm_weights[edge(internal_src, internal_tgt)] = e_comm_weight;
         }
+    }
+
+    template <typename RetT = void>
+    inline std::enable_if_t<not use_edge_comm_weights, RetT> set_edge_comm_weight(const vertex_idx &src,
+                                                                                  const vertex_idx &tgt,
+                                                                                  const edge_comm_weight_type e_comm_weight) {
+        static_assert(use_edge_comm_weights, "To set edge communication weight, graph type must allow edge communication weights.");
+    }
 };
 
-
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-inline auto edges(const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type> &graph) {
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+inline auto edges(const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                      use_work_weights,
+                                                      use_comm_weights,
+                                                      use_mem_weights,
+                                                      use_edge_comm_weights,
+                                                      use_vert_types,
+                                                      vert_t,
+                                                      edge_t,
+                                                      work_weight_type,
+                                                      comm_weight_type,
+                                                      mem_weight_type,
+                                                      e_comm_weight_type,
+                                                      vertex_type_template_type> &graph) {
     return graph.edges();
 }
 
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-inline auto out_edges(vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>> v,
-                      const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type> &graph) {
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+inline auto out_edges(vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                                 use_work_weights,
+                                                                 use_comm_weights,
+                                                                 use_mem_weights,
+                                                                 use_edge_comm_weights,
+                                                                 use_vert_types,
+                                                                 vert_t,
+                                                                 edge_t,
+                                                                 work_weight_type,
+                                                                 comm_weight_type,
+                                                                 mem_weight_type,
+                                                                 e_comm_weight_type,
+                                                                 vertex_type_template_type>> v,
+                      const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                          use_work_weights,
+                                                          use_comm_weights,
+                                                          use_mem_weights,
+                                                          use_edge_comm_weights,
+                                                          use_vert_types,
+                                                          vert_t,
+                                                          edge_t,
+                                                          work_weight_type,
+                                                          comm_weight_type,
+                                                          mem_weight_type,
+                                                          e_comm_weight_type,
+                                                          vertex_type_template_type> &graph) {
     return graph.out_edges(v);
 }
 
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-inline auto in_edges(vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>> v,
-                     const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type> &graph) {
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+inline auto in_edges(vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                                use_work_weights,
+                                                                use_comm_weights,
+                                                                use_mem_weights,
+                                                                use_edge_comm_weights,
+                                                                use_vert_types,
+                                                                vert_t,
+                                                                edge_t,
+                                                                work_weight_type,
+                                                                comm_weight_type,
+                                                                mem_weight_type,
+                                                                e_comm_weight_type,
+                                                                vertex_type_template_type>> v,
+                     const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                         use_work_weights,
+                                                         use_comm_weights,
+                                                         use_mem_weights,
+                                                         use_edge_comm_weights,
+                                                         use_vert_types,
+                                                         vert_t,
+                                                         edge_t,
+                                                         work_weight_type,
+                                                         comm_weight_type,
+                                                         mem_weight_type,
+                                                         e_comm_weight_type,
+                                                         vertex_type_template_type> &graph) {
     return graph.in_edges(v);
 }
 
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-inline vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>> source(const edge_desc_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>> &edge, const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type> &graph) {
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+inline vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                  use_work_weights,
+                                                  use_comm_weights,
+                                                  use_mem_weights,
+                                                  use_edge_comm_weights,
+                                                  use_vert_types,
+                                                  vert_t,
+                                                  edge_t,
+                                                  work_weight_type,
+                                                  comm_weight_type,
+                                                  mem_weight_type,
+                                                  e_comm_weight_type,
+                                                  vertex_type_template_type>>
+source(const edge_desc_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                       use_work_weights,
+                                                       use_comm_weights,
+                                                       use_mem_weights,
+                                                       use_edge_comm_weights,
+                                                       use_vert_types,
+                                                       vert_t,
+                                                       edge_t,
+                                                       work_weight_type,
+                                                       comm_weight_type,
+                                                       mem_weight_type,
+                                                       e_comm_weight_type,
+                                                       vertex_type_template_type>> &edge,
+       const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                           use_work_weights,
+                                           use_comm_weights,
+                                           use_mem_weights,
+                                           use_edge_comm_weights,
+                                           use_vert_types,
+                                           vert_t,
+                                           edge_t,
+                                           work_weight_type,
+                                           comm_weight_type,
+                                           mem_weight_type,
+                                           e_comm_weight_type,
+                                           vertex_type_template_type> &graph) {
     return graph.source(edge);
 }
 
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-inline vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>> target(const edge_desc_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>> &edge, const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type> &graph) {
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+inline vertex_idx_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                  use_work_weights,
+                                                  use_comm_weights,
+                                                  use_mem_weights,
+                                                  use_edge_comm_weights,
+                                                  use_vert_types,
+                                                  vert_t,
+                                                  edge_t,
+                                                  work_weight_type,
+                                                  comm_weight_type,
+                                                  mem_weight_type,
+                                                  e_comm_weight_type,
+                                                  vertex_type_template_type>>
+target(const edge_desc_t<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                       use_work_weights,
+                                                       use_comm_weights,
+                                                       use_mem_weights,
+                                                       use_edge_comm_weights,
+                                                       use_vert_types,
+                                                       vert_t,
+                                                       edge_t,
+                                                       work_weight_type,
+                                                       comm_weight_type,
+                                                       mem_weight_type,
+                                                       e_comm_weight_type,
+                                                       vertex_type_template_type>> &edge,
+       const Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                           use_work_weights,
+                                           use_comm_weights,
+                                           use_mem_weights,
+                                           use_edge_comm_weights,
+                                           use_vert_types,
+                                           vert_t,
+                                           edge_t,
+                                           work_weight_type,
+                                           comm_weight_type,
+                                           mem_weight_type,
+                                           e_comm_weight_type,
+                                           vertex_type_template_type> &graph) {
     return graph.target(edge);
 }
 
-
-template<bool keep_vertex_order, bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-struct is_Compact_Sparse_Graph<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>, void> : std::true_type {};
-
-template<bool use_work_weights, bool use_comm_weights, bool use_mem_weights, bool use_edge_comm_weights, bool use_vert_types, typename vert_t, typename edge_t, typename work_weight_type, typename comm_weight_type, typename mem_weight_type, typename e_comm_weight_type, typename vertex_type_template_type>
-struct is_Compact_Sparse_Graph_reorder<Compact_Sparse_Graph_EdgeDesc<false, use_work_weights, use_comm_weights, use_mem_weights, use_edge_comm_weights, use_vert_types, vert_t, edge_t, work_weight_type, comm_weight_type, mem_weight_type, e_comm_weight_type, vertex_type_template_type>, void> : std::true_type {};
-
-
-
-
-
-
-
-
+template <bool keep_vertex_order,
+          bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+struct is_Compact_Sparse_Graph<Compact_Sparse_Graph_EdgeDesc<keep_vertex_order,
+                                                             use_work_weights,
+                                                             use_comm_weights,
+                                                             use_mem_weights,
+                                                             use_edge_comm_weights,
+                                                             use_vert_types,
+                                                             vert_t,
+                                                             edge_t,
+                                                             work_weight_type,
+                                                             comm_weight_type,
+                                                             mem_weight_type,
+                                                             e_comm_weight_type,
+                                                             vertex_type_template_type>,
+                               void> : std::true_type {};
+
+template <bool use_work_weights,
+          bool use_comm_weights,
+          bool use_mem_weights,
+          bool use_edge_comm_weights,
+          bool use_vert_types,
+          typename vert_t,
+          typename edge_t,
+          typename work_weight_type,
+          typename comm_weight_type,
+          typename mem_weight_type,
+          typename e_comm_weight_type,
+          typename vertex_type_template_type>
+struct is_Compact_Sparse_Graph_reorder<Compact_Sparse_Graph_EdgeDesc<false,
+                                                                     use_work_weights,
+                                                                     use_comm_weights,
+                                                                     use_mem_weights,
+                                                                     use_edge_comm_weights,
+                                                                     use_vert_types,
+                                                                     vert_t,
+                                                                     edge_t,
+                                                                     work_weight_type,
+                                                                     comm_weight_type,
+                                                                     mem_weight_type,
+                                                                     e_comm_weight_type,
+                                                                     vertex_type_template_type>,
+                                       void> : std::true_type {};
 
 static_assert(is_Compact_Sparse_Graph_v<Compact_Sparse_Graph_EdgeDesc<true>>);
 static_assert(is_Compact_Sparse_Graph_v<Compact_Sparse_Graph_EdgeDesc<false>>);
 static_assert(!is_Compact_Sparse_Graph_reorder_v<Compact_Sparse_Graph_EdgeDesc<true>>);
 static_assert(is_Compact_Sparse_Graph_reorder_v<Compact_Sparse_Graph_EdgeDesc<false>>);
 
+static_assert(has_vertex_weights_v<Compact_Sparse_Graph_EdgeDesc<true, true>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept");
 
-static_assert(has_vertex_weights_v<Compact_Sparse_Graph_EdgeDesc<true, true>>, 
-            "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept");
+static_assert(has_vertex_weights_v<Compact_Sparse_Graph_EdgeDesc<false, true>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept");
 
-static_assert(has_vertex_weights_v<Compact_Sparse_Graph_EdgeDesc<false, true>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<false, false, false, false, false>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<false, false, false, false, false>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<true, false, false, false, false>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<true, false, false, false, false>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
+static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
 
-static_assert(is_directed_graph_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept");
+static_assert(is_computational_dag_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, false>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept");
 
-static_assert(is_computational_dag_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, false>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept");
-
-static_assert(is_computational_dag_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, false>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept");
+static_assert(is_computational_dag_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, false>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept");
 
 static_assert(is_computational_dag_typed_vertices_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true>>,
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept");
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept");
 
 static_assert(is_computational_dag_typed_vertices_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true, true>>,
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept");
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept");
 
 static_assert(is_directed_graph_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<true>>,
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept.");
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept.");
 
 static_assert(is_directed_graph_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<false>>,
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept.");
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept.");
 
-static_assert(is_computational_dag_typed_vertices_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true>>,
+static_assert(
+    is_computational_dag_typed_vertices_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true>>,
     "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computational_dag_typed_vertices_edge_desc_v with types concept");
 
-static_assert(is_computational_dag_typed_vertices_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true, true>>,
+static_assert(
+    is_computational_dag_typed_vertices_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true, true>>,
     "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computational_dag_typed_vertices_edge_desc_v with types concept");
 
 static_assert(has_edge_weights_v<Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true>>,
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept");
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept");
 
 static_assert(has_edge_weights_v<Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true, true>>,
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept");
-
-static_assert(has_hashable_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<true, true>>, 
-            "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept");
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept");
 
-static_assert(has_hashable_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<false, true>>, 
-    "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept");
+static_assert(has_hashable_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<true, true>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept");
 
-using CSGE = Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true, std::size_t, std::size_t, unsigned, unsigned, unsigned, unsigned, unsigned>;
+static_assert(has_hashable_edge_desc_v<Compact_Sparse_Graph_EdgeDesc<false, true>>,
+              "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept");
 
+using CSGE
+    = Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true, std::size_t, std::size_t, unsigned, unsigned, unsigned, unsigned, unsigned>;
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp
index 5a439664..f4445622 100644
--- a/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp
@@ -17,18 +17,18 @@ limitations under the License.
 */
 #pragma once
 
-#include "osp/auxiliary/hash_util.hpp"
+#include <vector>
+
 #include "cdag_vertex_impl.hpp"
 #include "edge_iterator.hpp"
-#include "osp/graph_implementations/integral_range.hpp"
+#include "osp/auxiliary/hash_util.hpp"
 #include "osp/graph_algorithms/computational_dag_construction_util.hpp"
-#include <vector>
+#include "osp/graph_implementations/integral_range.hpp"
 
 namespace osp {
 
-template<typename v_impl>
+template <typename v_impl>
 struct directed_edge_descriptor_impl {
-
     using vertex_idx = typename v_impl::vertex_idx_type;
 
     vertex_idx idx;
@@ -37,12 +37,15 @@ struct directed_edge_descriptor_impl {
     vertex_idx target;
 
     directed_edge_descriptor_impl() : idx(0), source(0), target(0) {}
+
     directed_edge_descriptor_impl(const directed_edge_descriptor_impl<v_impl> &other) = default;
     directed_edge_descriptor_impl(directed_edge_descriptor_impl<v_impl> &&other) = default;
     directed_edge_descriptor_impl &operator=(const directed_edge_descriptor_impl<v_impl> &other) = default;
     directed_edge_descriptor_impl &operator=(directed_edge_descriptor_impl<v_impl> &&other) = default;
+
     directed_edge_descriptor_impl(vertex_idx source_arg, vertex_idx target_arg, vertex_idx idx_arg)
         : idx(idx_arg), source(source_arg), target(target_arg) {}
+
     ~directed_edge_descriptor_impl() = default;
 
     bool operator==(const directed_edge_descriptor_impl<v_impl> &other) const {
@@ -52,19 +55,19 @@ struct directed_edge_descriptor_impl {
     bool operator!=(const directed_edge_descriptor_impl<v_impl> &other) const { return !(*this == other); }
 };
 
-template<typename edge_comm_weight_t>
+template <typename edge_comm_weight_t>
 struct cdag_edge_impl {
-
     using cdag_edge_comm_weight_type = edge_comm_weight_t;
 
     cdag_edge_impl(edge_comm_weight_t comm_weight_arg = 1) : comm_weight(comm_weight_arg) {}
+
     edge_comm_weight_t comm_weight;
 };
 
 using cdag_edge_impl_int = cdag_edge_impl<int>;
 using cdag_edge_impl_unsigned = cdag_edge_impl<unsigned>;
 
-template<typename v_impl, typename e_impl>
+template <typename v_impl, typename e_impl>
 class computational_dag_edge_idx_vector_impl {
   public:
     // graph_traits specialization
@@ -119,9 +122,8 @@ class computational_dag_edge_idx_vector_impl {
 
     computational_dag_edge_idx_vector_impl(const computational_dag_edge_idx_vector_impl &other) = default;
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     computational_dag_edge_idx_vector_impl(const Graph_t &other) {
-
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the is_computation_dag concept");
 
         constructComputationalDag(other, *this);
@@ -130,10 +132,11 @@ class computational_dag_edge_idx_vector_impl {
     computational_dag_edge_idx_vector_impl &operator=(const computational_dag_edge_idx_vector_impl &other) = default;
 
     computational_dag_edge_idx_vector_impl(computational_dag_edge_idx_vector_impl &&other)
-        : vertices_(std::move(other.vertices_)), edges_(std::move(other.edges_)),
-          num_vertex_types_(other.num_vertex_types_), out_edges_(std::move(other.out_edges_)),
+        : vertices_(std::move(other.vertices_)),
+          edges_(std::move(other.edges_)),
+          num_vertex_types_(other.num_vertex_types_),
+          out_edges_(std::move(other.out_edges_)),
           in_edges_(std::move(other.in_edges_)) {
-
         other.num_vertex_types_ = 0;
     }
 
@@ -152,38 +155,45 @@ class computational_dag_edge_idx_vector_impl {
     virtual ~computational_dag_edge_idx_vector_impl() = default;
 
     inline vertex_idx num_edges() const { return static_cast<vertex_idx>(edges_.size()); }
+
     inline vertex_idx num_vertices() const { return static_cast<vertex_idx>(vertices_.size()); }
 
     inline auto edges() const { return edge_range_vector_impl<ThisT>(*this); }
 
     inline auto parents(vertex_idx v) const { return edge_source_range(in_edges_[v], *this); }
+
     inline auto children(vertex_idx v) const { return edge_target_range(out_edges_[v], *this); }
 
     inline auto vertices() const { return integral_range<vertex_idx>(static_cast<vertex_idx>(vertices_.size())); }
 
     inline const std::vector<directed_edge_descriptor> &in_edges(vertex_idx v) const { return in_edges_[v]; }
+
     inline const std::vector<directed_edge_descriptor> &out_edges(vertex_idx v) const { return out_edges_[v]; }
 
     inline vertex_idx in_degree(vertex_idx v) const { return static_cast<vertex_idx>(in_edges_[v].size()); }
+
     inline vertex_idx out_degree(vertex_idx v) const { return static_cast<vertex_idx>(out_edges_[v].size()); }
 
-    inline edge_comm_weight_type edge_comm_weight(directed_edge_descriptor e) const {
-        return edges_[e.idx].comm_weight;
-    }
+    inline edge_comm_weight_type edge_comm_weight(directed_edge_descriptor e) const { return edges_[e.idx].comm_weight; }
 
     inline vertex_work_weight_type vertex_work_weight(vertex_idx v) const { return vertices_[v].work_weight; }
+
     inline vertex_comm_weight_type vertex_comm_weight(vertex_idx v) const { return vertices_[v].comm_weight; }
+
     inline vertex_mem_weight_type vertex_mem_weight(vertex_idx v) const { return vertices_[v].mem_weight; }
 
     inline unsigned num_vertex_types() const { return num_vertex_types_; }
+
     inline vertex_type_type vertex_type(vertex_idx v) const { return vertices_[v].vertex_type; }
 
     inline vertex_idx source(const directed_edge_descriptor &e) const { return e.source; }
-    inline vertex_idx target(const directed_edge_descriptor &e) const { return e.target; }
 
-    vertex_idx add_vertex(vertex_work_weight_type work_weight, vertex_comm_weight_type comm_weight,
-                          vertex_mem_weight_type mem_weight, vertex_type_type vertex_type = 0) {
+    inline vertex_idx target(const directed_edge_descriptor &e) const { return e.target; }
 
+    vertex_idx add_vertex(vertex_work_weight_type work_weight,
+                          vertex_comm_weight_type comm_weight,
+                          vertex_mem_weight_type mem_weight,
+                          vertex_type_type vertex_type = 0) {
         vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type);
 
         out_edges_.push_back({});
@@ -194,9 +204,7 @@ class computational_dag_edge_idx_vector_impl {
         return vertices_.back().id;
     }
 
-    std::pair<directed_edge_descriptor, bool> add_edge(vertex_idx source, vertex_idx target,
-                                                       edge_comm_weight_type comm_weight = 1) {
-
+    std::pair<directed_edge_descriptor, bool> add_edge(vertex_idx source, vertex_idx target, edge_comm_weight_type comm_weight = 1) {
         if (source == target) {
             return {directed_edge_descriptor{}, false};
         }
@@ -222,12 +230,13 @@ class computational_dag_edge_idx_vector_impl {
     inline void set_vertex_work_weight(vertex_idx v, vertex_work_weight_type work_weight) {
         vertices_[v].work_weight = work_weight;
     }
+
     inline void set_vertex_comm_weight(vertex_idx v, vertex_comm_weight_type comm_weight) {
         vertices_[v].comm_weight = comm_weight;
     }
-    inline void set_vertex_mem_weight(vertex_idx v, vertex_mem_weight_type mem_weight) {
-        vertices_[v].mem_weight = mem_weight;
-    }
+
+    inline void set_vertex_mem_weight(vertex_idx v, vertex_mem_weight_type mem_weight) { vertices_[v].mem_weight = mem_weight; }
+
     inline void set_vertex_type(vertex_idx v, vertex_type_type vertex_type) {
         vertices_[v].vertex_type = vertex_type;
         num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1);
@@ -238,58 +247,59 @@ class computational_dag_edge_idx_vector_impl {
     }
 
     inline const v_impl &get_vertex_impl(vertex_idx v) const { return vertices_[v]; }
+
     inline const e_impl &get_edge_impl(directed_edge_descriptor e) const { return edges_[e.idx]; }
 };
 
-template<typename v_impl, typename e_impl>
+template <typename v_impl, typename e_impl>
 inline auto edges(const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
     return graph.edges();
 }
 
-template<typename v_impl, typename e_impl>
+template <typename v_impl, typename e_impl>
 inline auto out_edges(vertex_idx_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> v,
                       const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
     return graph.out_edges(v);
 }
 
-template<typename v_impl, typename e_impl>
+template <typename v_impl, typename e_impl>
 inline auto in_edges(vertex_idx_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> v,
                      const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
     return graph.in_edges(v);
 }
 
-
 // default implementation to get the source of an edge
-template<typename v_impl, typename e_impl>
-inline vertex_idx_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> source(const edge_desc_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> &edge, const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
+template <typename v_impl, typename e_impl>
+inline vertex_idx_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> source(
+    const edge_desc_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> &edge,
+    const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
     return graph.source(edge);
 }
 
 // default implementation to get the target of an edge
-template<typename v_impl, typename e_impl>
-inline vertex_idx_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> target(const edge_desc_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> &edge, const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
+template <typename v_impl, typename e_impl>
+inline vertex_idx_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> target(
+    const edge_desc_t<computational_dag_edge_idx_vector_impl<v_impl, e_impl>> &edge,
+    const computational_dag_edge_idx_vector_impl<v_impl, e_impl> &graph) {
     return graph.target(edge);
 }
 
-
 // default template specialization
-using computational_dag_edge_idx_vector_impl_def_t =
-    computational_dag_edge_idx_vector_impl<cdag_vertex_impl_unsigned, cdag_edge_impl_unsigned>;
-
-using computational_dag_edge_idx_vector_impl_def_int_t =
-    computational_dag_edge_idx_vector_impl<cdag_vertex_impl_int, cdag_edge_impl_int>;
+using computational_dag_edge_idx_vector_impl_def_t
+    = computational_dag_edge_idx_vector_impl<cdag_vertex_impl_unsigned, cdag_edge_impl_unsigned>;
 
+using computational_dag_edge_idx_vector_impl_def_int_t
+    = computational_dag_edge_idx_vector_impl<cdag_vertex_impl_int, cdag_edge_impl_int>;
 
 static_assert(is_directed_graph_edge_desc_v<computational_dag_edge_idx_vector_impl_def_t>,
               "computational_dag_edge_idx_vector_impl must satisfy the directed_graph_edge_desc concept");
 
-static_assert(
-    is_computational_dag_typed_vertices_edge_desc_v<computational_dag_edge_idx_vector_impl_def_t>,
-    "computational_dag_edge_idx_vector_impl must satisfy the computation_dag_typed_vertices_edge_desc concept");
+static_assert(is_computational_dag_typed_vertices_edge_desc_v<computational_dag_edge_idx_vector_impl_def_t>,
+              "computational_dag_edge_idx_vector_impl must satisfy the computation_dag_typed_vertices_edge_desc concept");
 
-} // namespace osp
+}    // namespace osp
 
-template<typename v_impl>
+template <typename v_impl>
 struct std::hash<osp::directed_edge_descriptor_impl<v_impl>> {
     using vertex_idx = typename v_impl::vertex_idx_type;
 
@@ -299,4 +309,4 @@ struct std::hash<osp::directed_edge_descriptor_impl<v_impl>> {
 
         return h1;
     }
-};
\ No newline at end of file
+};
diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp
index 0a1b676a..b6621760 100644
--- a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp
@@ -17,14 +17,14 @@ limitations under the License.
 */
 #pragma once
 
+#include <algorithm>
+#include <vector>
+
 #include "cdag_vertex_impl.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "osp/graph_algorithms/computational_dag_construction_util.hpp"
 #include "osp/graph_implementations/integral_range.hpp"
-#include <vector>
-
-#include <algorithm>
 
 namespace osp {
 
@@ -54,11 +54,12 @@ namespace osp {
  *   - `mem_weight`: Of type `mem_weight_type`.
  *   - `vertex_type`: Of type `cdag_vertex_type_type`.
  * - It must be constructible with the signature:
- *   `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, cdag_vertex_type_type vertex_type)`
+ *   `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight,
+ *   cdag_vertex_type_type vertex_type)`
  *
  * @see cdag_vertex_impl for a reference implementation of the vertex type.
  */
-template<typename v_impl>
+template <typename v_impl>
 class computational_dag_vector_impl {
   public:
     using vertex_idx = typename v_impl::vertex_idx_type;
@@ -76,8 +77,7 @@ class computational_dag_vector_impl {
      * @param num_vertices The number of vertices to initialize.
      */
     explicit computational_dag_vector_impl(const vertex_idx num_vertices)
-        : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0),
-          num_vertex_types_(0) {
+        : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0), num_vertex_types_(0) {
         for (vertex_idx i = 0; i < num_vertices; ++i) {
             vertices_[i].id = i;
         }
@@ -95,17 +95,18 @@ class computational_dag_vector_impl {
      * @tparam Graph_t The type of the source graph. Must satisfy `is_computational_dag_v`.
      * @param other The source graph to copy from.
      */
-    template<typename Graph_t>
+    template <typename Graph_t>
     explicit computational_dag_vector_impl(const Graph_t &other) {
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the is_computation_dag concept");
         constructComputationalDag(other, *this);
     }
 
     computational_dag_vector_impl(computational_dag_vector_impl &&other) noexcept
-        : vertices_(std::move(other.vertices_)), out_neigbors(std::move(other.out_neigbors)),
-          in_neigbors(std::move(other.in_neigbors)), num_edges_(other.num_edges_),
+        : vertices_(std::move(other.vertices_)),
+          out_neigbors(std::move(other.out_neigbors)),
+          in_neigbors(std::move(other.in_neigbors)),
+          num_edges_(other.num_edges_),
           num_vertex_types_(other.num_vertex_types_) {
-
         other.num_edges_ = 0;
         other.num_vertex_types_ = 0;
     };
@@ -191,8 +192,10 @@ class computational_dag_vector_impl {
      * @param vertex_type Type of the vertex.
      * @return The index of the newly added vertex.
      */
-    vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight,
-                          const vertex_mem_weight_type mem_weight, const vertex_type_type vertex_type = 0) {
+    vertex_idx add_vertex(const vertex_work_weight_type work_weight,
+                          const vertex_comm_weight_type comm_weight,
+                          const vertex_mem_weight_type mem_weight,
+                          const vertex_type_type vertex_type = 0) {
         vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type);
         out_neigbors.push_back({});
         in_neigbors.push_back({});
@@ -227,8 +230,10 @@ class computational_dag_vector_impl {
      * @return True if the edge was added, false if it already exists or vertices are invalid.
      */
     bool add_edge(const vertex_idx source, const vertex_idx target) {
-        if (source >= static_cast<vertex_idx>(vertices_.size()) || target >= static_cast<vertex_idx>(vertices_.size()) || source == target)
+        if (source >= static_cast<vertex_idx>(vertices_.size()) || target >= static_cast<vertex_idx>(vertices_.size())
+            || source == target) {
             return false;
+        }
 
         const auto &out = out_neigbors.at(source);
         if (std::find(out.begin(), out.end(), target) != out.end()) {
@@ -274,4 +279,4 @@ static_assert(is_directed_graph_v<computational_dag_vector_impl<cdag_vertex_impl
 static_assert(is_computational_dag_typed_vertices_v<computational_dag_vector_impl<cdag_vertex_impl_unsigned>>,
               "computational_dag_vector_impl must satisfy the is_computation_dag concept");
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp
index 3ab94872..56b9ac38 100644
--- a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp
@@ -17,11 +17,12 @@ limitations under the License.
 */
 #pragma once
 
+#include <vector>
+
 #include "cdag_vertex_impl.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/graph_implementations/integral_range.hpp"
 #include "vector_cast_view.hpp"
-#include <vector>
 
 namespace osp {
 
@@ -52,13 +53,13 @@ namespace osp {
  *   - `mem_weight`: Of type `mem_weight_type`.
  *   - `vertex_type`: Of type `cdag_vertex_type_type`.
  * - It must be constructible with the signature:
- *   `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, cdag_vertex_type_type vertex_type)`
+ *   `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight,
+ *   cdag_vertex_type_type vertex_type)`
  *
  * @tparam index_t The type used for vertex indices in the adjacency lists.
  */
-template<typename v_impl, typename index_t>
+template <typename v_impl, typename index_t>
 class dag_vector_adapter {
-
   public:
     using vertex_idx = typename v_impl::vertex_idx_type;
 
@@ -78,7 +79,12 @@ class dag_vector_adapter {
      * @warning The adapter stores pointers to these vectors. They must remain valid for the lifetime of the adapter.
      */
     dag_vector_adapter(const std::vector<std::vector<index_t>> &out_neigbors_,
-                       const std::vector<std::vector<index_t>> &in_neigbors_) : vertices_(out_neigbors_.size()), out_neigbors(&out_neigbors_), in_neigbors(&in_neigbors_), num_edges_(0), num_vertex_types_(1) {
+                       const std::vector<std::vector<index_t>> &in_neigbors_)
+        : vertices_(out_neigbors_.size()),
+          out_neigbors(&out_neigbors_),
+          in_neigbors(&in_neigbors_),
+          num_edges_(0),
+          num_vertex_types_(1) {
         for (vertex_idx i = 0; i < static_cast<vertex_idx>(out_neigbors_.size()); ++i) {
             vertices_[i].id = i;
             num_edges_ += out_neigbors_[i].size();
@@ -99,7 +105,8 @@ class dag_vector_adapter {
      * @param in_neigbors_ New in-neighbors adjacency list.
      * @param out_neigbors_ New out-neighbors adjacency list.
      */
-    void set_in_out_neighbors(const std::vector<std::vector<index_t>> &in_neigbors_, const std::vector<std::vector<index_t>> &out_neigbors_) {
+    void set_in_out_neighbors(const std::vector<std::vector<index_t>> &in_neigbors_,
+                              const std::vector<std::vector<index_t>> &out_neigbors_) {
         out_neigbors = &out_neigbors_;
         in_neigbors = &in_neigbors_;
 
@@ -204,4 +211,4 @@ static_assert(is_directed_graph_v<dag_vector_adapter<cdag_vertex_impl_unsigned,
 static_assert(is_computational_dag_typed_vertices_v<dag_vector_adapter<cdag_vertex_impl_unsigned, int>>,
               "dag_vector_adapter must satisfy the is_computation_dag concept");
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp b/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp
index aed4fe18..56cce059 100644
--- a/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp
@@ -24,16 +24,14 @@ limitations under the License.
 
 namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class edge_range_vector_impl {
-
     using directed_edge_descriptor = typename directed_graph_edge_desc_traits<Graph_t>::directed_edge_descriptor;
     using vertex_idx = typename directed_graph_traits<Graph_t>::vertex_idx;
     using iter = typename Graph_t::out_edges_iterator_t;
     const Graph_t &graph;
 
     struct edge_iterator {
-
         vertex_idx current_vertex;
         std::size_t current_edge_idx;
         iter current_edge;
@@ -48,6 +46,7 @@ class edge_range_vector_impl {
         using reference = const value_type &;
 
         edge_iterator() : current_vertex(0u), current_edge_idx(0u), graph(nullptr) {}
+
         edge_iterator(const edge_iterator &other)
             : current_vertex(other.current_vertex), current_edge_idx(other.current_edge_idx), graph(other.graph) {}
 
@@ -61,7 +60,6 @@ class edge_range_vector_impl {
         }
 
         edge_iterator(const Graph_t &graph_) : current_vertex(0u), current_edge_idx(0u), graph(&graph_) {
-
             while (current_vertex != graph->num_vertices()) {
                 if (graph->out_edges(current_vertex).begin() != graph->out_edges(current_vertex).end()) {
                     current_edge = graph->out_edges(current_vertex).begin();
@@ -73,19 +71,14 @@ class edge_range_vector_impl {
 
         edge_iterator(std::size_t current_edge_idx_, const Graph_t &graph_)
             : current_vertex(0u), current_edge_idx(current_edge_idx_), graph(&graph_) {
-
             if (current_edge_idx < graph->num_edges()) {
-
                 std::size_t tmp = 0u;
 
                 if (tmp < current_edge_idx) {
-
                     while (current_vertex != graph->num_vertices()) {
-
                         current_edge = graph->out_edges(current_vertex).begin();
 
                         while (current_edge != graph->out_edges(current_vertex).end()) {
-
                             if (tmp == current_edge_idx) {
                                 break;
                             }
@@ -105,20 +98,18 @@ class edge_range_vector_impl {
         }
 
         const value_type &operator*() const { return *current_edge; }
+
         const value_type *operator->() const { return &(*current_edge); }
 
         // Prefix increment
         edge_iterator &operator++() {
-
             current_edge++;
             current_edge_idx++;
 
             if (current_edge == graph->out_edges(current_vertex).end()) {
-
                 current_vertex++;
 
                 while (current_vertex != graph->num_vertices()) {
-
                     if (graph->out_edges(current_vertex).begin() != graph->out_edges(current_vertex).end()) {
                         current_edge = graph->out_edges(current_vertex).begin();
                         break;
@@ -139,6 +130,7 @@ class edge_range_vector_impl {
         }
 
         inline bool operator==(const edge_iterator &other) const { return current_edge_idx == other.current_edge_idx; }
+
         inline bool operator!=(const edge_iterator &other) const { return current_edge_idx != other.current_edge_idx; }
     };
 
@@ -152,9 +144,8 @@ class edge_range_vector_impl {
     auto size() const { return graph.num_edges(); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 class edge_source_range {
-
     using directed_edge_descriptor = typename directed_graph_edge_desc_traits<Graph_t>::directed_edge_descriptor;
     using vertex_idx = typename directed_graph_traits<Graph_t>::vertex_idx;
     using iter = typename Graph_t::in_edges_iterator_t;
@@ -163,7 +154,6 @@ class edge_source_range {
     const std::vector<directed_edge_descriptor> &edges;
 
     struct source_iterator {
-
         const Graph_t *graph;
         iter current_edge;
 
@@ -204,6 +194,7 @@ class edge_source_range {
         }
 
         inline bool operator==(const source_iterator &other) const { return current_edge == other.current_edge; }
+
         inline bool operator!=(const source_iterator &other) const { return current_edge != other.current_edge; }
     };
 
@@ -218,9 +209,8 @@ class edge_source_range {
     auto size() const { return edges.size(); }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 class edge_target_range {
-
     using directed_edge_descriptor = typename directed_graph_edge_desc_traits<Graph_t>::directed_edge_descriptor;
     using vertex_idx = typename directed_graph_traits<Graph_t>::vertex_idx;
     using iter = typename Graph_t::out_edges_iterator_t;
@@ -228,7 +218,6 @@ class edge_target_range {
     const std::vector<directed_edge_descriptor> &edges;
 
     struct target_iterator {
-
         const Graph_t *graph;
         iter current_edge;
 
@@ -240,6 +229,7 @@ class edge_target_range {
         using reference = const value_type &;
 
         target_iterator() : graph(nullptr) {}
+
         target_iterator(const target_iterator &other) : graph(other.graph), current_edge(other.current_edge) {}
 
         target_iterator &operator=(const target_iterator &other) {
@@ -268,6 +258,7 @@ class edge_target_range {
         }
 
         inline bool operator==(const target_iterator &other) const { return current_edge == other.current_edge; }
+
         inline bool operator!=(const target_iterator &other) const { return current_edge != other.current_edge; }
     };
 
@@ -282,4 +273,4 @@ class edge_target_range {
     auto size() const { return edges.size(); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp
index b42ea17d..4de995e9 100644
--- a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp
@@ -31,9 +31,8 @@ namespace osp {
  * @tparam from_t The original type of elements in the vector.
  * @tparam to_t The target type to cast elements to.
  */
-template<typename from_t, typename to_t>
+template <typename from_t, typename to_t>
 class vector_cast_view {
-
     using iter = typename std::vector<from_t>::const_iterator;
     const std::vector<from_t> &vec;
 
@@ -53,6 +52,7 @@ class vector_cast_view {
         iter current_edge;
 
         cast_iterator() = default;
+
         explicit cast_iterator(iter current_edge_) : current_edge(current_edge_) {}
 
         value_type operator*() const { return static_cast<to_t>(*current_edge); }
@@ -96,10 +96,15 @@ class vector_cast_view {
         difference_type operator-(const cast_iterator &other) const { return current_edge - other.current_edge; }
 
         bool operator==(const cast_iterator &other) const { return current_edge == other.current_edge; }
+
         bool operator!=(const cast_iterator &other) const { return current_edge != other.current_edge; }
+
         bool operator<(const cast_iterator &other) const { return current_edge < other.current_edge; }
+
         bool operator>(const cast_iterator &other) const { return current_edge > other.current_edge; }
+
         bool operator<=(const cast_iterator &other) const { return current_edge <= other.current_edge; }
+
         bool operator>=(const cast_iterator &other) const { return current_edge >= other.current_edge; }
     };
 
@@ -144,4 +149,4 @@ class vector_cast_view {
     [[nodiscard]] auto operator[](std::size_t i) const { return static_cast<to_t>(vec[i]); }
 };
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/graph_implementations/boost_graphs/boost_graph.hpp b/include/osp/graph_implementations/boost_graphs/boost_graph.hpp
index 35360fdd..8dcb069d 100644
--- a/include/osp/graph_implementations/boost_graphs/boost_graph.hpp
+++ b/include/osp/graph_implementations/boost_graphs/boost_graph.hpp
@@ -17,27 +17,27 @@ limitations under the License.
 */
 
 #pragma once
-#include <functional>
-#include <iostream>
-
 #include <boost/graph/adjacency_list.hpp>
 #include <boost/graph/graph_traits.hpp>
+#include <functional>
+#include <iostream>
 
 #include "osp/auxiliary/hash_util.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
-#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "osp/concepts/constructable_computational_dag_concept.hpp"
+#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
 #include "osp/graph_algorithms/computational_dag_construction_util.hpp"
 #include "source_iterator_range.hpp"
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t>
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t>
 struct boost_vertex {
-
     boost_vertex() : workWeight(0), communicationWeight(0), memoryWeight(0), nodeType(0) {}
-    boost_vertex(vertex_workw_t workWeight_, vertex_commw_t communicationWeight_, vertex_memw_t memoryWeight_,
+
+    boost_vertex(vertex_workw_t workWeight_,
+                 vertex_commw_t communicationWeight_,
+                 vertex_memw_t memoryWeight_,
                  vertex_type_t nodeType_ = 0)
-        : workWeight(workWeight_), communicationWeight(communicationWeight_), memoryWeight(memoryWeight_),
-          nodeType(nodeType_) {}
+        : workWeight(workWeight_), communicationWeight(communicationWeight_), memoryWeight(memoryWeight_), nodeType(nodeType_) {}
 
     vertex_workw_t workWeight;
     vertex_commw_t communicationWeight;
@@ -48,9 +48,10 @@ struct boost_vertex {
 using boost_vertex_def_int = boost_vertex<int, int, int, unsigned>;
 using boost_vertex_def_uint = boost_vertex<unsigned, unsigned, unsigned, unsigned>;
 
-template<typename edge_commw_t>
+template <typename edge_commw_t>
 struct boost_edge {
     boost_edge() : communicationWeight(0) {}
+
     boost_edge(edge_commw_t communicationWeight_) : communicationWeight(communicationWeight_) {}
 
     edge_commw_t communicationWeight;
@@ -59,17 +60,17 @@ struct boost_edge {
 using boost_edge_def_int = boost_edge<int>;
 using boost_edge_def_uint = boost_edge<unsigned>;
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t,
-         typename edge_commw_t>
-using boost_graph_impl =
-    boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS,
-                          boost_vertex<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t>,
-                          boost_edge<edge_commw_t>>;
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
+using boost_graph_impl = boost::adjacency_list<boost::vecS,
+                                               boost::vecS,
+                                               boost::bidirectionalS,
+                                               boost_vertex<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t>,
+                                               boost_edge<edge_commw_t>>;
 
-using boost_edge_desc = typename boost::graph_traits<
-    boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS>>::edge_descriptor;
+using boost_edge_desc =
+    typename boost::graph_traits<boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS>>::edge_descriptor;
 
-template<>
+template <>
 struct std::hash<boost_edge_desc> {
     std::size_t operator()(const boost_edge_desc &p) const noexcept {
         auto h1 = std::hash<std::size_t>{}(p.m_source);
@@ -89,12 +90,9 @@ struct std::hash<boost_edge_desc> {
  * calculating the longest path, and retrieving topological order of vertices.
  */
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t,
-         typename edge_commw_t>
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
 class boost_graph {
-
-    using boost_graph_impl_t =
-        boost_graph_impl<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>;
+    using boost_graph_impl_t = boost_graph_impl<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>;
 
   public:
     // graph_traits specialization
@@ -108,10 +106,10 @@ class boost_graph {
     using vertex_type_type = vertex_type_t;
     using edge_comm_weight_type = edge_commw_t;
 
-    boost_graph(
-        const std::vector<std::vector<vertex_idx>> &out_, const std::vector<vertex_work_weight_type> &workW_,
-        const std::vector<vertex_comm_weight_type> &commW_,
-        const std::unordered_map<std::pair<vertex_idx, vertex_idx>, edge_comm_weight_type, osp::pair_hash> &comm_edge_W)
+    boost_graph(const std::vector<std::vector<vertex_idx>> &out_,
+                const std::vector<vertex_work_weight_type> &workW_,
+                const std::vector<vertex_comm_weight_type> &commW_,
+                const std::unordered_map<std::pair<vertex_idx, vertex_idx>, edge_comm_weight_type, osp::pair_hash> &comm_edge_W)
         : number_of_vertex_types(0) {
         graph.m_vertices.reserve(out_.size());
 
@@ -122,7 +120,6 @@ class boost_graph {
             add_vertex(workW_[i], commW_[i]);
         }
         for (size_t i = 0; i < out_.size(); ++i) {
-
             for (const auto &j : out_[i]) {
                 assert(comm_edge_W.find(std::make_pair(i, j)) != comm_edge_W.cend());
                 add_edge(i, j, comm_edge_W.at(std::make_pair(i, j)));
@@ -131,7 +128,8 @@ class boost_graph {
         updateNumberOfVertexTypes();
     }
 
-    boost_graph(const std::vector<std::vector<vertex_idx>> &out_, const std::vector<vertex_work_weight_type> &workW_,
+    boost_graph(const std::vector<std::vector<vertex_idx>> &out_,
+                const std::vector<vertex_work_weight_type> &workW_,
                 const std::vector<vertex_comm_weight_type> &commW_)
         : number_of_vertex_types(0) {
         graph.m_vertices.reserve(out_.size());
@@ -143,7 +141,6 @@ class boost_graph {
             add_vertex(workW_[i], commW_[i]);
         }
         for (size_t i = 0; i < out_.size(); ++i) {
-
             for (const auto &j : out_[i]) {
                 add_edge(i, j);
             }
@@ -151,8 +148,10 @@ class boost_graph {
         updateNumberOfVertexTypes();
     }
 
-    boost_graph(const std::vector<std::vector<vertex_idx>> &out_, const std::vector<vertex_work_weight_type> &workW_,
-                const std::vector<vertex_comm_weight_type> &commW_, const std::vector<vertex_type_type> &nodeType_)
+    boost_graph(const std::vector<std::vector<vertex_idx>> &out_,
+                const std::vector<vertex_work_weight_type> &workW_,
+                const std::vector<vertex_comm_weight_type> &commW_,
+                const std::vector<vertex_type_type> &nodeType_)
         : number_of_vertex_types(0) {
         graph.m_vertices.reserve(out_.size());
 
@@ -164,7 +163,6 @@ class boost_graph {
             add_vertex(workW_[i], commW_[i], 0, nodeType_[i]);
         }
         for (size_t i = 0; i < out_.size(); ++i) {
-
             for (const auto &j : out_[i]) {
                 add_edge(i, j);
             }
@@ -176,9 +174,10 @@ class boost_graph {
      * @brief Default constructor for the ComputationalDag class.
      */
     explicit boost_graph() : graph(0), number_of_vertex_types(0) {}
+
     boost_graph(vertex_idx number_of_nodes) : graph(number_of_nodes), number_of_vertex_types(0) {}
-    boost_graph(unsigned number_of_nodes)
-        : graph(static_cast<vertex_idx>(number_of_nodes)), number_of_vertex_types(0) {}
+
+    boost_graph(unsigned number_of_nodes) : graph(static_cast<vertex_idx>(number_of_nodes)), number_of_vertex_types(0) {}
 
     boost_graph(const boost_graph &other) = default;
 
@@ -201,9 +200,8 @@ class boost_graph {
 
     virtual ~boost_graph() = default;
 
-    template<typename Graph_t>
+    template <typename Graph_t>
     boost_graph(const Graph_t &other) : number_of_vertex_types(0) {
-
         static_assert(osp::is_computational_dag_v<Graph_t>, "Graph_t must satisfy the is_computation_dag concept");
 
         graph.m_vertices.reserve(other.num_vertices());
@@ -212,13 +210,14 @@ class boost_graph {
     }
 
     inline const boost_graph_impl_t &get_boost_graph() const { return graph; }
+
     inline boost_graph_impl_t &get_boost_graph() { return graph; }
 
     inline size_t num_vertices() const { return boost::num_vertices(graph); }
+
     inline size_t num_edges() const { return boost::num_edges(graph); }
 
     void updateNumberOfVertexTypes() {
-
         number_of_vertex_types = 0;
         for (const auto &v : vertices()) {
             if (vertex_type(v) >= number_of_vertex_types) {
@@ -230,6 +229,7 @@ class boost_graph {
     inline unsigned num_vertex_types() const { return number_of_vertex_types; };
 
     auto vertices() const { return boost::make_iterator_range(boost::vertices(graph)); }
+
     auto vertices() { return boost::make_iterator_range(boost::vertices(graph)); }
 
     // template<typename T>
@@ -262,43 +262,42 @@ class boost_graph {
 
     auto edges() { return boost::extensions::make_source_iterator_range(boost::edges(graph)); }
 
-    auto in_edges(const vertex_idx &v) const {
-        return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph));
-    }
+    auto in_edges(const vertex_idx &v) const { return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); }
 
-    auto in_edges(const vertex_idx &v) {
-        return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph));
-    }
+    auto in_edges(const vertex_idx &v) { return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); }
 
     auto out_edges(const vertex_idx &v) const {
         return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph));
     }
 
-    auto out_edges(const vertex_idx &v) {
-        return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph));
-    }
+    auto out_edges(const vertex_idx &v) { return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph)); }
 
     vertex_idx source(const directed_edge_descriptor &e) const { return boost::source(e, graph); }
+
     vertex_idx target(const directed_edge_descriptor &e) const { return boost::target(e, graph); }
 
     inline size_t out_degree(const vertex_idx &v) const { return boost::out_degree(v, graph); }
+
     inline size_t in_degree(const vertex_idx &v) const { return boost::in_degree(v, graph); }
 
     vertex_work_weight_type vertex_work_weight(const vertex_idx &v) const { return graph[v].workWeight; }
+
     vertex_comm_weight_type vertex_comm_weight(const vertex_idx &v) const { return graph[v].communicationWeight; }
+
     vertex_mem_weight_type vertex_mem_weight(const vertex_idx &v) const { return graph[v].memoryWeight; }
+
     vertex_type_type vertex_type(const vertex_idx &v) const { return graph[v].nodeType; }
 
-    edge_comm_weight_type edge_comm_weight(const directed_edge_descriptor &e) const {
-        return graph[e].communicationWeight;
-    }
+    edge_comm_weight_type edge_comm_weight(const directed_edge_descriptor &e) const { return graph[e].communicationWeight; }
 
     void set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type memory_weight) {
         graph[v].memoryWeight = memory_weight;
     }
+
     void set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) {
         graph[v].workWeight = work_weight;
     }
+
     void set_vertex_type(const vertex_idx &v, const vertex_type_type node_type) {
         graph[v].nodeType = node_type;
         number_of_vertex_types = std::max(number_of_vertex_types, node_type + 1);
@@ -307,22 +306,24 @@ class boost_graph {
     void set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) {
         graph[v].communicationWeight = comm_weight;
     }
+
     void set_edge_comm_weight(const directed_edge_descriptor &e, const edge_comm_weight_type comm_weight) {
         graph[e].communicationWeight = comm_weight;
     }
 
-    vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight,
-                          const vertex_mem_weight_type memory_weight = 0, const vertex_type_type node_type = 0) {
+    vertex_idx add_vertex(const vertex_work_weight_type work_weight,
+                          const vertex_comm_weight_type comm_weight,
+                          const vertex_mem_weight_type memory_weight = 0,
+                          const vertex_type_type node_type = 0) {
         number_of_vertex_types = std::max(number_of_vertex_types, node_type + 1);
         return boost::add_vertex(boost_vertex{work_weight, comm_weight, memory_weight, node_type}, graph);
     }
 
-    std::pair<boost::detail::edge_desc_impl<boost::bidirectional_tag, std::size_t>, bool>
-    add_edge(const vertex_idx &src, const vertex_idx &tar, edge_commw_t comm_weight = DEFAULT_EDGE_COMM_WEIGHT) {
-
+    std::pair<boost::detail::edge_desc_impl<boost::bidirectional_tag, std::size_t>, bool> add_edge(
+        const vertex_idx &src, const vertex_idx &tar, edge_commw_t comm_weight = DEFAULT_EDGE_COMM_WEIGHT) {
         const auto pair = boost::add_edge(src, tar, {comm_weight}, graph);
 
-        number_of_vertex_types = std::max(number_of_vertex_types, 1u); // in case adding edges adds vertices
+        number_of_vertex_types = std::max(number_of_vertex_types, 1u);    // in case adding edges adds vertices
         return pair;
     }
 
@@ -343,37 +344,40 @@ class boost_graph {
     static constexpr edge_comm_weight_type DEFAULT_EDGE_COMM_WEIGHT = 1;
 };
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
 inline auto edges(const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
     return graph.edges();
 }
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
 inline auto out_edges(osp::vertex_idx_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> v,
                       const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
     return graph.out_edges(v);
 }
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
 inline auto in_edges(osp::vertex_idx_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> v,
                      const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
     return graph.in_edges(v);
 }
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
-inline osp::vertex_idx_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> source(const osp::edge_desc_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> &edge, const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
+inline osp::vertex_idx_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> source(
+    const osp::edge_desc_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> &edge,
+    const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
     return graph.source(edge);
 }
 
-template<typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
-inline osp::vertex_idx_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> target(const osp::edge_desc_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> &edge, const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
+template <typename vertex_workw_t, typename vertex_commw_t, typename vertex_memw_t, typename vertex_type_t, typename edge_commw_t>
+inline osp::vertex_idx_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> target(
+    const osp::edge_desc_t<boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t>> &edge,
+    const boost_graph<vertex_workw_t, vertex_commw_t, vertex_memw_t, vertex_type_t, edge_commw_t> &graph) {
     return graph.target(edge);
 }
 
 using boost_graph_int_t = boost_graph<int, int, int, unsigned, int>;
 using boost_graph_uint_t = boost_graph<unsigned, unsigned, unsigned, unsigned, unsigned>;
 
-
 static_assert(osp::is_directed_graph_edge_desc_v<boost_graph_int_t>,
               "boost_graph_adapter does not satisfy the directed_graph_edge_desc concept");
 
@@ -390,4 +394,4 @@ static_assert(osp::is_constructable_cdag_edge_v<boost_graph_int_t>,
               "boost_graph_adapter must satisfy the is_constructable_cdag_edge concept");
 
 static_assert(osp::is_constructable_cdag_comm_edge_v<boost_graph_int_t>,
-              "boost_graph_adapter must satisfy the is_constructable_cdag_comm_edge concept");
\ No newline at end of file
+              "boost_graph_adapter must satisfy the is_constructable_cdag_comm_edge concept");
diff --git a/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp b/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp
index a1ab3634..17fcf796 100644
--- a/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp
+++ b/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp
@@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
@@ -28,14 +28,15 @@ limitations under the License.
 #include <vector>
 
 namespace boost::extensions {
-template<class IncidenceGraph, class Buffer, class BFSVisitor, class ColorMap, class SourceIterator>
-void inv_breadth_first_visit(const IncidenceGraph &g, SourceIterator sources_begin, SourceIterator sources_end,
-                             Buffer &Q, BFSVisitor vis, ColorMap color) {
-    BOOST_CONCEPT_ASSERT((IncidenceGraphConcept<IncidenceGraph>));
+
+template <class IncidenceGraph, class Buffer, class BFSVisitor, class ColorMap, class SourceIterator>
+void inv_breadth_first_visit(
+    const IncidenceGraph &g, SourceIterator sources_begin, SourceIterator sources_end, Buffer &Q, BFSVisitor vis, ColorMap color) {
+    BOOST_CONCEPT_ASSERT((IncidenceGraphConcept<IncidenceGraph>) );
     typedef graph_traits<IncidenceGraph> GTraits;
     typedef typename graph_traits<IncidenceGraph>::vertex_descriptor Vertex;
-    BOOST_CONCEPT_ASSERT((BFSVisitorConcept<BFSVisitor, IncidenceGraph>));
-    BOOST_CONCEPT_ASSERT((ReadWritePropertyMapConcept<ColorMap, Vertex>));
+    BOOST_CONCEPT_ASSERT((BFSVisitorConcept<BFSVisitor, IncidenceGraph>) );
+    BOOST_CONCEPT_ASSERT((ReadWritePropertyMapConcept<ColorMap, Vertex>) );
     typedef typename property_traits<ColorMap>::value_type ColorValue;
     typedef color_traits<ColorValue> Color;
     typename GTraits::in_edge_iterator ei, ei_end;
@@ -61,19 +62,19 @@ void inv_breadth_first_visit(const IncidenceGraph &g, SourceIterator sources_beg
                 Q.push(v);
             } else {
                 vis.non_tree_edge(*ei, g);
-                if (v_color == Color::gray())
+                if (v_color == Color::gray()) {
                     vis.gray_target(*ei, g);
-                else
+                } else {
                     vis.black_target(*ei, g);
+                }
             }
-        } // end for
+        }    // end for
         put(color, u, Color::black());
         vis.finish_vertex(u, g);
-    } // end while
+    }    // end while
 }
 
-
-template<typename IncidenceGraph, class SourceVertex, class BFSVisitor>
+template <typename IncidenceGraph, class SourceVertex, class BFSVisitor>
 void inv_breadth_first_search(const IncidenceGraph &graph, SourceVertex source, BFSVisitor vis) {
     const std::array sources = {source};
     typedef typename graph_traits<IncidenceGraph>::vertex_descriptor VertexT;
@@ -82,4 +83,4 @@ void inv_breadth_first_search(const IncidenceGraph &graph, SourceVertex source,
     inv_breadth_first_visit(graph, sources.begin(), sources.end(), q, vis, boost::associative_property_map(color_map));
 }
 
-}
\ No newline at end of file
+}    // namespace boost::extensions
diff --git a/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp b/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp
index f025c28c..868a34f4 100644
--- a/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp
+++ b/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp
@@ -22,37 +22,38 @@ limitations under the License.
 
 namespace boost::extensions {
 
-template<typename IteratorType>
+template <typename IteratorType>
 struct source_iterator_range {
   public:
-
     IteratorType begin() { return _begin; }
+
     IteratorType begin() const { return _begin; }
+
     IteratorType cbegin() const { return _begin; }
 
     IteratorType end() { return _end; }
+
     IteratorType end() const { return _end; }
+
     IteratorType cend() const { return _end; }
 
     std::size_t size() const { return std::distance(cbegin(), cend()); }
 
-    template<class RangeType>
+    template <class RangeType>
     explicit source_iterator_range(RangeType &r) : _begin(boost::begin(r)), _end(boost::end(r)) {}
 
   private:
     IteratorType _begin, _end;
 };
 
-template<class ForwardRange>
-source_iterator_range<decltype(boost::begin(std::declval<ForwardRange &>()))>
-make_source_iterator_range(const ForwardRange &r) {
+template <class ForwardRange>
+source_iterator_range<decltype(boost::begin(std::declval<ForwardRange &>()))> make_source_iterator_range(const ForwardRange &r) {
     return source_iterator_range<decltype(boost::begin(std::declval<ForwardRange &>()))>(r);
 }
 
-template<class ForwardRange>
-source_iterator_range<decltype(boost::begin(std::declval<ForwardRange &>()))>
-make_source_iterator_range(ForwardRange &r) {
+template <class ForwardRange>
+source_iterator_range<decltype(boost::begin(std::declval<ForwardRange &>()))> make_source_iterator_range(ForwardRange &r) {
     return source_iterator_range<decltype(boost::begin(std::declval<ForwardRange &>()))>(r);
 }
 
-} // namespace boost::extensions
+}    // namespace boost::extensions
diff --git a/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp b/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp
index 4e3f8c70..616b750c 100644
--- a/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp
+++ b/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp
@@ -16,23 +16,22 @@ limitations under the License.
 @author Christos Matzoros, Toni Boehnlein, Pal Andras Papp, Raphael S. Steiner
 */
 
-
-
 #pragma once
 
 #ifdef EIGEN_FOUND
 
-#include <Eigen/SparseCore>
-#include "osp/concepts/graph_traits.hpp"
+#    include <Eigen/SparseCore>
+
+#    include "osp/concepts/graph_traits.hpp"
 
 namespace osp {
 
-template<typename Graph, typename eigen_idx_type>
+template <typename Graph, typename eigen_idx_type>
 class EigenCSRRange {
-    const Graph& graph_;
+    const Graph &graph_;
     eigen_idx_type index_;
 
-public:
+  public:
     using CSRMatrix = Eigen::SparseMatrix<double, Eigen::RowMajor, eigen_idx_type>;
     using Inner = typename CSRMatrix::InnerIterator;
 
@@ -42,12 +41,12 @@ class EigenCSRRange {
         bool at_end_;
 
         void skip_diagonal() {
-            while ( ((!at_end_) && (it_.row() == skip_ )) & (it_.col() == skip_)) {
+            while (!at_end_ && it_.row() == skip_ && it_.col() == skip_) {    // && so it_ is never read once exhausted
                 ++(*this);
             }
         }
 
-    public:
+      public:
         using value_type = std::size_t;
         using reference = value_type;
         using pointer = void;
@@ -55,17 +54,17 @@ class EigenCSRRange {
         using iterator_category = std::input_iterator_tag;
 
         iterator() = default;
-        iterator(const iterator& other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {}
-        iterator& operator=(const iterator& other) {
+
+        iterator(const iterator &other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {}
+
+        iterator &operator=(const iterator &other) {
             it_ = other.it_;
             skip_ = other.skip_;
             at_end_ = other.at_end_;
             return *this;
         }
-        
 
-        iterator(const CSRMatrix& mat, eigen_idx_type idx, bool end = false)
-            : skip_(idx), at_end_(end) {
+        iterator(const CSRMatrix &mat, eigen_idx_type idx, bool end = false) : skip_(idx), at_end_(end) {
             if (!end) {
                 it_ = Inner(mat, idx);
                 at_end_ = !it_;
@@ -74,7 +73,8 @@ class EigenCSRRange {
         }
 
         reference operator*() const { return static_cast<std::size_t>(it_.col()); }
-        iterator& operator++() {
+
+        iterator &operator++() {
             ++it_;
             at_end_ = !it_;
             skip_diagonal();
@@ -86,30 +86,25 @@ class EigenCSRRange {
             ++(*this);
             return temp;
         }
-        
-        bool operator==(const iterator&) const { return at_end_; }
-        bool operator!=(const iterator&) const { return !at_end_; }
+
+        bool operator==(const iterator &) const { return at_end_; }
+
+        bool operator!=(const iterator &) const { return !at_end_; }
     };
 
-    EigenCSRRange(const Graph& graph, eigen_idx_type idx)
-        : graph_(graph), index_(idx) {}
+    EigenCSRRange(const Graph &graph, eigen_idx_type idx) : graph_(graph), index_(idx) {}
 
-    iterator begin() const {
-        return iterator(*graph_.getCSR(), index_);
-    }
+    iterator begin() const { return iterator(*graph_.getCSR(), index_); }
 
-    iterator end() const {
-        return iterator(*graph_.getCSR(), index_, true);
-    }
+    iterator end() const { return iterator(*graph_.getCSR(), index_, true); }
 };
 
-
-template<typename Graph, typename eigen_idx_type>
+template <typename Graph, typename eigen_idx_type>
 class EigenCSCRange {
-    const Graph& graph_;
+    const Graph &graph_;
     eigen_idx_type index_;
 
-public:
+  public:
     using CSCMatrix = Eigen::SparseMatrix<double, Eigen::ColMajor, eigen_idx_type>;
     using Inner = typename CSCMatrix::InnerIterator;
 
@@ -123,8 +118,8 @@ class EigenCSCRange {
                 ++(*this);
             }
         }
-        
-    public:
+
+      public:
         using value_type = std::size_t;
         using reference = value_type;
         using pointer = void;
@@ -132,17 +127,17 @@ class EigenCSCRange {
         using iterator_category = std::input_iterator_tag;
 
         iterator() = default;
-        iterator(const iterator& other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {}
-        iterator& operator=(const iterator& other) {
+
+        iterator(const iterator &other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {}
+
+        iterator &operator=(const iterator &other) {
             it_ = other.it_;
             skip_ = other.skip_;
             at_end_ = other.at_end_;
             return *this;
         }
 
-
-        iterator(const CSCMatrix& mat, eigen_idx_type idx, bool end = false)
-            : skip_(idx), at_end_(end) {
+        iterator(const CSCMatrix &mat, eigen_idx_type idx, bool end = false) : skip_(idx), at_end_(end) {
             if (!end) {
                 it_ = Inner(mat, idx);
                 at_end_ = !it_;
@@ -151,7 +146,8 @@ class EigenCSCRange {
         }
 
         reference operator*() const { return static_cast<std::size_t>(it_.row()); }
-        iterator& operator++() {
+
+        iterator &operator++() {
             ++it_;
             at_end_ = !it_;
             skip_diagonal();
@@ -163,23 +159,19 @@ class EigenCSCRange {
             ++(*this);
             return temp;
         }
-        
 
-        bool operator==(const iterator&) const { return at_end_; }
-        bool operator!=(const iterator&) const { return !at_end_; }
+        bool operator==(const iterator &) const { return at_end_; }
+
+        bool operator!=(const iterator &) const { return !at_end_; }
     };
 
-    EigenCSCRange(const Graph& graph, eigen_idx_type idx)
-        : graph_(graph), index_(idx) {}
+    EigenCSCRange(const Graph &graph, eigen_idx_type idx) : graph_(graph), index_(idx) {}
 
-    iterator begin() const {
-        return iterator(*graph_.getCSC(), index_);
-    }
+    iterator begin() const { return iterator(*graph_.getCSC(), index_); }
 
-    iterator end() const {
-        return iterator(*graph_.getCSC(), index_, true);
-    }
+    iterator end() const { return iterator(*graph_.getCSC(), index_, true); }
 };
-} // namespace osp
+
+}    // namespace osp
 
 #endif
diff --git a/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp b/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp
index 341389aa..7d989f69 100644
--- a/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp
+++ b/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp
@@ -20,30 +20,32 @@ limitations under the License.
 
 #ifdef EIGEN_FOUND
 
-#include <Eigen/SparseCore>
-#include "osp/concepts/directed_graph_concept.hpp"
-#include "osp/concepts/directed_graph_edge_desc_concept.hpp"
-#include "osp/concepts/computational_dag_concept.hpp"
-#include "osp/graph_implementations/integral_range.hpp"
-#include "eigen_sparse_iterator.hpp"
+#    include <Eigen/SparseCore>
+
+#    include "eigen_sparse_iterator.hpp"
+#    include "osp/concepts/computational_dag_concept.hpp"
+#    include "osp/concepts/directed_graph_concept.hpp"
+#    include "osp/concepts/directed_graph_edge_desc_concept.hpp"
+#    include "osp/graph_implementations/integral_range.hpp"
 
 namespace osp {
 
 /// @brief Implementation of a lower‐triangular sparse matrix as a directed acyclic graph.
 ///        Wraps Eigen's sparse matrix and exposes graph-like methods for scheduling and analysis.
-template<typename eigen_idx_type>
+template <typename eigen_idx_type>
 class SparseMatrixImp {
     static_assert(std::is_integral_v<eigen_idx_type>, "Eigen index type must be integral");
-private:
+
+  private:
     // Define Eigen-compatible matrix types using eigen_idx_type as the index type
-    using MatrixCSR = Eigen::SparseMatrix<double, Eigen::RowMajor, eigen_idx_type>;  // For parents
-    using MatrixCSC = Eigen::SparseMatrix<double, Eigen::ColMajor, eigen_idx_type>;  // For children
+    using MatrixCSR = Eigen::SparseMatrix<double, Eigen::RowMajor, eigen_idx_type>;    // For parents
+    using MatrixCSC = Eigen::SparseMatrix<double, Eigen::ColMajor, eigen_idx_type>;    // For children
 
     // Internal pointers to the sparse matrices (not owning)
-    MatrixCSR* L_csr_p = nullptr;
-    MatrixCSC* L_csc_p = nullptr;
+    MatrixCSR *L_csr_p = nullptr;
+    MatrixCSC *L_csc_p = nullptr;
 
-public:
+  public:
     // Vertex index type must match Eigen's StorageIndex (signed 32-bit)
     using vertex_idx = size_t;
 
@@ -53,32 +55,28 @@ class SparseMatrixImp {
     using vertex_mem_weight_type = int;
     using vertex_type_type = unsigned;
 
-    using eigen_idx_t = eigen_idx_type;    
+    using eigen_idx_t = eigen_idx_type;
 
     SparseMatrixImp() = default;
 
     // Setters for the internal CSR and CSC matrix pointers
-    void setCSR(MatrixCSR* mat) { L_csr_p = mat; }
-    void setCSC(MatrixCSC* mat) { L_csc_p = mat; }
+    void setCSR(MatrixCSR *mat) { L_csr_p = mat; }
+
+    void setCSC(MatrixCSC *mat) { L_csc_p = mat; }
 
     // Getters for internal matrices (used by EigenSparseRange)
-    const MatrixCSR* getCSR() const { return L_csr_p; }
-    const MatrixCSC* getCSC() const { return L_csc_p; }
+    const MatrixCSR *getCSR() const { return L_csr_p; }
+
+    const MatrixCSC *getCSC() const { return L_csc_p; }
 
     /// @brief Number of vertices = number of rows in the matrix
-    size_t num_vertices() const noexcept {
-        return static_cast<size_t>(L_csr_p->rows());
-    }
+    size_t num_vertices() const noexcept { return static_cast<size_t>(L_csr_p->rows()); }
 
     /// @brief Return a range over all vertices [0, num_vertices)
-    auto vertices() const {
-        return osp::integral_range<size_t>(num_vertices());
-    }
+    auto vertices() const { return osp::integral_range<size_t>(num_vertices()); }
 
     /// @brief Number of edges = total non-zeros minus diagonal elements
-    vertex_idx num_edges() const noexcept {
-        return static_cast<vertex_idx>(L_csr_p->nonZeros() - L_csr_p->rows());
-    }
+    vertex_idx num_edges() const noexcept { return static_cast<vertex_idx>(L_csr_p->nonZeros() - L_csr_p->rows()); }
 
     /// @brief In-degree = non-zero off-diagonal entries in row v (CSR)
     vertex_idx in_degree(vertex_idx v) const noexcept {
@@ -107,38 +105,32 @@ class SparseMatrixImp {
 
     // Default zero weights (placeholders, extend as needed)
     vertex_comm_weight_type vertex_comm_weight(vertex_idx) const noexcept { return 0; }
-    vertex_mem_weight_type vertex_mem_weight(vertex_idx) const noexcept  { return 0; }
+
+    vertex_mem_weight_type vertex_mem_weight(vertex_idx) const noexcept { return 0; }
 
     inline unsigned num_vertex_types() const { return 1; };
-    inline vertex_type_type vertex_type(const vertex_idx ) const { return 0; }
+
+    inline vertex_type_type vertex_type(const vertex_idx) const { return 0; }
 };
 
 using sparse_matrix_graph_int32_t = SparseMatrixImp<int32_t>;
 using sparse_matrix_graph_int64_t = SparseMatrixImp<int64_t>;
 
-
 static_assert(is_directed_graph_edge_desc_v<SparseMatrixImp<int32_t>>,
               "SparseMatrix must satisfy the directed_graph_edge_desc concept");
 
 // Verify that SparseMatrixImp satisfies the directed graph concept
-static_assert(is_directed_graph_v<SparseMatrixImp<int32_t>>,
-              "SparseMatrix must satisfy directed_graph_concept");
+static_assert(is_directed_graph_v<SparseMatrixImp<int32_t>>, "SparseMatrix must satisfy directed_graph_concept");
 
-static_assert(is_directed_graph_v<SparseMatrixImp<int64_t>>,
-              "SparseMatrix must satisfy directed_graph_concept");
+static_assert(is_directed_graph_v<SparseMatrixImp<int64_t>>, "SparseMatrix must satisfy directed_graph_concept");
 
-static_assert(has_vertex_weights_v<SparseMatrixImp<int32_t>>, 
-    "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
+static_assert(has_vertex_weights_v<SparseMatrixImp<int32_t>>, "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
 
-static_assert(has_vertex_weights_v<SparseMatrixImp<int64_t>>, 
-    "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
+static_assert(has_vertex_weights_v<SparseMatrixImp<int64_t>>, "Compact_Sparse_Graph must satisfy the has_vertex_weights concept");
 
 static_assert(is_computational_dag_typed_vertices_v<SparseMatrixImp<int32_t>>,
               "Compact_Sparse_Graph must satisfy the is_computation_dag concept");
 
+}    // namespace osp
 
-
-} // namespace osp
-
-
-#endif
\ No newline at end of file
+#endif
diff --git a/include/osp/graph_implementations/integral_range.hpp b/include/osp/graph_implementations/integral_range.hpp
index 92611da7..0217c64f 100644
--- a/include/osp/graph_implementations/integral_range.hpp
+++ b/include/osp/graph_implementations/integral_range.hpp
@@ -44,20 +44,21 @@ class integral_range {
      *
      * This iterator satisfies the RandomAccessIterator concept.
      */
-    class integral_iterator { // public for std::reverse_iterator
+    class integral_iterator {    // public for std::reverse_iterator
       public:
         using iterator_category = std::random_access_iterator_tag;
         using difference_type = std::ptrdiff_t;
         using value_type = T;
-        using pointer = void; // Not a real pointer
-        using reference = T;  // Not a real reference
+        using pointer = void;    // Not a real pointer
+        using reference = T;     // Not a real reference
 
         /**
          * @brief Proxy object to support operator-> for integral types.
          */
         struct arrow_proxy {
             T value;
-            constexpr const T* operator->() const noexcept { return &value; }
+
+            constexpr const T *operator->() const noexcept { return &value; }
         };
 
       private:
@@ -113,17 +114,22 @@ class integral_range {
             return temp;
         }
 
-        [[nodiscard]] constexpr bool operator==(const integral_iterator &other) const noexcept { return current == other.current; }
+        [[nodiscard]] constexpr bool operator==(const integral_iterator &other) const noexcept {
+            return current == other.current;
+        }
+
         [[nodiscard]] constexpr bool operator!=(const integral_iterator &other) const noexcept { return !(*this == other); }
 
         constexpr integral_iterator &operator+=(difference_type n) noexcept {
             current = static_cast<value_type>(current + n);
             return *this;
         }
+
         [[nodiscard]] constexpr integral_iterator operator+(difference_type n) const noexcept {
             integral_iterator temp = *this;
             return temp += n;
         }
+
         [[nodiscard]] friend constexpr integral_iterator operator+(difference_type n, const integral_iterator &it) noexcept {
             return it + n;
         }
@@ -132,10 +138,12 @@ class integral_range {
             current = static_cast<value_type>(current - n);
             return *this;
         }
+
         [[nodiscard]] constexpr integral_iterator operator-(difference_type n) const noexcept {
             integral_iterator temp = *this;
             return temp -= n;
         }
+
         [[nodiscard]] constexpr difference_type operator-(const integral_iterator &other) const noexcept {
             return static_cast<difference_type>(current) - static_cast<difference_type>(other.current);
         }
@@ -143,9 +151,16 @@ class integral_range {
         [[nodiscard]] constexpr value_type operator[](difference_type n) const noexcept { return *(*this + n); }
 
         [[nodiscard]] constexpr bool operator<(const integral_iterator &other) const noexcept { return current < other.current; }
+
         [[nodiscard]] constexpr bool operator>(const integral_iterator &other) const noexcept { return current > other.current; }
-        [[nodiscard]] constexpr bool operator<=(const integral_iterator &other) const noexcept { return current <= other.current; }
-        [[nodiscard]] constexpr bool operator>=(const integral_iterator &other) const noexcept { return current >= other.current; }
+
+        [[nodiscard]] constexpr bool operator<=(const integral_iterator &other) const noexcept {
+            return current <= other.current;
+        }
+
+        [[nodiscard]] constexpr bool operator>=(const integral_iterator &other) const noexcept {
+            return current >= other.current;
+        }
     };
 
     using reverse_integral_iterator = std::reverse_iterator<integral_iterator>;
@@ -165,15 +180,19 @@ class integral_range {
     constexpr integral_range(T start_, T end_) noexcept : start(start_), finish(end_) {}
 
     [[nodiscard]] constexpr integral_iterator begin() const noexcept { return integral_iterator(start); }
+
     [[nodiscard]] constexpr integral_iterator cbegin() const noexcept { return integral_iterator(start); }
 
     [[nodiscard]] constexpr integral_iterator end() const noexcept { return integral_iterator(finish); }
+
     [[nodiscard]] constexpr integral_iterator cend() const noexcept { return integral_iterator(finish); }
 
     [[nodiscard]] constexpr reverse_integral_iterator rbegin() const noexcept { return reverse_integral_iterator(end()); }
+
     [[nodiscard]] constexpr reverse_integral_iterator crbegin() const noexcept { return reverse_integral_iterator(cend()); }
 
     [[nodiscard]] constexpr reverse_integral_iterator rend() const noexcept { return reverse_integral_iterator(begin()); }
+
     [[nodiscard]] constexpr reverse_integral_iterator crend() const noexcept { return reverse_integral_iterator(cbegin()); }
 
     /**
@@ -189,4 +208,4 @@ class integral_range {
     [[nodiscard]] constexpr bool empty() const noexcept { return start == finish; }
 };
 
-} // namespace osp
+}    // namespace osp
diff --git a/include/osp/partitioning/model/hypergraph.hpp b/include/osp/partitioning/model/hypergraph.hpp
index 39ca79b2..9c36beb6 100644
--- a/include/osp/partitioning/model/hypergraph.hpp
+++ b/include/osp/partitioning/model/hypergraph.hpp
@@ -17,31 +17,34 @@ limitations under the License.
 */
 #pragma once
 
-#include <vector>
 #include <stdexcept>
+#include <vector>
+
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 
 namespace osp {
 
-template<typename index_type = size_t, typename workw_type = int, typename memw_type = int, typename commw_type = int>
+template <typename index_type = size_t, typename workw_type = int, typename memw_type = int, typename commw_type = int>
 class Hypergraph {
-
     using this_t = Hypergraph<index_type, workw_type, memw_type, commw_type>;
 
   public:
-
     using vertex_idx = index_type;
-    using vertex_work_weight_type   = workw_type;
-    using vertex_mem_weight_type    = memw_type;
-    using vertex_comm_weight_type   = commw_type;
+    using vertex_work_weight_type = workw_type;
+    using vertex_mem_weight_type = memw_type;
+    using vertex_comm_weight_type = commw_type;
 
     Hypergraph() = default;
 
     Hypergraph(index_type num_vertices_, index_type num_hyperedges_)
-        : Num_vertices(num_vertices_), Num_hyperedges(num_hyperedges_), vertex_work_weights(num_vertices_, 1),
-        vertex_memory_weights(num_vertices_, 1), hyperedge_weights(num_hyperedges_, 1),
-        incident_hyperedges_to_vertex(num_vertices_), vertices_in_hyperedge(num_hyperedges_){}
+        : Num_vertices(num_vertices_),
+          Num_hyperedges(num_hyperedges_),
+          vertex_work_weights(num_vertices_, 1),
+          vertex_memory_weights(num_vertices_, 1),
+          hyperedge_weights(num_hyperedges_, 1),
+          incident_hyperedges_to_vertex(num_vertices_),
+          vertices_in_hyperedge(num_hyperedges_) {}
 
     Hypergraph(const this_t &other) = default;
     Hypergraph &operator=(const this_t &other) = default;
@@ -49,27 +52,35 @@ class Hypergraph {
     virtual ~Hypergraph() = default;
 
     inline index_type num_vertices() const { return Num_vertices; }
+
     inline index_type num_hyperedges() const { return Num_hyperedges; }
+
     inline index_type num_pins() const { return Num_pins; }
+
     inline workw_type get_vertex_work_weight(index_type node) const { return vertex_work_weights[node]; }
+
     inline memw_type get_vertex_memory_weight(index_type node) const { return vertex_memory_weights[node]; }
+
     inline commw_type get_hyperedge_weight(index_type hyperedge) const { return hyperedge_weights[hyperedge]; }
 
     void add_pin(index_type vertex_idx, index_type hyperedge_idx);
     void add_vertex(workw_type work_weight = 1, memw_type memory_weight = 1);
     void add_empty_hyperedge(commw_type weight = 1);
-    void add_hyperedge(const std::vector<index_type>& pins, commw_type weight = 1);
+    void add_hyperedge(const std::vector<index_type> &pins, commw_type weight = 1);
     void set_vertex_work_weight(index_type vertex_idx, workw_type weight);
     void set_vertex_memory_weight(index_type vertex_idx, memw_type weight);
     void set_hyperedge_weight(index_type hyperedge_idx, commw_type weight);
 
-
     void clear();
     void reset(index_type num_vertices_, index_type num_hyperedges_);
 
-    inline const std::vector<index_type> &get_incident_hyperedges(index_type vertex) const { return incident_hyperedges_to_vertex[vertex]; }
-    inline const std::vector<index_type> &get_vertices_in_hyperedge(index_type hyperedge) const { return vertices_in_hyperedge[hyperedge]; }
+    inline const std::vector<index_type> &get_incident_hyperedges(index_type vertex) const {
+        return incident_hyperedges_to_vertex[vertex];
+    }
 
+    inline const std::vector<index_type> &get_vertices_in_hyperedge(index_type hyperedge) const {
+        return vertices_in_hyperedge[hyperedge];
+    }
 
   private:
     index_type Num_vertices = 0, Num_hyperedges = 0, Num_pins = 0;
@@ -84,83 +95,75 @@ class Hypergraph {
 
 using Hypergraph_def_t = Hypergraph<size_t, int, int, int>;
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_pin(index_type vertex, index_type hyperedge_idx)
-{
-    if(vertex >= Num_vertices)
-    {
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_pin(index_type vertex, index_type hyperedge_idx) {
+    if (vertex >= Num_vertices) {
         throw std::invalid_argument("Invalid Argument while adding pin: vertex index out of range.");
-    }
-    else if(hyperedge_idx >= Num_hyperedges)
-    {
+    } else if (hyperedge_idx >= Num_hyperedges) {
         throw std::invalid_argument("Invalid Argument while adding pin: hyperedge index out of range.");
-    }
-    else{    
+    } else {
         incident_hyperedges_to_vertex[vertex].push_back(hyperedge_idx);
         vertices_in_hyperedge[hyperedge_idx].push_back(vertex);
         ++Num_pins;
     }
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_vertex(workw_type work_weight, memw_type memory_weight)
-{
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_vertex(workw_type work_weight, memw_type memory_weight) {
     vertex_work_weights.push_back(work_weight);
     vertex_memory_weights.push_back(memory_weight);
     incident_hyperedges_to_vertex.emplace_back();
     ++Num_vertices;
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_empty_hyperedge(commw_type weight)
-{
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_empty_hyperedge(commw_type weight) {
     vertices_in_hyperedge.emplace_back();
     hyperedge_weights.push_back(weight);
     ++Num_hyperedges;
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_hyperedge(const std::vector<index_type>& pins, commw_type weight)
-{
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::add_hyperedge(const std::vector<index_type> &pins,
+                                                                              commw_type weight) {
     vertices_in_hyperedge.emplace_back(pins);
     hyperedge_weights.push_back(weight);
-    for(index_type vertex : pins)
+    for (index_type vertex : pins) {
         incident_hyperedges_to_vertex[vertex].push_back(Num_hyperedges);
+    }
     ++Num_hyperedges;
     Num_pins += static_cast<index_type>(pins.size());
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::set_vertex_work_weight(index_type vertex, workw_type weight)
-{
-    if(vertex >= Num_vertices)
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::set_vertex_work_weight(index_type vertex, workw_type weight) {
+    if (vertex >= Num_vertices) {
         throw std::invalid_argument("Invalid Argument while setting vertex weight: vertex index out of range.");
-    else   
+    } else {
         vertex_work_weights[vertex] = weight;
+    }
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::set_vertex_memory_weight(index_type vertex, memw_type weight)
-{
-    if(vertex >= Num_vertices)
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::set_vertex_memory_weight(index_type vertex, memw_type weight) {
+    if (vertex >= Num_vertices) {
         throw std::invalid_argument("Invalid Argument while setting vertex weight: vertex index out of range.");
-    else   
+    } else {
         vertex_memory_weights[vertex] = weight;
+    }
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::set_hyperedge_weight(index_type hyperedge_idx, commw_type weight)
-{
-    if(hyperedge_idx >= Num_hyperedges)
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::set_hyperedge_weight(index_type hyperedge_idx, commw_type weight) {
+    if (hyperedge_idx >= Num_hyperedges) {
         throw std::invalid_argument("Invalid Argument while setting hyperedge weight: hyepredge index out of range.");
-    else   
+    } else {
         hyperedge_weights[hyperedge_idx] = weight;
+    }
 }
 
-
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::clear()
-{
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::clear() {
     Num_vertices = 0;
     Num_hyperedges = 0;
     Num_pins = 0;
@@ -172,9 +175,8 @@ void Hypergraph<index_type, workw_type, memw_type, commw_type>::clear()
     vertices_in_hyperedge.clear();
 }
 
-template<typename index_type, typename workw_type, typename memw_type, typename commw_type>
-void Hypergraph<index_type, workw_type, memw_type, commw_type>::reset(index_type num_vertices_, index_type num_hyperedges_)
-{
+template <typename index_type, typename workw_type, typename memw_type, typename commw_type>
+void Hypergraph<index_type, workw_type, memw_type, commw_type>::reset(index_type num_vertices_, index_type num_hyperedges_) {
     clear();
 
     Num_vertices = num_vertices_;
@@ -187,7 +189,4 @@ void Hypergraph<index_type, workw_type, memw_type, commw_type>::reset(index_type
     vertices_in_hyperedge.resize(num_hyperedges_);
 }
 
-
-
-
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/model/hypergraph_utility.hpp b/include/osp/partitioning/model/hypergraph_utility.hpp
index 46e698b4..a641519f 100644
--- a/include/osp/partitioning/model/hypergraph_utility.hpp
+++ b/include/osp/partitioning/model/hypergraph_utility.hpp
@@ -33,82 +33,79 @@ limitations under the License.
 
 namespace osp {
 
-
 // summing up weights
 
-template<typename hypergraph_t>
-typename hypergraph_t::vertex_work_weight_type compute_total_vertex_work_weight(const hypergraph_t& hgraph)
-{
+template <typename hypergraph_t>
+typename hypergraph_t::vertex_work_weight_type compute_total_vertex_work_weight(const hypergraph_t &hgraph) {
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
 
     workw_type total = 0;
-    for(index_type node = 0; node < hgraph.num_vertices(); ++node)
+    for (index_type node = 0; node < hgraph.num_vertices(); ++node) {
         total += hgraph.get_vertex_work_weight(node);
+    }
     return total;
 }
 
-template<typename hypergraph_t>
-typename hypergraph_t::vertex_mem_weight_type compute_total_vertex_memory_weight(const hypergraph_t& hgraph)
-{
+template <typename hypergraph_t>
+typename hypergraph_t::vertex_mem_weight_type compute_total_vertex_memory_weight(const hypergraph_t &hgraph) {
     using index_type = typename hypergraph_t::vertex_idx;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
 
     memw_type total = 0;
-    for(index_type node = 0; node < hgraph.num_vertices(); ++node)
+    for (index_type node = 0; node < hgraph.num_vertices(); ++node) {
         total += hgraph.get_vertex_memory_weight(node);
+    }
     return total;
 }
 
-
 // get induced subhypergraph
 
-template<typename hypergraph_t>
-hypergraph_t create_induced_hypergraph(const hypergraph_t& hgraph, const std::vector<bool>& include)
-{
-    if(include.size() != hgraph.num_vertices())
+template <typename hypergraph_t>
+hypergraph_t create_induced_hypergraph(const hypergraph_t &hgraph, const std::vector<bool> &include) {
+    if (include.size() != hgraph.num_vertices()) {
         throw std::invalid_argument("Invalid Argument while extracting induced hypergraph: input bool array has incorrect size.");
+    }
 
     using index_type = typename hypergraph_t::vertex_idx;
 
-
     std::vector<index_type> new_index(hgraph.num_vertices());
     unsigned current_index = 0;
-    for(index_type node = 0; node < hgraph.num_vertices(); ++node)
-        if(include[node])
+    for (index_type node = 0; node < hgraph.num_vertices(); ++node) {
+        if (include[node]) {
             new_index[node] = current_index++;
-    
+        }
+    }
+
     hypergraph_t new_hgraph(current_index, 0);
-    for(index_type node = 0; node < hgraph.num_vertices(); ++node)
-        if(include[node])
-        {
+    for (index_type node = 0; node < hgraph.num_vertices(); ++node) {
+        if (include[node]) {
             new_hgraph.set_vertex_work_weight(new_index[node], hgraph.get_vertex_work_weight(node));
             new_hgraph.set_vertex_memory_weight(new_index[node], hgraph.get_vertex_memory_weight(node));
         }
+    }
 
-    for(index_type hyperedge = 0; hyperedge < hgraph.num_hyperedges(); ++hyperedge)
-    {
+    for (index_type hyperedge = 0; hyperedge < hgraph.num_hyperedges(); ++hyperedge) {
         unsigned nr_induced_pins = 0;
         std::vector<index_type> induced_hyperedge;
-        for(index_type node : hgraph.get_vertices_in_hyperedge(hyperedge))
-            if(include[node])
-            {
+        for (index_type node : hgraph.get_vertices_in_hyperedge(hyperedge)) {
+            if (include[node]) {
                 induced_hyperedge.push_back(new_index[node]);
                 ++nr_induced_pins;
             }
-        
-        if(nr_induced_pins >= 2)
+        }
+
+        if (nr_induced_pins >= 2) {
             new_hgraph.add_hyperedge(induced_hyperedge, hgraph.get_hyperedge_weight(hyperedge));
+        }
     }
     return new_hgraph;
 }
 
-
 // conversion
 
-template<typename hypergraph_t, typename Graph_t>
-hypergraph_t convert_from_cdag_as_dag(const Graph_t& dag)
-{
+template <typename hypergraph_t, typename Graph_t>
+hypergraph_t convert_from_cdag_as_dag(const Graph_t &dag) {
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
@@ -117,25 +114,26 @@ hypergraph_t convert_from_cdag_as_dag(const Graph_t& dag)
     static_assert(std::is_same_v<vertex_idx_t<Graph_t>, index_type>, "Index type mismatch, cannot convert DAG to hypergraph.");
     static_assert(std::is_same_v<v_workw_t<Graph_t>, workw_type>, "Work weight type mismatch, cannot convert DAG to hypergraph.");
     static_assert(std::is_same_v<v_memw_t<Graph_t>, memw_type>, "Memory weight type mismatch, cannot convert DAG to hypergraph.");
-    static_assert(!has_edge_weights_v<Graph_t> || std::is_same_v<e_commw_t<Graph_t>, commw_type>, "Communication weight type mismatch, cannot convert DAG to hypergraph.");
+    static_assert(!has_edge_weights_v<Graph_t> || std::is_same_v<e_commw_t<Graph_t>, commw_type>,
+                  "Communication weight type mismatch, cannot convert DAG to hypergraph.");
 
     hypergraph_t hgraph(dag.num_vertices(), 0);
-    for(const auto &node : dag.vertices())
-    {
+    for (const auto &node : dag.vertices()) {
         hgraph.set_vertex_work_weight(node, dag.vertex_work_weight(node));
         hgraph.set_vertex_memory_weight(node, dag.vertex_mem_weight(node));
-        for (const auto &child : dag.children(node))
-            if constexpr(has_edge_weights_v<Graph_t>)
+        for (const auto &child : dag.children(node)) {
+            if constexpr (has_edge_weights_v<Graph_t>) {
                 hgraph.add_hyperedge({node, child}, dag.edge_comm_weight(edge_desc(node, child, dag).first));
-            else 
+            } else {
                 hgraph.add_hyperedge({node, child});
+            }
+        }
     }
     return hgraph;
 }
 
-template<typename hypergraph_t, typename Graph_t>
-hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t& dag)
-{
+template <typename hypergraph_t, typename Graph_t>
+hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t &dag) {
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
@@ -144,21 +142,23 @@ hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t& dag)
     static_assert(std::is_same_v<vertex_idx_t<Graph_t>, index_type>, "Index type mismatch, cannot convert DAG to hypergraph.");
     static_assert(std::is_same_v<v_workw_t<Graph_t>, workw_type>, "Work weight type mismatch, cannot convert DAG to hypergraph.");
     static_assert(std::is_same_v<v_memw_t<Graph_t>, memw_type>, "Memory weight type mismatch, cannot convert DAG to hypergraph.");
-    static_assert(std::is_same_v<v_commw_t<Graph_t>, commw_type>, "Communication weight type mismatch, cannot convert DAG to hypergraph.");
+    static_assert(std::is_same_v<v_commw_t<Graph_t>, commw_type>,
+                  "Communication weight type mismatch, cannot convert DAG to hypergraph.");
 
     hypergraph_t hgraph(dag.num_vertices(), 0);
-    for(const auto &node : dag.vertices())
-    {
+    for (const auto &node : dag.vertices()) {
         hgraph.set_vertex_work_weight(node, dag.vertex_work_weight(node));
         hgraph.set_vertex_memory_weight(node, dag.vertex_mem_weight(node));
-        if(dag.out_degree(node) == 0)
+        if (dag.out_degree(node) == 0) {
             continue;
+        }
         std::vector<index_type> new_hyperedge({node});
-        for (const auto &child : dag.children(node))
+        for (const auto &child : dag.children(node)) {
             new_hyperedge.push_back(child);
+        }
         hgraph.add_hyperedge(new_hyperedge, dag.vertex_comm_weight(node));
     }
     return hgraph;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/model/partitioning.hpp b/include/osp/partitioning/model/partitioning.hpp
index e39328a1..ae4ee277 100644
--- a/include/osp/partitioning/model/partitioning.hpp
+++ b/include/osp/partitioning/model/partitioning.hpp
@@ -26,11 +26,9 @@ namespace osp {
 
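-// Represents a partitioning where each vertex of a hypergraph is assigned to a specifc partition
+// Represents a partitioning where each vertex of a hypergraph is assigned to a specific partition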
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 class Partitioning {
-
   private:
-
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
@@ -41,7 +39,6 @@ class Partitioning {
     std::vector<unsigned> node_to_partition_assignment;
 
   public:
-  
     Partitioning() = delete;
 
     Partitioning(const PartitioningProblem<hypergraph_t> &inst)
@@ -57,40 +54,37 @@ class Partitioning {
 
     virtual ~Partitioning() = default;
 
-
     // getters and setters
 
     inline const PartitioningProblem<hypergraph_t> &getInstance() const { return *instance; }
 
     inline unsigned assignedPartition(index_type node) const { return node_to_partition_assignment[node]; }
+
     inline const std::vector<unsigned> &assignedPartitions() const { return node_to_partition_assignment; }
+
     inline std::vector<unsigned> &assignedPartitions() { return node_to_partition_assignment; }
 
     inline void setAssignedPartition(index_type node, unsigned part) { node_to_partition_assignment.at(node) = part; }
-    void setAssignedPartitions(const std::vector<unsigned> &vec) {
 
-        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices()) ) {
+    void setAssignedPartitions(const std::vector<unsigned> &vec) {
+        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices())) {
             node_to_partition_assignment = vec;
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning processors: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes.");
         }
     }
-    void setAssignedPartitions(std::vector<unsigned> &&vec) {
 
-        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices()) ) {
+    void setAssignedPartitions(std::vector<unsigned> &&vec) {
+        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices())) {
             node_to_partition_assignment = vec;
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning processors: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes.");
         }
     }
 
     std::vector<index_type> getPartitionContent(unsigned part) const {
-
         std::vector<index_type> content;
         for (index_type node = 0; node < node_to_partition_assignment.size(); ++node) {
-
             if (node_to_partition_assignment[node] == part) {
                 content.push_back(node);
             }
@@ -111,76 +105,79 @@ class Partitioning {
     commw_type computeCutNetCost() const;
 
     bool satisfiesBalanceConstraint() const;
-
 };
 
-template<typename hypergraph_t>
-std::vector<unsigned> Partitioning<hypergraph_t>::computeLambdaForHyperedges() const
-{
+template <typename hypergraph_t>
+std::vector<unsigned> Partitioning<hypergraph_t>::computeLambdaForHyperedges() const {
     std::vector<unsigned> lambda(instance->getHypergraph().num_hyperedges(), 0);
-    for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx)
-    {
+    for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) {
         const std::vector<index_type> &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx);
-        if(hyperedge.empty())
+        if (hyperedge.empty()) {
             continue;
+        }
         std::vector<bool> intersects_part(instance->getNumberOfPartitions(), false);
-        for(const index_type& node : hyperedge)
+        for (const index_type &node : hyperedge) {
             intersects_part[node_to_partition_assignment[node]] = true;
-        for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part)
-            if(intersects_part[part])
+        }
+        for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) {
+            if (intersects_part[part]) {
                 ++lambda[edge_idx];
+            }
+        }
     }
     return lambda;
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 typename hypergraph_t::vertex_comm_weight_type Partitioning<hypergraph_t>::computeConnectivityCost() const {
-
     commw_type total = 0;
     std::vector<unsigned> lambda = computeLambdaForHyperedges();
-    
-    for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx)
-        if(lambda[edge_idx] >= 1)
-            total += (static_cast<commw_type>(lambda[edge_idx])-1) * instance->getHypergraph().get_hyperedge_weight(edge_idx);
-    
+
+    for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) {
+        if (lambda[edge_idx] >= 1) {
+            total += (static_cast<commw_type>(lambda[edge_idx]) - 1) * instance->getHypergraph().get_hyperedge_weight(edge_idx);
+        }
+    }
+
     return total;
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 typename hypergraph_t::vertex_comm_weight_type Partitioning<hypergraph_t>::computeCutNetCost() const {
-
     commw_type total = 0;
     std::vector<unsigned> lambda = computeLambdaForHyperedges();
-    for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx)
-        if(lambda[edge_idx] > 1)
+    for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) {
+        if (lambda[edge_idx] > 1) {
             total += instance->getHypergraph().get_hyperedge_weight(edge_idx);
-    
+        }
+    }
+
     return total;
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 bool Partitioning<hypergraph_t>::satisfiesBalanceConstraint() const {
     std::vector<workw_type> work_weight(instance->getNumberOfPartitions(), 0);
     std::vector<memw_type> memory_weight(instance->getNumberOfPartitions(), 0);
     for (index_type node = 0; node < node_to_partition_assignment.size(); ++node) {
-        if (node_to_partition_assignment[node] > instance->getNumberOfPartitions())
+        if (node_to_partition_assignment[node] >= instance->getNumberOfPartitions()) {
             throw std::invalid_argument("Invalid Argument while checking balance constraint: partition ID out of range.");
-        else
-        {
+        } else {
             work_weight[node_to_partition_assignment[node]] += instance->getHypergraph().get_vertex_work_weight(node);
             memory_weight[node_to_partition_assignment[node]] += instance->getHypergraph().get_vertex_memory_weight(node);
         }
     }
 
-    for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part)
-    {
-        if(work_weight[part] > instance->getMaxWorkWeightPerPartition())
+    for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) {
+        if (work_weight[part] > instance->getMaxWorkWeightPerPartition()) {
             return false;
-        if(memory_weight[part] > instance->getMaxMemoryWeightPerPartition())
+        }
+        if (memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) {
             return false;
+        }
     }
 
     return true;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/model/partitioning_problem.hpp b/include/osp/partitioning/model/partitioning_problem.hpp
index b121ddd9..90ae57c6 100644
--- a/include/osp/partitioning/model/partitioning_problem.hpp
+++ b/include/osp/partitioning/model/partitioning_problem.hpp
@@ -18,19 +18,17 @@ limitations under the License.
 
 #pragma once
 
-#include <iostream>
 #include <cmath>
+#include <iostream>
 
 #include "osp/partitioning/model/hypergraph_utility.hpp"
 
 namespace osp {
 
 // represents a hypergraph partitioning problem into a fixed number of parts with a balance constraint
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 class PartitioningProblem {
-
   private:
-
     using this_t = PartitioningProblem<hypergraph_t>;
 
     using index_type = typename hypergraph_t::vertex_idx;
@@ -47,20 +45,25 @@ class PartitioningProblem {
     bool allows_replication = false;
 
   public:
-
     PartitioningProblem() = default;
 
-    PartitioningProblem(const hypergraph_t &hgraph_, unsigned nr_parts_ = 2,
+    PartitioningProblem(const hypergraph_t &hgraph_,
+                        unsigned nr_parts_ = 2,
                         workw_type max_work_weight_ = std::numeric_limits<workw_type>::max(),
-                        memw_type max_memory_weight_ = std::numeric_limits<memw_type>::max()) :
-                        hgraph(hgraph_), nr_of_partitions(nr_parts_),
-                        max_work_weight_per_partition(max_work_weight_), max_memory_weight_per_partition(max_memory_weight_) {}
-
-    PartitioningProblem(const hypergraph_t &&hgraph_, unsigned nr_parts_ = 2,
+                        memw_type max_memory_weight_ = std::numeric_limits<memw_type>::max())
+        : hgraph(hgraph_),
+          nr_of_partitions(nr_parts_),
+          max_work_weight_per_partition(max_work_weight_),
+          max_memory_weight_per_partition(max_memory_weight_) {}
+
+    PartitioningProblem(const hypergraph_t &&hgraph_,
+                        unsigned nr_parts_ = 2,
                         workw_type max_work_weight_ = std::numeric_limits<workw_type>::max(),
-                        memw_type max_memory_weight_ = std::numeric_limits<memw_type>::max()) :
-                        hgraph(hgraph_), nr_of_partitions(nr_parts_),
-                        max_work_weight_per_partition(max_work_weight_), max_memory_weight_per_partition(max_memory_weight_) {}
+                        memw_type max_memory_weight_ = std::numeric_limits<memw_type>::max())
+        : hgraph(hgraph_),
+          nr_of_partitions(nr_parts_),
+          max_work_weight_per_partition(max_work_weight_),
+          max_memory_weight_per_partition(max_memory_weight_) {}
 
     PartitioningProblem(const this_t &other) = default;
     PartitioningProblem(this_t &&other) = default;
@@ -70,34 +73,45 @@ class PartitioningProblem {
 
     // getters
     inline const hypergraph_t &getHypergraph() const { return hgraph; }
+
     inline hypergraph_t &getHypergraph() { return hgraph; }
 
     inline unsigned getNumberOfPartitions() const { return nr_of_partitions; }
+
     inline workw_type getMaxWorkWeightPerPartition() const { return max_work_weight_per_partition; }
+
     inline memw_type getMaxMemoryWeightPerPartition() const { return max_memory_weight_per_partition; }
+
     inline bool getAllowsReplication() const { return allows_replication; }
 
     // setters
     inline void setHypergraph(const hypergraph_t &hgraph_) { hgraph = hgraph_; }
-    
+
     inline void setNumberOfPartitions(unsigned nr_parts_) { nr_of_partitions = nr_parts_; }
+
     inline void setAllowsReplication(bool allowed_) { allows_replication = allowed_; }
 
     inline void setMaxWorkWeightExplicitly(workw_type max_weight_) { max_work_weight_per_partition = max_weight_; }
-    void setMaxWorkWeightViaImbalanceFactor(double imbalance){
-        if(imbalance < 0 )
+
+    void setMaxWorkWeightViaImbalanceFactor(double imbalance) {
+        if (imbalance < 0) {
             throw std::invalid_argument("Invalid Argument while setting imbalance parameter: parameter is negative.");
-        else
-            max_work_weight_per_partition = static_cast<workw_type>(ceil(compute_total_vertex_work_weight(hgraph)/ static_cast<double>(nr_of_partitions) * (1.0+imbalance)));
+        } else {
+            max_work_weight_per_partition = static_cast<workw_type>(
+                ceil(compute_total_vertex_work_weight(hgraph) / static_cast<double>(nr_of_partitions) * (1.0 + imbalance)));
+        }
     }
+
     inline void setMaxMemoryWeightExplicitly(memw_type max_weight_) { max_memory_weight_per_partition = max_weight_; }
-    void setMaxMemoryWeightViaImbalanceFactor(double imbalance){
-        if(imbalance < 0 )
+
+    void setMaxMemoryWeightViaImbalanceFactor(double imbalance) {
+        if (imbalance < 0) {
             throw std::invalid_argument("Invalid Argument while setting imbalance parameter: parameter is negative.");
-        else
-            max_memory_weight_per_partition = static_cast<memw_type>(ceil(compute_total_vertex_memory_weight(hgraph)/ static_cast<double>(nr_of_partitions) * (1.0+imbalance)));
+        } else {
+            max_memory_weight_per_partition = static_cast<memw_type>(
+                ceil(compute_total_vertex_memory_weight(hgraph) / static_cast<double>(nr_of_partitions) * (1.0 + imbalance)));
+        }
     }
 };
 
-
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/model/partitioning_replication.hpp b/include/osp/partitioning/model/partitioning_replication.hpp
index eae0b65e..698db721 100644
--- a/include/osp/partitioning/model/partitioning_replication.hpp
+++ b/include/osp/partitioning/model/partitioning_replication.hpp
@@ -26,29 +26,27 @@ namespace osp {
 
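-// Represents a partitioning where each vertex of a hypergraph can be assinged to one or more partitions
+// Represents a partitioning where each vertex of a hypergraph can be assigned to one or more partitions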
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 class PartitioningWithReplication {
-
   private:
-
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
     using commw_type = typename hypergraph_t::vertex_comm_weight_type;
 
-
     const PartitioningProblem<hypergraph_t> *instance;
 
-    std::vector<std::vector<unsigned> > node_to_partitions_assignment;
+    std::vector<std::vector<unsigned>> node_to_partitions_assignment;
 
   public:
-  
     PartitioningWithReplication() = delete;
 
     PartitioningWithReplication(const PartitioningProblem<hypergraph_t> &inst)
-        : instance(&inst), node_to_partitions_assignment(std::vector<std::vector<unsigned>>(inst.getHypergraph().num_vertices(), {0})) {}
+        : instance(&inst),
+          node_to_partitions_assignment(std::vector<std::vector<unsigned>>(inst.getHypergraph().num_vertices(), {0})) {}
 
-    PartitioningWithReplication(const PartitioningProblem<hypergraph_t> &inst, const std::vector<std::vector<unsigned> > &partition_assignment_)
+    PartitioningWithReplication(const PartitioningProblem<hypergraph_t> &inst,
+                                const std::vector<std::vector<unsigned>> &partition_assignment_)
         : instance(&inst), node_to_partitions_assignment(partition_assignment_) {}
 
     PartitioningWithReplication(const PartitioningWithReplication<hypergraph_t> &partitioning_) = default;
@@ -58,41 +56,43 @@ class PartitioningWithReplication {
 
     virtual ~PartitioningWithReplication() = default;
 
-
     // getters and setters
 
     inline const PartitioningProblem<hypergraph_t> &getInstance() const { return *instance; }
 
     inline std::vector<unsigned> assignedPartitions(index_type node) const { return node_to_partitions_assignment[node]; }
-    inline const std::vector<std::vector<unsigned> > &assignedPartitions() const { return node_to_partitions_assignment; }
-    inline std::vector<std::vector<unsigned> > &assignedPartitions() { return node_to_partitions_assignment; }
 
-    inline void setAssignedPartitions(index_type node, const std::vector<unsigned>& parts) { node_to_partitions_assignment.at(node) = parts; }
-    void setAssignedPartitionVectors(const std::vector<std::vector<unsigned> > &vec) {
+    inline const std::vector<std::vector<unsigned>> &assignedPartitions() const { return node_to_partitions_assignment; }
+
+    inline std::vector<std::vector<unsigned>> &assignedPartitions() { return node_to_partitions_assignment; }
 
-        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices()) ) {
+    inline void setAssignedPartitions(index_type node, const std::vector<unsigned> &parts) {
+        node_to_partitions_assignment.at(node) = parts;
+    }
+
+    void setAssignedPartitionVectors(const std::vector<std::vector<unsigned>> &vec) {
+        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices())) {
             node_to_partitions_assignment = vec;
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning processors: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes.");
         }
     }
-    void setAssignedPartitionVectors(std::vector<std::vector<unsigned> > &&vec) {
 
-        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices()) ) {
+    void setAssignedPartitionVectors(std::vector<std::vector<unsigned>> &&vec) {
+        if (vec.size() == static_cast<std::size_t>(instance->getHypergraph().num_vertices())) {
             node_to_partitions_assignment = vec;
         } else {
-            throw std::invalid_argument(
-                "Invalid Argument while assigning processors: size does not match number of nodes.");
+            throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes.");
         }
     }
 
-    std::vector<std::vector<index_type> > getPartitionContents() const {
-
-        std::vector<std::vector<index_type> > content(instance->getNumberOfPartitions());
-        for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node)
-            for(unsigned part : node_to_partitions_assignment[node])
+    std::vector<std::vector<index_type>> getPartitionContents() const {
+        std::vector<std::vector<index_type>> content(instance->getNumberOfPartitions());
+        for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) {
+            for (unsigned part : node_to_partitions_assignment[node]) {
                 content[part].push_back(node);
+            }
+        }
 
         return content;
     }
@@ -108,121 +108,125 @@ class PartitioningWithReplication {
     commw_type computeCutNetCost() const;
 
     bool satisfiesBalanceConstraint() const;
-
 };
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 typename hypergraph_t::vertex_comm_weight_type PartitioningWithReplication<hypergraph_t>::computeConnectivityCost() const {
-
     // naive implementation. in the worst-case this is exponential in the number of parts
-    if(instance->getNumberOfPartitions() > 16)
+    if (instance->getNumberOfPartitions() > 16) {
         throw std::invalid_argument("Computing connectivity cost is not supported for more than 16 partitions.");
+    }
 
     commw_type total = 0;
     std::vector<bool> part_used(instance->getNumberOfPartitions(), false);
-    for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx)
-    {
+    for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) {
         const std::vector<index_type> &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx);
-        if(hyperedge.empty())
+        if (hyperedge.empty()) {
             continue;
+        }
 
         unsigned long mask = 0UL;
 
         std::vector<index_type> nr_nodes_covered_by_part(instance->getNumberOfPartitions(), 0);
-        for(const index_type& node : hyperedge)
-            if(node_to_partitions_assignment[node].size() == 1)
+        for (const index_type &node : hyperedge) {
+            if (node_to_partitions_assignment[node].size() == 1) {
                 mask = mask | (1UL << node_to_partitions_assignment[node].front());
+            }
+        }
 
         unsigned min_parts_to_cover = instance->getNumberOfPartitions();
         unsigned long mask_limit = 1UL << instance->getNumberOfPartitions();
-        for(unsigned long subset_mask = 1UL; subset_mask < mask_limit; ++subset_mask)
-        {
-            if((subset_mask & mask)!= mask)
+        for (unsigned long subset_mask = 1UL; subset_mask < mask_limit; ++subset_mask) {
+            if ((subset_mask & mask) != mask) {
                 continue;
-            
+            }
+
             unsigned nr_parts_used = 0;
-            for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part)
-            {
+            for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) {
                 part_used[part] = (((1UL << part) & subset_mask) > 0);
                 nr_parts_used += static_cast<unsigned>(part_used[part]);
             }
-            
+
             bool all_nodes_covered = true;
-            for(const index_type& node : hyperedge)
-            {
-                bool node_covered=false;
-                for(unsigned part : node_to_partitions_assignment[node])
-                    if(part_used[part])
-                    {
+            for (const index_type &node : hyperedge) {
+                bool node_covered = false;
+                for (unsigned part : node_to_partitions_assignment[node]) {
+                    if (part_used[part]) {
                         node_covered = true;
                         break;
                     }
-                if(!node_covered)
-                {
+                }
+                if (!node_covered) {
                     all_nodes_covered = false;
                     break;
                 }
             }
-            if(all_nodes_covered)
+            if (all_nodes_covered) {
                 min_parts_to_cover = std::min(min_parts_to_cover, nr_parts_used);
+            }
         }
- 
-        total += static_cast<commw_type>(min_parts_to_cover-1) * instance->getHypergraph().get_hyperedge_weight(edge_idx);
+
+        total += static_cast<commw_type>(min_parts_to_cover - 1) * instance->getHypergraph().get_hyperedge_weight(edge_idx);
     }
 
     return total;
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 typename hypergraph_t::vertex_comm_weight_type PartitioningWithReplication<hypergraph_t>::computeCutNetCost() const {
-
     commw_type total = 0;
-    for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx)
-    {
+    for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) {
         const std::vector<index_type> &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx);
-        if(hyperedge.empty())
+        if (hyperedge.empty()) {
             continue;
+        }
         std::vector<index_type> nr_nodes_covered_by_part(instance->getNumberOfPartitions(), 0);
-        for(const index_type& node : hyperedge)
-            for(unsigned part : node_to_partitions_assignment[node])
+        for (const index_type &node : hyperedge) {
+            for (unsigned part : node_to_partitions_assignment[node]) {
                 ++nr_nodes_covered_by_part[part];
-        
+            }
+        }
+
         bool covers_all = false;
-        for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part)
-            if(nr_nodes_covered_by_part[part] == hyperedge.size())
+        for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) {
+            if (nr_nodes_covered_by_part[part] == hyperedge.size()) {
                 covers_all = true;
-        
-        if(!covers_all)
+            }
+        }
+
+        if (!covers_all) {
             total += instance->getHypergraph().get_hyperedge_weight(edge_idx);
+        }
     }
 
     return total;
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 bool PartitioningWithReplication<hypergraph_t>::satisfiesBalanceConstraint() const {
     std::vector<workw_type> work_weight(instance->getNumberOfPartitions(), 0);
     std::vector<memw_type> memory_weight(instance->getNumberOfPartitions(), 0);
-    for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node)
-        for(unsigned part : node_to_partitions_assignment[node]){
-            if (part > instance->getNumberOfPartitions())
+    for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) {
+        for (unsigned part : node_to_partitions_assignment[node]) {
+            if (part >= instance->getNumberOfPartitions()) {
                 throw std::invalid_argument("Invalid Argument while checking balance constraint: partition ID out of range.");
-            else
-            {
+            } else {
                 work_weight[part] += instance->getHypergraph().get_vertex_work_weight(node);
                 memory_weight[part] += instance->getHypergraph().get_vertex_memory_weight(node);
             }
         }
+    }
 
-    for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part)
-    {
-        if(work_weight[part] > instance->getMaxWorkWeightPerPartition())
+    for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) {
+        if (work_weight[part] > instance->getMaxWorkWeightPerPartition()) {
             return false;
-        if(memory_weight[part] > instance->getMaxMemoryWeightPerPartition())
+        }
+        if (memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) {
             return false;
+        }
     }
 
     return true;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/partitioners/generic_FM.hpp b/include/osp/partitioning/partitioners/generic_FM.hpp
index a7df5bec..c17c90f4 100644
--- a/include/osp/partitioning/partitioners/generic_FM.hpp
+++ b/include/osp/partitioning/partitioners/generic_FM.hpp
@@ -13,27 +13,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-#include "osp/partitioning/model/partitioning.hpp"
-#include <cmath>
 #include <algorithm>
+#include <cmath>
 
-namespace osp{
+#include "osp/partitioning/model/partitioning.hpp"
 
-template<typename hypergraph_t>
-class GenericFM {
+namespace osp {
 
+template <typename hypergraph_t>
+class GenericFM {
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
     using commw_type = typename hypergraph_t::vertex_comm_weight_type;
 
-
-
   protected:
     unsigned max_number_of_passes = 10;
     index_type max_nodes_in_part = 0;
@@ -42,46 +40,48 @@ class GenericFM {
     std::vector<index_type> getMaxNodesOnLevel(index_type nr_nodes, unsigned nr_parts) const;
 
   public:
+    void ImprovePartitioning(Partitioning<hypergraph_t> &partition);
 
-    void ImprovePartitioning(Partitioning<hypergraph_t>& partition);
-
-    void RecursiveFM(Partitioning<hypergraph_t>& partition);
+    void RecursiveFM(Partitioning<hypergraph_t> &partition);
 
     inline unsigned getMaxNumberOfPasses() const { return max_number_of_passes; }
+
     inline void setMaxNumberOfPasses(unsigned passes_) { max_number_of_passes = passes_; }
+
     inline index_type getMaxNodesInPart() const { return max_nodes_in_part; }
+
     inline void setMaxNodesInPart(index_type max_nodes_) { max_nodes_in_part = max_nodes_; }
 };
 
-template<typename hypergraph_t>
-void GenericFM<hypergraph_t>::ImprovePartitioning(Partitioning<hypergraph_t>& partition)
-{
+template <typename hypergraph_t>
+void GenericFM<hypergraph_t>::ImprovePartitioning(Partitioning<hypergraph_t> &partition) {
     // Note: this algorithm disregards hyperedge weights, in order to keep the size of the gain bucket array bounded!
 
-    if(partition.getInstance().getNumberOfPartitions() != 2)
-    {
+    if (partition.getInstance().getNumberOfPartitions() != 2) {
         std::cout << "Error: FM can only be used for 2 partitions." << std::endl;
         return;
     }
-    
-    if(!partition.satisfiesBalanceConstraint())
-    {
+
+    if (!partition.satisfiesBalanceConstraint()) {
         std::cout << "Error: initial partition to FM does not satisfy balance constraint." << std::endl;
         return;
     }
 
-    const Hypergraph<index_type, workw_type, memw_type, commw_type>& Hgraph = partition.getInstance().getHypergraph();
+    const Hypergraph<index_type, workw_type, memw_type, commw_type> &Hgraph = partition.getInstance().getHypergraph();
 
     index_type max_degree = 0;
-    for(index_type node = 0; node < Hgraph.num_vertices(); ++node)
+    for (index_type node = 0; node < Hgraph.num_vertices(); ++node) {
         max_degree = std::max(max_degree, static_cast<index_type>(Hgraph.get_incident_hyperedges(node).size()));
+    }
 
-    if(max_nodes_in_part == 0) // if not initialized
-        max_nodes_in_part = static_cast<index_type>(ceil(static_cast<double>(Hgraph.num_vertices()) * static_cast<double>(partition.getInstance().getMaxWorkWeightPerPartition())
-                                         / static_cast<double>(compute_total_vertex_work_weight(Hgraph)) ));
+    if (max_nodes_in_part == 0) {    // if not initialized
+        max_nodes_in_part
+            = static_cast<index_type>(ceil(static_cast<double>(Hgraph.num_vertices())
+                                           * static_cast<double>(partition.getInstance().getMaxWorkWeightPerPartition())
+                                           / static_cast<double>(compute_total_vertex_work_weight(Hgraph))));
+    }
 
-    for(unsigned pass_idx = 0; pass_idx < max_number_of_passes; ++pass_idx)
-    {
+    for (unsigned pass_idx = 0; pass_idx < max_number_of_passes; ++pass_idx) {
         std::vector<unsigned> node_to_new_part = partition.assignedPartitions();
         std::vector<bool> locked(Hgraph.num_vertices(), false);
         std::vector<int> gain(Hgraph.num_vertices(), 0);
@@ -89,53 +89,57 @@ void GenericFM<hypergraph_t>::ImprovePartitioning(Partitioning<hypergraph_t>& pa
         int cost = 0;
 
         index_type left_side = 0;
-        for(index_type node = 0; node < Hgraph.num_vertices(); ++node)
-            if(partition.assignedPartition(node) == 0)
+        for (index_type node = 0; node < Hgraph.num_vertices(); ++node) {
+            if (partition.assignedPartition(node) == 0) {
                 ++left_side;
+            }
+        }
 
-        if(left_side > max_nodes_in_part || Hgraph.num_vertices() - left_side > max_nodes_in_part)
-        {
-            if(pass_idx == 0)
-            {
-                std::cout<<"Error: initial partitioning of FM is not balanced."<<std::endl;
+        if (left_side > max_nodes_in_part || Hgraph.num_vertices() - left_side > max_nodes_in_part) {
+            if (pass_idx == 0) {
+                std::cout << "Error: initial partitioning of FM is not balanced." << std::endl;
                 return;
-            }
-            else
-            {
-                std::cout<<"Error during FM: partitionming somehow became imbalanced."<<std::endl;
+            } else {
+                std::cout << "Error during FM: partitionming somehow became imbalanced." << std::endl;
                 return;
             }
         }
 
         // Initialize gain values
-        for(index_type hyperedge = 0; hyperedge < Hgraph.num_hyperedges(); ++hyperedge)
-        {
-            for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
+        for (index_type hyperedge = 0; hyperedge < Hgraph.num_hyperedges(); ++hyperedge) {
+            for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
                 ++nr_nodes_in_hyperedge_on_side[hyperedge][partition.assignedPartition(node)];
+            }
 
-            if(Hgraph.get_vertices_in_hyperedge(hyperedge).size() < 2)
+            if (Hgraph.get_vertices_in_hyperedge(hyperedge).size() < 2) {
                 continue;
-            
-            for(unsigned part = 0; part < 2; ++part)
-            {
-                if(nr_nodes_in_hyperedge_on_side[hyperedge][part] == 1)
-                    for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
-                        if(partition.assignedPartition(node) == part)
+            }
+
+            for (unsigned part = 0; part < 2; ++part) {
+                if (nr_nodes_in_hyperedge_on_side[hyperedge][part] == 1) {
+                    for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
+                        if (partition.assignedPartition(node) == part) {
                             ++gain[node];
+                        }
+                    }
+                }
 
-                if(nr_nodes_in_hyperedge_on_side[hyperedge][part] == 0)
-                    for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
-                        if(partition.assignedPartition(node) != part)
+                if (nr_nodes_in_hyperedge_on_side[hyperedge][part] == 0) {
+                    for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
+                        if (partition.assignedPartition(node) != part) {
                             --gain[node];
+                        }
+                    }
+                }
             }
         }
 
         // build gain bucket array
-        std::vector<int> max_gain(2, -static_cast<int>(max_degree)-1);
-        std::vector<std::vector<std::vector<index_type> > > gain_bucket_array(2, std::vector<std::vector<index_type> >(2*max_degree+1));
-        for(index_type node = 0; node < Hgraph.num_vertices(); ++node)
-        {
-            const unsigned& part = partition.assignedPartition(node);
+        std::vector<int> max_gain(2, -static_cast<int>(max_degree) - 1);
+        std::vector<std::vector<std::vector<index_type> > > gain_bucket_array(
+            2, std::vector<std::vector<index_type> >(2 * max_degree + 1));
+        for (index_type node = 0; node < Hgraph.num_vertices(); ++node) {
+            const unsigned &part = partition.assignedPartition(node);
             gain_bucket_array[part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(node);
             max_gain[part] = std::max(max_gain[part], gain[node]);
         }
@@ -145,27 +149,26 @@ void GenericFM<hypergraph_t>::ImprovePartitioning(Partitioning<hypergraph_t>& pa
         std::vector<index_type> moved_nodes;
 
         // the pass itself: make moves
-        while(moved_nodes.size() < Hgraph.num_vertices())
-        {
+        while (moved_nodes.size() < Hgraph.num_vertices()) {
             // select move
             index_type to_move = std::numeric_limits<index_type>::max();
             unsigned chosen_part = std::numeric_limits<unsigned>::max();
 
             unsigned gain_index = static_cast<unsigned>(std::max(max_gain[0], max_gain[1]) + static_cast<int>(max_degree));
-            while(gain_index < std::numeric_limits<unsigned>::max())
-            {
-                bool can_choose_left = (Hgraph.num_vertices() - left_side < max_nodes_in_part) && !gain_bucket_array[0][gain_index].empty();
+            while (gain_index < std::numeric_limits<unsigned>::max()) {
+                bool can_choose_left = (Hgraph.num_vertices() - left_side < max_nodes_in_part)
+                                       && !gain_bucket_array[0][gain_index].empty();
                 bool can_choose_right = (left_side < max_nodes_in_part) && !gain_bucket_array[1][gain_index].empty();
 
-                if(can_choose_left && can_choose_right)
+                if (can_choose_left && can_choose_right) {
                     chosen_part = (left_side >= Hgraph.num_vertices() / 2) ? 1 : 0;
-                else if(can_choose_left)
+                } else if (can_choose_left) {
                     chosen_part = 0;
-                else if(can_choose_right)
+                } else if (can_choose_right) {
                     chosen_part = 1;
+                }
 
-                if(chosen_part < 2)
-                {
+                if (chosen_part < 2) {
                     to_move = gain_bucket_array[chosen_part][gain_index].back();
                     gain_bucket_array[chosen_part][gain_index].pop_back();
                     break;
@@ -173,84 +176,82 @@ void GenericFM<hypergraph_t>::ImprovePartitioning(Partitioning<hypergraph_t>& pa
                 --gain_index;
             }
 
-            if(to_move == std::numeric_limits<index_type>::max())
+            if (to_move == std::numeric_limits<index_type>::max()) {
                 break;
-            
+            }
+
             // make move
 
             moved_nodes.push_back(to_move);
             cost -= gain[to_move];
-            if(cost < best_cost)
-            {
+            if (cost < best_cost) {
                 best_cost = cost;
                 best_index = static_cast<index_type>(moved_nodes.size()) + 1;
             }
             locked[to_move] = true;
             node_to_new_part[to_move] = 1 - node_to_new_part[to_move];
 
-            if(chosen_part == 0)
+            if (chosen_part == 0) {
                 --left_side;
-            else
+            } else {
                 ++left_side;
+            }
 
-            unsigned other_part = 1-chosen_part;
+            unsigned other_part = 1 - chosen_part;
 
             // update gain values
-            for(index_type hyperedge : Hgraph.get_incident_hyperedges(to_move))
-            {
-                if(nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 1)
-                {
-                    for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
-                    {
-                        if(locked[node])
+            for (index_type hyperedge : Hgraph.get_incident_hyperedges(to_move)) {
+                if (nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 1) {
+                    for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
+                        if (locked[node]) {
                             continue;
+                        }
 
-                        std::vector<index_type>& vec = gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
+                        std::vector<index_type> &vec
+                            = gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
                         vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end());
                         --gain[node];
-                        gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(node);
+                        gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(
+                            node);
                     }
-                }
-                else if(nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 2)
-                {
-                    for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
-                    {
-                        if(node_to_new_part[node] == chosen_part && !locked[node])
-                        {
-                            std::vector<index_type>& vec = gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
+                } else if (nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 2) {
+                    for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
+                        if (node_to_new_part[node] == chosen_part && !locked[node]) {
+                            std::vector<index_type> &vec
+                                = gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
                             vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end());
                             ++gain[node];
-                            gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(node);
+                            gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))]
+                                .push_back(node);
                             max_gain[chosen_part] = std::max(max_gain[chosen_part], gain[node]);
                             break;
                         }
                     }
                 }
-                if(nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 1)
-                {
-                    for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
-                    {
-                        if(node_to_new_part[node] == other_part && !locked[node])
-                        {
-                            std::vector<index_type>& vec = gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
+                if (nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 1) {
+                    for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
+                        if (node_to_new_part[node] == other_part && !locked[node]) {
+                            std::vector<index_type> &vec
+                                = gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
                             vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end());
                             --gain[node];
-                            gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(node);
+                            gain_bucket_array[other_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(
+                                node);
                             break;
                         }
                     }
-                }
-                else if(nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 0)
-                {
-                    for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge))
-                    {
-                        if(locked[node])
+                } else if (nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 0) {
+                    for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) {
+                        if (locked[node]) {
                             continue;
+                        }
 
-                        std::vector<index_type>& vec = gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
+                        std::vector<index_type> &vec
+                            = gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))];
                         vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end());
                         ++gain[node];
-                        gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(node);
+                        gain_bucket_array[chosen_part][static_cast<unsigned>(gain[node] + static_cast<int>(max_degree))].push_back(
+                            node);
                         max_gain[chosen_part] = std::max(max_gain[chosen_part], gain[node]);
                     }
                 }
@@ -260,39 +261,41 @@ void GenericFM<hypergraph_t>::ImprovePartitioning(Partitioning<hypergraph_t>& pa
         }
 
         // apply best configuration seen
-        if(best_index == 0)
+        if (best_index == 0) {
             break;
+        }
 
-        for(index_type node_idx = 0; node_idx < best_index && node_idx < static_cast<index_type>(moved_nodes.size()); ++node_idx)
-            partition.setAssignedPartition(moved_nodes[node_idx], 1U-partition.assignedPartition(moved_nodes[node_idx]));
-
+        for (index_type node_idx = 0; node_idx < best_index && node_idx < static_cast<index_type>(moved_nodes.size()); ++node_idx) {
+            partition.setAssignedPartition(moved_nodes[node_idx], 1U - partition.assignedPartition(moved_nodes[node_idx]));
+        }
     }
 }
 
-template<typename hypergraph_t>
-void GenericFM<hypergraph_t>::RecursiveFM(Partitioning<hypergraph_t>& partition)
-{
-    const unsigned& nr_parts = partition.getInstance().getNumberOfPartitions();
-    const index_type& nr_nodes = partition.getInstance().getHypergraph().num_vertices();
+template <typename hypergraph_t>
+void GenericFM<hypergraph_t>::RecursiveFM(Partitioning<hypergraph_t> &partition) {
+    const unsigned &nr_parts = partition.getInstance().getNumberOfPartitions();
+    const index_type &nr_nodes = partition.getInstance().getHypergraph().num_vertices();
 
     using Hgraph = Hypergraph<index_type, workw_type, memw_type, commw_type>;
 
     // Note: this is just a simple recursive heuristic for the case when the partitions are a small power of 2
-    if(nr_parts != 4 && nr_parts != 8 && nr_parts != 16 && nr_parts != 32)
-    {
+    if (nr_parts != 4 && nr_parts != 8 && nr_parts != 16 && nr_parts != 32) {
         std::cout << "Error: Recursive FM can only be used for 4, 8, 16 or 32 partitions currently." << std::endl;
         return;
     }
 
-    for(index_type node = 0; node < nr_nodes; ++node)
+    for (index_type node = 0; node < nr_nodes; ++node) {
         partition.setAssignedPartition(node, static_cast<unsigned>(node % 2));
+    }
 
-    if(max_nodes_in_part == 0) // if not initialized
-        max_nodes_in_part = static_cast<index_type>(ceil(static_cast<double>(nr_nodes) * static_cast<double>(partition.getInstance().getMaxWorkWeightPerPartition())
-                                         / static_cast<double>(compute_total_vertex_work_weight(partition.getInstance().getHypergraph())) ));
+    if (max_nodes_in_part == 0) {    // if not initialized
+        max_nodes_in_part = static_cast<index_type>(
+            ceil(static_cast<double>(nr_nodes) * static_cast<double>(partition.getInstance().getMaxWorkWeightPerPartition())
+                 / static_cast<double>(compute_total_vertex_work_weight(partition.getInstance().getHypergraph()))));
+    }
 
     const std::vector<index_type> max_nodes_on_level = getMaxNodesOnLevel(nr_nodes, nr_parts);
-    
+
     unsigned parts = 1;
     unsigned level = 0;
     std::vector<Hgraph> sub_hgraphs({partition.getInstance().getHypergraph()});
@@ -300,42 +303,40 @@ void GenericFM<hypergraph_t>::RecursiveFM(Partitioning<hypergraph_t>& partition)
 
     std::map<index_type, std::pair<unsigned, index_type> > node_to_new_hgraph_and_id;
     std::map<std::pair<unsigned, index_type>, index_type> hgraph_and_id_to_old_idx;
-    for(index_type node = 0; node < nr_nodes; ++node)
-    {
+    for (index_type node = 0; node < nr_nodes; ++node) {
         node_to_new_hgraph_and_id[node] = std::make_pair(0, node);
         hgraph_and_id_to_old_idx[std::make_pair(0, node)] = node;
     }
 
-    while(parts < nr_parts)
-    {
+    while (parts < nr_parts) {
         unsigned end_idx = static_cast<unsigned>(sub_hgraphs.size());
-        for(unsigned sub_hgraph_index = start_index; sub_hgraph_index < end_idx; ++sub_hgraph_index)
-        {
-            const Hgraph& hgraph = sub_hgraphs[sub_hgraph_index];
+        for (unsigned sub_hgraph_index = start_index; sub_hgraph_index < end_idx; ++sub_hgraph_index) {
+            const Hgraph &hgraph = sub_hgraphs[sub_hgraph_index];
             PartitioningProblem instance(hgraph, 2);
             Partitioning sub_partition(instance);
-            for(index_type node = 0; node < hgraph.num_vertices(); ++node)
-                sub_partition.setAssignedPartition(node, node%2);
-            
+            for (index_type node = 0; node < hgraph.num_vertices(); ++node) {
+                sub_partition.setAssignedPartition(node, node % 2);
+            }
+
             GenericFM sub_fm;
             sub_fm.setMaxNodesInPart(max_nodes_on_level[level]);
-            //std::cout<<"Hgraph of size "<<hgraph.num_vertices()<<" split into two parts of at most "<<max_nodes_on_level[level]<<std::endl;
+            // std::cout<<"Hgraph of size "<<hgraph.num_vertices()<<" split into two parts of at most "<<max_nodes_on_level[level]<<std::endl;
             sub_fm.ImprovePartitioning(sub_partition);
 
             std::vector<unsigned> current_idx(2, 0);
             std::vector<std::vector<bool> > part_indicator(2, std::vector<bool>(hgraph.num_vertices(), false));
-            for(index_type node = 0; node < hgraph.num_vertices(); ++node)
-            {
+            for (index_type node = 0; node < hgraph.num_vertices(); ++node) {
                 const unsigned part_id = sub_partition.assignedPartition(node);
                 const index_type original_id = hgraph_and_id_to_old_idx[std::make_pair(sub_hgraph_index, node)];
-                node_to_new_hgraph_and_id[original_id] = std::make_pair(sub_hgraphs.size()+part_id, current_idx[part_id]);
-                hgraph_and_id_to_old_idx[std::make_pair(sub_hgraphs.size()+part_id, current_idx[part_id])] = original_id;
+                node_to_new_hgraph_and_id[original_id] = std::make_pair(sub_hgraphs.size() + part_id, current_idx[part_id]);
+                hgraph_and_id_to_old_idx[std::make_pair(sub_hgraphs.size() + part_id, current_idx[part_id])] = original_id;
                 ++current_idx[part_id];
                 part_indicator[part_id][node] = true;
             }
 
-            for(unsigned part = 0; part < 2; ++part)
+            for (unsigned part = 0; part < 2; ++part) {
                 sub_hgraphs.push_back(create_induced_hypergraph(sub_hgraphs[sub_hgraph_index], part_indicator[part]));
+            }
 
             ++start_index;
         }
@@ -343,33 +344,35 @@ void GenericFM<hypergraph_t>::RecursiveFM(Partitioning<hypergraph_t>& partition)
         parts *= 2;
         ++level;
     }
-    
-    for(index_type node = 0; node < nr_nodes; ++node)
-        partition.setAssignedPartition(node, node_to_new_hgraph_and_id[node].first - (static_cast<unsigned>(sub_hgraphs.size())-nr_parts));    
-    
+
+    for (index_type node = 0; node < nr_nodes; ++node) {
+        partition.setAssignedPartition(
+            node, node_to_new_hgraph_and_id[node].first - (static_cast<unsigned>(sub_hgraphs.size()) - nr_parts));
+    }
 }
 
-template<typename hypergraph_t>
-std::vector<typename hypergraph_t::vertex_idx> GenericFM<hypergraph_t>::getMaxNodesOnLevel(typename hypergraph_t::vertex_idx nr_nodes, unsigned nr_parts) const
-{
+template <typename hypergraph_t>
+std::vector<typename hypergraph_t::vertex_idx> GenericFM<hypergraph_t>::getMaxNodesOnLevel(
+    typename hypergraph_t::vertex_idx nr_nodes, unsigned nr_parts) const {
     std::vector<index_type> max_nodes_on_level;
     std::vector<index_type> limit_per_level({static_cast<index_type>(ceil(static_cast<double>(nr_nodes) / 2.0))});
-    for(unsigned parts = nr_parts / 4; parts > 0; parts /= 2)
+    for (unsigned parts = nr_parts / 4; parts > 0; parts /= 2) {
         limit_per_level.push_back(static_cast<index_type>(ceil(static_cast<double>(limit_per_level.back()) / 2.0)));
+    }
 
     max_nodes_on_level.push_back(max_nodes_in_part);
-    for(unsigned parts = 2; parts < nr_parts; parts *= 2)
-    {
-        index_type next_limit = max_nodes_on_level.back()*2;
-        if(next_limit > limit_per_level.back())
+    for (unsigned parts = 2; parts < nr_parts; parts *= 2) {
+        index_type next_limit = max_nodes_on_level.back() * 2;
+        if (next_limit > limit_per_level.back()) {
             --next_limit;
-        
+        }
+
         limit_per_level.pop_back();
         max_nodes_on_level.push_back(next_limit);
     }
 
-    std::reverse(max_nodes_on_level.begin(),max_nodes_on_level.end());
+    std::reverse(max_nodes_on_level.begin(), max_nodes_on_level.end());
     return max_nodes_on_level;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/partitioners/partitioning_ILP.hpp b/include/osp/partitioning/partitioners/partitioning_ILP.hpp
index 2e6c4e0e..5623d581 100644
--- a/include/osp/partitioning/partitioners/partitioning_ILP.hpp
+++ b/include/osp/partitioning/partitioners/partitioning_ILP.hpp
@@ -27,9 +27,8 @@ limitations under the License.
 
 namespace osp {
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 class HypergraphPartitioningILP : public HypergraphPartitioningILPBase<hypergraph_t> {
-
   protected:
     std::vector<unsigned> readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance, Model &model);
 
@@ -45,7 +44,7 @@ class HypergraphPartitioningILP : public HypergraphPartitioningILPBase<hypergrap
     virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILP"; }
 };
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 RETURN_STATUS HypergraphPartitioningILP<hypergraph_t>::computePartitioning(Partitioning<hypergraph_t> &result) {
     Envr env;
     Model model = env.CreateModel("HypergraphPart");
@@ -53,24 +52,21 @@ RETURN_STATUS HypergraphPartitioningILP<hypergraph_t>::computePartitioning(Parti
     this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model);
     setupExtraVariablesConstraints(result.getInstance(), model);
 
-    if (this->use_initial_solution)
+    if (this->use_initial_solution) {
         setInitialSolution(result, model);
+    }
 
     this->solveILP(model);
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         result.setAssignedPartitions(readCoptAssignment(result.getInstance(), model));
         return RETURN_STATUS::OSP_SUCCESS;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             result.setAssignedPartitions(readCoptAssignment(result.getInstance(), model));
             return RETURN_STATUS::OSP_SUCCESS;
 
@@ -80,9 +76,9 @@ RETURN_STATUS HypergraphPartitioningILP<hypergraph_t>::computePartitioning(Parti
     }
 }
 
-template<typename hypergraph_t>
-void HypergraphPartitioningILP<hypergraph_t>::setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model &model) {
-
+template <typename hypergraph_t>
+void HypergraphPartitioningILP<hypergraph_t>::setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance,
+                                                                             Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
     const index_type numberOfParts = instance.getNumberOfPartitions();
@@ -92,52 +88,61 @@ void HypergraphPartitioningILP<hypergraph_t>::setupExtraVariablesConstraints(con
 
     // each node assigned to exactly one partition
     for (index_type node = 0; node < numberOfVertices; node++) {
-
         Expr expr;
-        for (unsigned part = 0; part < numberOfParts; part++)
+        for (unsigned part = 0; part < numberOfParts; part++) {
             expr += this->node_in_partition[node][static_cast<int>(part)];
+        }
 
         model.AddConstr(expr == 1);
     }
 
     // hyperedge indicators match node variables
-    for (unsigned part = 0; part < numberOfParts; part++)
-        for (index_type node = 0; node < numberOfVertices; node++)
-            for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
-                model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)] >= this->node_in_partition[node][static_cast<int>(part)]);
+    for (unsigned part = 0; part < numberOfParts; part++) {
+        for (index_type node = 0; node < numberOfVertices; node++) {
+            for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) {
+                model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)]
+                                >= this->node_in_partition[node][static_cast<int>(part)]);
+            }
+        }
+    }
 }
 
 // convert generic one-to-many assingment (of base class function) to one-to-one
-template<typename hypergraph_t>
-std::vector<unsigned> HypergraphPartitioningILP<hypergraph_t>::readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance, Model &model) {
+template <typename hypergraph_t>
+std::vector<unsigned> HypergraphPartitioningILP<hypergraph_t>::readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance,
+                                                                                  Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
     std::vector<unsigned> node_to_partition(instance.getHypergraph().num_vertices(), std::numeric_limits<unsigned>::max());
     std::vector<std::vector<unsigned>> assignmentsGenericForm = this->readAllCoptAssignments(instance, model);
 
-    for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++)
+    for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) {
         node_to_partition[node] = assignmentsGenericForm[node].front();
+    }
 
     return node_to_partition;
 }
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 void HypergraphPartitioningILP<hypergraph_t>::setInitialSolution(const Partitioning<hypergraph_t> &partition, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
     const std::vector<unsigned> &assignment = partition.assignedPartitions();
     const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions();
-    if (assignment.size() != partition.getInstance().getHypergraph().num_vertices())
+    if (assignment.size() != partition.getInstance().getHypergraph().num_vertices()) {
         return;
+    }
 
     for (index_type node = 0; node < assignment.size(); ++node) {
-        if (assignment[node] >= numPartitions)
+        if (assignment[node] >= numPartitions) {
             continue;
+        }
 
-        for (unsigned part = 0; part < numPartitions; ++part)
+        for (unsigned part = 0; part < numPartitions; ++part) {
             model.SetMipStart(this->node_in_partition[node][static_cast<int>(part)], static_cast<int>(assignment[node] == part));
+        }
     }
     model.LoadMipStart();
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp b/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp
index b5df2fb4..23e12d5b 100644
--- a/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp
+++ b/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp
@@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
@@ -21,14 +21,13 @@ limitations under the License.
 #include <callbackbase.h>
 #include <coptcpp_pch.h>
 
+#include "osp/bsp/model/BspInstance.hpp"    // for return statuses (stati?)
 #include "osp/partitioning/model/partitioning_problem.hpp"
-#include "osp/bsp/model/BspInstance.hpp" // for return statuses (stati?)
 
-namespace osp{
+namespace osp {
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 class HypergraphPartitioningILPBase {
-
   protected:
     std::vector<VarArray> node_in_partition;
     std::vector<VarArray> hyperedge_uses_partition;
@@ -36,26 +35,26 @@ class HypergraphPartitioningILPBase {
     unsigned time_limit_seconds = 3600;
     bool use_initial_solution = false;
 
-    std::vector<std::vector<unsigned> > readAllCoptAssignments(const PartitioningProblem<hypergraph_t> &instance, Model& model);
+    std::vector<std::vector<unsigned> > readAllCoptAssignments(const PartitioningProblem<hypergraph_t> &instance, Model &model);
 
-    void setupFundamentalVariablesConstraintsObjective(const PartitioningProblem<hypergraph_t> &instance, Model& model);
+    void setupFundamentalVariablesConstraintsObjective(const PartitioningProblem<hypergraph_t> &instance, Model &model);
 
-    void solveILP(Model& model);
+    void solveILP(Model &model);
 
   public:
-
     virtual std::string getAlgorithmName() const = 0;
 
     inline unsigned getTimeLimitSeconds() const { return time_limit_seconds; }
+
     inline void setTimeLimitSeconds(unsigned limit_) { time_limit_seconds = limit_; }
+
     inline void setUseInitialSolution(bool use_) { use_initial_solution = use_; }
 
     virtual ~HypergraphPartitioningILPBase() = default;
 };
 
-template<typename hypergraph_t>
-void HypergraphPartitioningILPBase<hypergraph_t>::solveILP(Model& model) {
-
+template <typename hypergraph_t>
+void HypergraphPartitioningILPBase<hypergraph_t>::solveILP(Model &model) {
     model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0);
 
     model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds);
@@ -74,9 +73,9 @@ void HypergraphPartitioningILPBase<hypergraph_t>::solveILP(Model& model) {
     model.Solve();
 }
 
-template<typename hypergraph_t>
-void HypergraphPartitioningILPBase<hypergraph_t>::setupFundamentalVariablesConstraintsObjective(const PartitioningProblem<hypergraph_t> &instance, Model& model) {
-
+template <typename hypergraph_t>
+void HypergraphPartitioningILPBase<hypergraph_t>::setupFundamentalVariablesConstraintsObjective(
+    const PartitioningProblem<hypergraph_t> &instance, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
     using workw_type = typename hypergraph_t::vertex_work_weight_type;
     using memw_type = typename hypergraph_t::vertex_mem_weight_type;
@@ -89,90 +88,93 @@ void HypergraphPartitioningILPBase<hypergraph_t>::setupFundamentalVariablesConst
 
     node_in_partition = std::vector<VarArray>(numberOfVertices);
 
-    for (index_type node = 0; node < numberOfVertices; node++)
+    for (index_type node = 0; node < numberOfVertices; node++) {
         node_in_partition[node] = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "node_in_partition");
+    }
 
     hyperedge_uses_partition = std::vector<VarArray>(numberOfHyperedges);
 
-    for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++)
-        hyperedge_uses_partition[hyperedge] = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "hyperedge_uses_partition");
-    
+    for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) {
+        hyperedge_uses_partition[hyperedge]
+            = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "hyperedge_uses_partition");
+    }
+
     // partition size constraints
-    if(instance.getMaxWorkWeightPerPartition() < std::numeric_limits<workw_type>::max())
-    {
-        for (unsigned part = 0; part < numberOfParts; part++)
-        {
+    if (instance.getMaxWorkWeightPerPartition() < std::numeric_limits<workw_type>::max()) {
+        for (unsigned part = 0; part < numberOfParts; part++) {
             Expr expr;
-            for (index_type node = 0; node < numberOfVertices; node++)
+            for (index_type node = 0; node < numberOfVertices; node++) {
                 expr += instance.getHypergraph().get_vertex_work_weight(node) * node_in_partition[node][static_cast<int>(part)];
+            }
 
             model.AddConstr(expr <= instance.getMaxWorkWeightPerPartition());
         }
-    }    
-    if(instance.getMaxMemoryWeightPerPartition() < std::numeric_limits<memw_type>::max())
-    {
-        for (unsigned part = 0; part < numberOfParts; part++)
-        {
+    }
+    if (instance.getMaxMemoryWeightPerPartition() < std::numeric_limits<memw_type>::max()) {
+        for (unsigned part = 0; part < numberOfParts; part++) {
             Expr expr;
-            for (index_type node = 0; node < numberOfVertices; node++)
+            for (index_type node = 0; node < numberOfVertices; node++) {
                 expr += instance.getHypergraph().get_vertex_memory_weight(node) * node_in_partition[node][static_cast<int>(part)];
+            }
 
             model.AddConstr(expr <= instance.getMaxMemoryWeightPerPartition());
         }
-    } 
+    }
 
     // set objective
     Expr expr;
-    for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++)
-    {
+    for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) {
         expr -= instance.getHypergraph().get_hyperedge_weight(hyperedge);
-        for (unsigned part = 0; part < numberOfParts; part++)
-            expr += instance.getHypergraph().get_hyperedge_weight(hyperedge) * hyperedge_uses_partition[hyperedge][static_cast<int>(part)];
+        for (unsigned part = 0; part < numberOfParts; part++) {
+            expr += instance.getHypergraph().get_hyperedge_weight(hyperedge)
+                    * hyperedge_uses_partition[hyperedge][static_cast<int>(part)];
+        }
     }
 
     model.SetObjective(expr, COPT_MINIMIZE);
-             
 }
 
-template<typename hypergraph_t>
-std::vector<std::vector<unsigned> > HypergraphPartitioningILPBase<hypergraph_t>::readAllCoptAssignments(const PartitioningProblem<hypergraph_t> &instance, Model& model)
-{
+template <typename hypergraph_t>
+std::vector<std::vector<unsigned> > HypergraphPartitioningILPBase<hypergraph_t>::readAllCoptAssignments(
+    const PartitioningProblem<hypergraph_t> &instance, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
-
     std::vector<std::vector<unsigned> > node_to_partitions(instance.getHypergraph().num_vertices());
 
     std::set<unsigned> nonempty_partition_ids;
-    for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++)
-        for(unsigned part = 0; part < instance.getNumberOfPartitions(); part++)
-            if(node_in_partition[node][static_cast<int>(part)].Get(COPT_DBLINFO_VALUE) >= .99)
-            {
+    for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) {
+        for (unsigned part = 0; part < instance.getNumberOfPartitions(); part++) {
+            if (node_in_partition[node][static_cast<int>(part)].Get(COPT_DBLINFO_VALUE) >= .99) {
                 node_to_partitions[node].push_back(part);
                 nonempty_partition_ids.insert(part);
             }
+        }
+    }
 
-    for(std::vector<unsigned>& chosen_partitions : node_to_partitions)
-        if(chosen_partitions.empty())
-        {
-            std::cout<<"Error: partitioning returned by ILP seems incomplete!"<<std::endl;
+    for (std::vector<unsigned> &chosen_partitions : node_to_partitions) {
+        if (chosen_partitions.empty()) {
+            std::cout << "Error: partitioning returned by ILP seems incomplete!" << std::endl;
             chosen_partitions.push_back(std::numeric_limits<unsigned>::max());
         }
-    
+    }
+
     unsigned current_index = 0;
     std::map<unsigned, unsigned> new_part_index;
-    for(unsigned part_index : nonempty_partition_ids)
-    {
+    for (unsigned part_index : nonempty_partition_ids) {
         new_part_index[part_index] = current_index;
         ++current_index;
     }
 
-    for(index_type node = 0; node < instance.getHypergraph().num_vertices(); node++)
-        for(unsigned entry_idx = 0; entry_idx < node_to_partitions[node].size(); ++entry_idx)
+    for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) {
+        for (unsigned entry_idx = 0; entry_idx < node_to_partitions[node].size(); ++entry_idx) {
             node_to_partitions[node][entry_idx] = new_part_index[node_to_partitions[node][entry_idx]];
+        }
+    }
 
-    std::cout<<"Hypergraph partitioning ILP best solution value: "<<model.GetDblAttr(COPT_DBLATTR_BESTOBJ)<<", best lower bound: "<<model.GetDblAttr(COPT_DBLATTR_BESTBND)<<std::endl;
+    std::cout << "Hypergraph partitioning ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ)
+              << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl;
 
     return node_to_partitions;
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp b/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp
index d1e9d5c8..5378ff1a 100644
--- a/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp
+++ b/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp
@@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
@@ -21,62 +21,57 @@ limitations under the License.
 #include <callbackbase.h>
 #include <coptcpp_pch.h>
 
-#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp"
 #include "osp/partitioning/model/partitioning_replication.hpp"
+#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename hypergraph_t>
+template <typename hypergraph_t>
 class HypergraphPartitioningILPWithReplication : public HypergraphPartitioningILPBase<hypergraph_t> {
-
   public:
     enum class REPLICATION_MODEL_IN_ILP { ONLY_TWICE, GENERAL };
 
   protected:
-    void setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model& model);
+    void setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model &model);
 
-    void setInitialSolution(const PartitioningWithReplication<hypergraph_t> &partition, Model& model);
+    void setInitialSolution(const PartitioningWithReplication<hypergraph_t> &partition, Model &model);
 
     REPLICATION_MODEL_IN_ILP replication_model = REPLICATION_MODEL_IN_ILP::ONLY_TWICE;
 
   public:
-
     virtual ~HypergraphPartitioningILPWithReplication() override = default;
 
-    RETURN_STATUS computePartitioning(PartitioningWithReplication<hypergraph_t>& result);
+    RETURN_STATUS computePartitioning(PartitioningWithReplication<hypergraph_t> &result);
 
     virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILPWithReplication"; }
 
     void setReplicationModel(REPLICATION_MODEL_IN_ILP replication_model_) { replication_model = replication_model_; }
 };
 
-template<typename hypergraph_t>
-RETURN_STATUS HypergraphPartitioningILPWithReplication<hypergraph_t>::computePartitioning(PartitioningWithReplication<hypergraph_t>& result)
-{
+template <typename hypergraph_t>
+RETURN_STATUS HypergraphPartitioningILPWithReplication<hypergraph_t>::computePartitioning(
+    PartitioningWithReplication<hypergraph_t> &result) {
     Envr env;
     Model model = env.CreateModel("HypergraphPartRepl");
 
     this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model);
     setupExtraVariablesConstraints(result.getInstance(), model);
 
-    if(this->use_initial_solution)
+    if (this->use_initial_solution) {
         setInitialSolution(result, model);
+    }
 
     this->solveILP(model);
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         result.setAssignedPartitionVectors(this->readAllCoptAssignments(result.getInstance(), model));
         return RETURN_STATUS::OSP_SUCCESS;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             result.setAssignedPartitionVectors(this->readAllCoptAssignments(result.getInstance(), model));
             return RETURN_STATUS::OSP_SUCCESS;
 
@@ -86,104 +81,126 @@ RETURN_STATUS HypergraphPartitioningILPWithReplication<hypergraph_t>::computePar
     }
 }
 
-template<typename hypergraph_t>
-void HypergraphPartitioningILPWithReplication<hypergraph_t>::setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model& model) {
-
+template <typename hypergraph_t>
+void HypergraphPartitioningILPWithReplication<hypergraph_t>::setupExtraVariablesConstraints(
+    const PartitioningProblem<hypergraph_t> &instance, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
-
     const index_type numberOfParts = instance.getNumberOfPartitions();
     const index_type numberOfVertices = instance.getHypergraph().num_vertices();
 
-    if(replication_model == REPLICATION_MODEL_IN_ILP::GENERAL)
-    {
+    if (replication_model == REPLICATION_MODEL_IN_ILP::GENERAL) {
         // create variables for each pin+partition combination
         std::map<std::pair<index_type, unsigned>, index_type> pin_ID_map;
         index_type nr_of_pins = 0;
-        for (index_type node = 0; node < numberOfVertices; node++)
-            for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
+        for (index_type node = 0; node < numberOfVertices; node++) {
+            for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) {
                 pin_ID_map[std::make_pair(node, hyperedge)] = nr_of_pins++;
-        
+            }
+        }
+
         std::vector<VarArray> pin_covered_by_partition = std::vector<VarArray>(nr_of_pins);
 
-        for (index_type pin = 0; pin < nr_of_pins; pin++)
+        for (index_type pin = 0; pin < nr_of_pins; pin++) {
             pin_covered_by_partition[pin] = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "pin_covered_by_partition");
+        }
 
         //  each pin covered exactly once
         for (index_type pin = 0; pin < nr_of_pins; pin++) {
-
             Expr expr;
-            for (unsigned part = 0; part < numberOfParts; part++)
+            for (unsigned part = 0; part < numberOfParts; part++) {
                 expr += pin_covered_by_partition[pin][static_cast<int>(part)];
+            }
 
             model.AddConstr(expr == 1);
         }
 
         // pin covering requires node assignment
-        for (unsigned part = 0; part < numberOfParts; part++)
-            for (index_type node = 0; node < numberOfVertices; node++)
-                for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
-                    model.AddConstr(this->node_in_partition[node][static_cast<int>(part)] >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast<int>(part)]);
+        for (unsigned part = 0; part < numberOfParts; part++) {
+            for (index_type node = 0; node < numberOfVertices; node++) {
+                for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) {
+                    model.AddConstr(
+                        this->node_in_partition[node][static_cast<int>(part)]
+                        >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast<int>(part)]);
+                }
+            }
+        }
 
         // pin covering requires hyperedge use
-        for (unsigned part = 0; part < numberOfParts; part++)
-            for (index_type node = 0; node < numberOfVertices; node++)
-                for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
-                    model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)] >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast<int>(part)]);
+        for (unsigned part = 0; part < numberOfParts; part++) {
+            for (index_type node = 0; node < numberOfVertices; node++) {
+                for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) {
+                    model.AddConstr(
+                        this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)]
+                        >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast<int>(part)]);
+                }
+            }
+        }
 
-    }
-    else if(replication_model == REPLICATION_MODEL_IN_ILP::ONLY_TWICE)
-    {
+    } else if (replication_model == REPLICATION_MODEL_IN_ILP::ONLY_TWICE) {
         // each node has one or two copies
         VarArray node_replicated = model.AddVars(static_cast<int>(numberOfVertices), COPT_BINARY, "node_replicated");
-        
-        for (index_type node = 0; node < numberOfVertices; node++) {
 
+        for (index_type node = 0; node < numberOfVertices; node++) {
             Expr expr = -1;
-            for (unsigned part = 0; part < numberOfParts; part++)
+            for (unsigned part = 0; part < numberOfParts; part++) {
                 expr += this->node_in_partition[node][static_cast<int>(part)];
+            }
 
             model.AddConstr(expr == node_replicated[static_cast<int>(node)]);
         }
 
         // hyperedge indicators if node is not replicated
-        for (unsigned part = 0; part < numberOfParts; part++)
-            for (index_type node = 0; node < numberOfVertices; node++)
-                for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
-                    model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)] >= this->node_in_partition[node][static_cast<int>(part)] - node_replicated[static_cast<int>(node)]);
-        
+        for (unsigned part = 0; part < numberOfParts; part++) {
+            for (index_type node = 0; node < numberOfVertices; node++) {
+                for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) {
+                    model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)]
+                                    >= this->node_in_partition[node][static_cast<int>(part)]
+                                           - node_replicated[static_cast<int>(node)]);
+                }
+            }
+        }
+
         // hyperedge indicators if node is replicated
-        for (index_type node = 0; node < numberOfVertices; node++)
-            for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
-                for (unsigned part1 = 0; part1 < numberOfParts; part1++)
-                    for (unsigned part2 = part1+1; part2 < numberOfParts; part2++)
-                        model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part1)] + this->hyperedge_uses_partition[hyperedge][static_cast<int>(part2)] >=
-                                        this->node_in_partition[node][static_cast<int>(part1)] + this->node_in_partition[node][static_cast<int>(part2)] - 1);
+        for (index_type node = 0; node < numberOfVertices; node++) {
+            for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) {
+                for (unsigned part1 = 0; part1 < numberOfParts; part1++) {
+                    for (unsigned part2 = part1 + 1; part2 < numberOfParts; part2++) {
+                        model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part1)]
+                                            + this->hyperedge_uses_partition[hyperedge][static_cast<int>(part2)]
+                                        >= this->node_in_partition[node][static_cast<int>(part1)]
+                                               + this->node_in_partition[node][static_cast<int>(part2)] - 1);
+                    }
+                }
+            }
+        }
     }
-             
 }
 
-template<typename hypergraph_t>
-void HypergraphPartitioningILPWithReplication<hypergraph_t>::setInitialSolution(const PartitioningWithReplication<hypergraph_t> &partition,  Model& model)
-{
+template <typename hypergraph_t>
+void HypergraphPartitioningILPWithReplication<hypergraph_t>::setInitialSolution(
+    const PartitioningWithReplication<hypergraph_t> &partition, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
-    const std::vector<std::vector<unsigned> >& assignments = partition.assignedPartitions();
-    const unsigned& numPartitions = partition.getInstance().getNumberOfPartitions();
-    if(assignments.size() != partition.getInstance().getHypergraph().num_vertices())
+    const std::vector<std::vector<unsigned> > &assignments = partition.assignedPartitions();
+    const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions();
+    if (assignments.size() != partition.getInstance().getHypergraph().num_vertices()) {
         return;
+    }
 
-    for(index_type node = 0; node < assignments.size(); ++node)
-    {
+    for (index_type node = 0; node < assignments.size(); ++node) {
         std::vector<bool> assingedToPart(numPartitions, false);
-        for(unsigned part : assignments[node])
-            if(part < numPartitions)
+        for (unsigned part : assignments[node]) {
+            if (part < numPartitions) {
                 assingedToPart[part] = true;
-        
-        for(unsigned part = 0; part < numPartitions; ++part)
+            }
+        }
+
+        for (unsigned part = 0; part < numPartitions; ++part) {
             model.SetMipStart(this->node_in_partition[node][static_cast<int>(part)], static_cast<int>(assingedToPart[part]));
+        }
     }
     model.LoadMipStart();
 }
 
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/pebbling/PebblingSchedule.hpp b/include/osp/pebbling/PebblingSchedule.hpp
index 008e3fa8..440d6254 100644
--- a/include/osp/pebbling/PebblingSchedule.hpp
+++ b/include/osp/pebbling/PebblingSchedule.hpp
@@ -13,17 +13,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
+#include <algorithm>
+#include <iostream>
 #include <list>
 #include <map>
 #include <stdexcept>
 #include <vector>
-#include <algorithm>
-#include <iostream>
 
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
@@ -37,27 +37,27 @@ typedef std::tuple<unsigned int, unsigned int, unsigned int> KeyTriple;
  * @brief Represents a schedule for 2-level MultiBSP model with memory constraints.
  *
  * Alternatively, can be understood as the generalization of multiprocessor red-blue pebble game with node weights.
- * The synchronous interpretation is essentially a 2-level Multi-BSP, while the asynchronous interpretation is 
+ * The synchronous interpretation is essentially a 2-level Multi-BSP, while the asynchronous interpretation is
  * closer to makespan metrics in classical schedules.
- * 
- * Besides basic utility such as validity check, cost computation and conversion from a Bsp Schedule, it also allows 
+ *
+ * Besides basic utility such as validity check, cost computation and conversion from a Bsp Schedule, it also allows
  * conversions to/from several MultiProcessorPebbling ILP methods that address this problem.
  *
  * Works with a `BspInstance` object, which represents the instance of the scheduling problem being solved.
  *
  * @see BspInstance
  */
-template<typename Graph_t>
+template <typename Graph_t>
 class PebblingSchedule {
-
-  static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
+    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
 
   private:
     using vertex_idx = vertex_idx_t<Graph_t>;
     using cost_type = v_workw_t<Graph_t>;
     using memweight_type = v_memw_t<Graph_t>;
 
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "PebblingSchedule requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "PebblingSchedule requires work and comm. weights to have the same type.");
 
     const BspInstance<Graph_t> *instance;
 
@@ -65,92 +65,95 @@ class PebblingSchedule {
 
     bool need_to_load_inputs = true;
 
-    struct compute_step
-    {
-      vertex_idx node;
-      std::vector<vertex_idx> nodes_evicted_after;
+    struct compute_step {
+        vertex_idx node;
+        std::vector<vertex_idx> nodes_evicted_after;
 
-      compute_step() {}
-      compute_step(vertex_idx node_) : node(node_) {}
-      compute_step(vertex_idx node_, const std::vector<vertex_idx>& evicted_) : node(node_), nodes_evicted_after(evicted_) {}
+        compute_step() {}
+
+        compute_step(vertex_idx node_) : node(node_) {}
+
+        compute_step(vertex_idx node_, const std::vector<vertex_idx> &evicted_) : node(node_), nodes_evicted_after(evicted_) {}
     };
 
     // executed nodes in order in a computation phase, for processor p and superstep s
-    std::vector<std::vector<std::vector<compute_step> > > compute_steps_for_proc_superstep;
+    std::vector<std::vector<std::vector<compute_step>>> compute_steps_for_proc_superstep;
 
     // nodes evicted from cache in a given superstep's comm phase
-    std::vector<std::vector<std::vector<vertex_idx> > > nodes_evicted_in_comm;
+    std::vector<std::vector<std::vector<vertex_idx>>> nodes_evicted_in_comm;
 
     // nodes sent down to processor p in superstep s
-    std::vector<std::vector<std::vector<vertex_idx> > > nodes_sent_down;
+    std::vector<std::vector<std::vector<vertex_idx>>> nodes_sent_down;
 
     // nodes sent up from processor p in superstep s
-    std::vector<std::vector<std::vector<vertex_idx> > > nodes_sent_up;
+    std::vector<std::vector<std::vector<vertex_idx>>> nodes_sent_up;
 
     // set of nodes that need to have blue pebble at end, sinks by default, and
     // set of nodes on each processor that begin with red pebble, nothing by default
     // (TODO: maybe move to problem definition classes instead?)
     std::set<vertex_idx> needs_blue_at_end;
-    std::vector<std::set<vertex_idx> > has_red_in_beginning;
+    std::vector<std::set<vertex_idx>> has_red_in_beginning;
 
     // nodes that are from a previous part of a larger DAG, handled differently in conversion
     std::set<vertex_idx> external_sources;
 
   public:
-
-    enum CACHE_EVICTION_STRATEGY
-    {
-        FORESIGHT,
-        LEAST_RECENTLY_USED,
-        LARGEST_ID
-    };
+    enum CACHE_EVICTION_STRATEGY { FORESIGHT, LEAST_RECENTLY_USED, LARGEST_ID };
 
     /**
      * @brief Default constructor for the PebblingSchedule class.
      */
     PebblingSchedule() : instance(nullptr), number_of_supersteps(0) {}
 
-    PebblingSchedule(const BspInstance<Graph_t> &inst) : instance(&inst)
-    {
-      BspSchedule<Graph_t> schedule(inst, std::vector<unsigned int>(inst.numberOfVertices(), 0), std::vector<unsigned int>(inst.numberOfVertices(), 0));
-      ConvertFromBsp(schedule);
+    PebblingSchedule(const BspInstance<Graph_t> &inst) : instance(&inst) {
+        BspSchedule<Graph_t> schedule(
+            inst, std::vector<unsigned int>(inst.numberOfVertices(), 0), std::vector<unsigned int>(inst.numberOfVertices(), 0));
+        ConvertFromBsp(schedule);
     }
 
-    PebblingSchedule(const BspInstance<Graph_t> &inst, const std::vector<unsigned>& processor_assignment_,
-                  const std::vector<unsigned>& superstep_assignment_) : instance(&inst)
-    {
-      BspSchedule<Graph_t> schedule(inst, processor_assignment_, superstep_assignment_);
-      ConvertFromBsp(schedule);
+    PebblingSchedule(const BspInstance<Graph_t> &inst,
+                     const std::vector<unsigned> &processor_assignment_,
+                     const std::vector<unsigned> &superstep_assignment_)
+        : instance(&inst) {
+        BspSchedule<Graph_t> schedule(inst, processor_assignment_, superstep_assignment_);
+        ConvertFromBsp(schedule);
     }
 
     PebblingSchedule(const BspInstance<Graph_t> &inst,
-                   const std::vector<std::vector<std::vector<vertex_idx> > >& compute_steps,
-                   const std::vector<std::vector<std::vector<std::vector<vertex_idx> > > >& nodes_evicted_after_compute,
-                   const std::vector<std::vector<std::vector<vertex_idx> > >& nodes_sent_up_,
-                   const std::vector<std::vector<std::vector<vertex_idx> > >& nodes_sent_down_,
-                   const std::vector<std::vector<std::vector<vertex_idx> > >& nodes_evicted_in_comm_,
-                   const std::set<vertex_idx>& needs_blue_at_end_ = std::set<vertex_idx>(),
-                   const std::vector<std::set<vertex_idx> >& has_red_in_beginning_ = std::vector<std::set<vertex_idx> >(),
-                   const bool need_to_load_inputs_ = false) :
-                   instance(&inst), number_of_supersteps(0),
-                   need_to_load_inputs (need_to_load_inputs_),
-                   nodes_evicted_in_comm(nodes_evicted_in_comm_), nodes_sent_down(nodes_sent_down_), nodes_sent_up(nodes_sent_up_),
-                   needs_blue_at_end(needs_blue_at_end_), has_red_in_beginning(has_red_in_beginning_)
-
-    {
-      compute_steps_for_proc_superstep.resize(compute_steps.size(), std::vector<std::vector<compute_step> >(compute_steps[0].size()));
-      for(unsigned proc = 0; proc < compute_steps.size(); ++proc)
-      {
-        number_of_supersteps = std::max(number_of_supersteps, static_cast<unsigned>(compute_steps[proc].size()));
-        for(unsigned supstep = 0; supstep < static_cast<unsigned>(compute_steps[proc].size()); ++supstep)
-          for(unsigned step_index = 0; step_index < static_cast<unsigned>(compute_steps[proc][supstep].size()); ++step_index)
-            compute_steps_for_proc_superstep[proc][supstep].emplace_back(compute_steps[proc][supstep][step_index],
-                                                                          nodes_evicted_after_compute[proc][supstep][step_index]);
-      }
+                     const std::vector<std::vector<std::vector<vertex_idx>>> &compute_steps,
+                     const std::vector<std::vector<std::vector<std::vector<vertex_idx>>>> &nodes_evicted_after_compute,
+                     const std::vector<std::vector<std::vector<vertex_idx>>> &nodes_sent_up_,
+                     const std::vector<std::vector<std::vector<vertex_idx>>> &nodes_sent_down_,
+                     const std::vector<std::vector<std::vector<vertex_idx>>> &nodes_evicted_in_comm_,
+                     const std::set<vertex_idx> &needs_blue_at_end_ = std::set<vertex_idx>(),
+                     const std::vector<std::set<vertex_idx>> &has_red_in_beginning_ = std::vector<std::set<vertex_idx>>(),
+                     const bool need_to_load_inputs_ = false)
+        : instance(&inst),
+          number_of_supersteps(0),
+          need_to_load_inputs(need_to_load_inputs_),
+          nodes_evicted_in_comm(nodes_evicted_in_comm_),
+          nodes_sent_down(nodes_sent_down_),
+          nodes_sent_up(nodes_sent_up_),
+          needs_blue_at_end(needs_blue_at_end_),
+          has_red_in_beginning(has_red_in_beginning_) {
+        compute_steps_for_proc_superstep.resize(compute_steps.size(),
+                                                std::vector<std::vector<compute_step>>(compute_steps[0].size()));
+        for (unsigned proc = 0; proc < compute_steps.size(); ++proc) {
+            number_of_supersteps = std::max(number_of_supersteps, static_cast<unsigned>(compute_steps[proc].size()));
+            for (unsigned supstep = 0; supstep < static_cast<unsigned>(compute_steps[proc].size()); ++supstep) {
+                for (unsigned step_index = 0; step_index < static_cast<unsigned>(compute_steps[proc][supstep].size());
+                     ++step_index) {
+                    compute_steps_for_proc_superstep[proc][supstep].emplace_back(
+                        compute_steps[proc][supstep][step_index], nodes_evicted_after_compute[proc][supstep][step_index]);
+                }
+            }
+        }
     }
 
     PebblingSchedule(const BspSchedule<Graph_t> &schedule, CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID)
-    : instance(&schedule.getInstance()) { ConvertFromBsp(schedule, evict_rule); }
+        : instance(&schedule.getInstance()) {
+        ConvertFromBsp(schedule, evict_rule);
+    }
 
     virtual ~PebblingSchedule() = default;
 
@@ -164,9 +167,10 @@ class PebblingSchedule {
     // convert from unconstrained schedule
     void ConvertFromBsp(const BspSchedule<Graph_t> &schedule, CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID);
 
-    //auxiliary for conversion
-    std::vector<std::vector<std::vector<vertex_idx> > > computeTopOrdersDFS(const BspSchedule<Graph_t> &schedule) const;
-    static bool hasValidSolution(const BspInstance<Graph_t> &instance, const std::set<vertex_idx>& external_sources = std::set<vertex_idx>());
+    // auxiliary for conversion
+    std::vector<std::vector<std::vector<vertex_idx>>> computeTopOrdersDFS(const BspSchedule<Graph_t> &schedule) const;
+    static bool hasValidSolution(const BspInstance<Graph_t> &instance,
+                                 const std::set<vertex_idx> &external_sources = std::set<vertex_idx>());
     void SplitSupersteps(const BspSchedule<Graph_t> &schedule);
     void SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID);
 
@@ -175,10 +179,13 @@ class PebblingSchedule {
 
     // other basic operations
     bool isValid() const;
-    static std::vector<memweight_type> minimumMemoryRequiredPerNodeType(const BspInstance<Graph_t>& instance, const std::set<vertex_idx>& external_sources = std::set<vertex_idx>());
+    static std::vector<memweight_type> minimumMemoryRequiredPerNodeType(const BspInstance<Graph_t> &instance,
+                                                                        const std::set<vertex_idx> &external_sources
+                                                                        = std::set<vertex_idx>());
 
     // expand a MemSchedule from a coarsened DAG to the original DAG
-    PebblingSchedule<Graph_t> ExpandMemSchedule(const BspInstance<Graph_t>& original_instance, const std::vector<vertex_idx> mapping_to_coarse) const;
+    PebblingSchedule<Graph_t> ExpandMemSchedule(const BspInstance<Graph_t> &original_instance,
+                                                const std::vector<vertex_idx> mapping_to_coarse) const;
 
     // convert to BSP (ignores vertical I/O and recomputation)
     BspSchedule<Graph_t> ConvertToBsp() const;
@@ -190,7 +197,6 @@ class PebblingSchedule {
      */
     const BspInstance<Graph_t> &getInstance() const { return *instance; }
 
-  
     /**
      * @brief Returns the number of supersteps in the schedule.
      *
@@ -201,272 +207,310 @@ class PebblingSchedule {
     void updateNumberOfSupersteps(unsigned new_number_of_supersteps);
 
     inline bool needsToLoadInputs() const { return need_to_load_inputs; }
-    inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_;}
 
-    void getDataForMultiprocessorPebbling(std::vector<std::vector<std::vector<vertex_idx> > >& computeSteps,
-                                          std::vector<std::vector<std::vector<vertex_idx> > >& sendUpSteps,
-                                          std::vector<std::vector<std::vector<vertex_idx> > >& sendDownSteps,
-                                          std::vector<std::vector<std::vector<vertex_idx> > >& nodesEvictedAfterStep) const;
+    inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_; }
 
+    void getDataForMultiprocessorPebbling(std::vector<std::vector<std::vector<vertex_idx>>> &computeSteps,
+                                          std::vector<std::vector<std::vector<vertex_idx>>> &sendUpSteps,
+                                          std::vector<std::vector<std::vector<vertex_idx>>> &sendDownSteps,
+                                          std::vector<std::vector<std::vector<vertex_idx>>> &nodesEvictedAfterStep) const;
 
     // utility for partial ILPs
-    std::vector<std::set<vertex_idx> > getMemContentAtEnd() const;
+    std::vector<std::set<vertex_idx>> getMemContentAtEnd() const;
     void removeEvictStepsFromEnd();
 
     void CreateFromPartialPebblings(const BspInstance<Graph_t> &bsp_instance,
-                                    const std::vector<PebblingSchedule<Graph_t> >& pebblings,
-                                    const std::vector<std::set<unsigned> >& processors_to_parts,
-                                    const std::vector<std::map<vertex_idx, vertex_idx> >& original_node_id,
-                                    const std::vector<std::map<unsigned, unsigned> >& original_proc_id,
-                                    const std::vector<std::vector<std::set<vertex_idx> > >& has_reds_in_beginning);
+                                    const std::vector<PebblingSchedule<Graph_t>> &pebblings,
+                                    const std::vector<std::set<unsigned>> &processors_to_parts,
+                                    const std::vector<std::map<vertex_idx, vertex_idx>> &original_node_id,
+                                    const std::vector<std::map<unsigned, unsigned>> &original_proc_id,
+                                    const std::vector<std::vector<std::set<vertex_idx>>> &has_reds_in_beginning);
 
-    
     // auxiliary function to remove some unnecessary communications after assembling from partial pebblings
-    void FixForceEvicts(const std::vector<std::tuple<vertex_idx, unsigned, unsigned> > force_evict_node_proc_step);
+    void FixForceEvicts(const std::vector<std::tuple<vertex_idx, unsigned, unsigned>> force_evict_node_proc_step);
 
     // auxiliary after partial pebblings: try to merge supersteps
     void TryToMergeSupersteps();
 
-    const std::vector<compute_step>& GetComputeStepsForProcSuperstep(unsigned proc, unsigned supstep) const {return compute_steps_for_proc_superstep[proc][supstep];}
-    const std::vector<vertex_idx>& GetNodesEvictedInComm(unsigned proc, unsigned supstep) const {return nodes_evicted_in_comm[proc][supstep];}
-    const std::vector<vertex_idx>& GetNodesSentDown(unsigned proc, unsigned supstep) const {return nodes_sent_down[proc][supstep];}
-    const std::vector<vertex_idx>& GetNodesSentUp(unsigned proc, unsigned supstep) const {return nodes_sent_up[proc][supstep];}
+    const std::vector<compute_step> &GetComputeStepsForProcSuperstep(unsigned proc, unsigned supstep) const {
+        return compute_steps_for_proc_superstep[proc][supstep];
+    }
+
+    const std::vector<vertex_idx> &GetNodesEvictedInComm(unsigned proc, unsigned supstep) const {
+        return nodes_evicted_in_comm[proc][supstep];
+    }
+
+    const std::vector<vertex_idx> &GetNodesSentDown(unsigned proc, unsigned supstep) const {
+        return nodes_sent_down[proc][supstep];
+    }
+
+    const std::vector<vertex_idx> &GetNodesSentUp(unsigned proc, unsigned supstep) const { return nodes_sent_up[proc][supstep]; }
 
-    void SetNeedsBlueAtEnd(const std::set<vertex_idx>& nodes_) {needs_blue_at_end = nodes_;}
-    void SetExternalSources(const std::set<vertex_idx>& nodes_) {external_sources = nodes_;}
-    void SetHasRedInBeginning(const std::vector<std::set<vertex_idx> >& nodes_) {has_red_in_beginning = nodes_;}    
+    void SetNeedsBlueAtEnd(const std::set<vertex_idx> &nodes_) { needs_blue_at_end = nodes_; }
 
+    void SetExternalSources(const std::set<vertex_idx> &nodes_) { external_sources = nodes_; }
+
+    void SetHasRedInBeginning(const std::vector<std::set<vertex_idx>> &nodes_) { has_red_in_beginning = nodes_; }
 };
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::updateNumberOfSupersteps(unsigned new_number_of_supersteps)
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::updateNumberOfSupersteps(unsigned new_number_of_supersteps) {
     number_of_supersteps = new_number_of_supersteps;
 
     compute_steps_for_proc_superstep.clear();
-    compute_steps_for_proc_superstep.resize(instance->numberOfProcessors(), std::vector<std::vector<compute_step> >(number_of_supersteps));
+    compute_steps_for_proc_superstep.resize(instance->numberOfProcessors(),
+                                            std::vector<std::vector<compute_step>>(number_of_supersteps));
 
     nodes_evicted_in_comm.clear();
-    nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
+    nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
 
     nodes_sent_down.clear();
-    nodes_sent_down.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
+    nodes_sent_down.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
 
     nodes_sent_up.clear();
-    nodes_sent_up.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
+    nodes_sent_up.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
 }
 
-template<typename Graph_t>
-v_workw_t<Graph_t> PebblingSchedule<Graph_t>::computeCost() const
-{
+template <typename Graph_t>
+v_workw_t<Graph_t> PebblingSchedule<Graph_t>::computeCost() const {
     cost_type total_costs = 0;
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-    {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
         // compute phase
         cost_type max_work = std::numeric_limits<cost_type>::min();
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
             cost_type work = 0;
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
                 work += instance->getComputationalDag().vertex_work_weight(computeStep.node);
+            }
 
-            if(work > max_work)
+            if (work > max_work) {
                 max_work = work;
+            }
         }
         total_costs += max_work;
 
         // communication phase
         cost_type max_send_up = std::numeric_limits<cost_type>::min();
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
             cost_type send_up = 0;
-            for(vertex_idx node : nodes_sent_up[proc][step])
-                send_up += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts();
+            for (vertex_idx node : nodes_sent_up[proc][step]) {
+                send_up += instance->getComputationalDag().vertex_comm_weight(node)
+                           * instance->getArchitecture().communicationCosts();
+            }
 
-            if(send_up > max_send_up)
+            if (send_up > max_send_up) {
                 max_send_up = send_up;
+            }
         }
         total_costs += max_send_up;
 
         total_costs += static_cast<cost_type>(instance->getArchitecture().synchronisationCosts());
 
         cost_type max_send_down = std::numeric_limits<cost_type>::min();
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
             cost_type send_down = 0;
-            for(vertex_idx node : nodes_sent_down[proc][step])
-                send_down += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts();
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
+                send_down += instance->getComputationalDag().vertex_comm_weight(node)
+                             * instance->getArchitecture().communicationCosts();
+            }
 
-            if(send_down > max_send_down)
+            if (send_down > max_send_down) {
                 max_send_down = send_down;
+            }
         }
         total_costs += max_send_down;
-
     }
 
     return total_costs;
 }
 
-template<typename Graph_t>
-v_workw_t<Graph_t> PebblingSchedule<Graph_t>::computeAsynchronousCost() const{
-
+template <typename Graph_t>
+v_workw_t<Graph_t> PebblingSchedule<Graph_t>::computeAsynchronousCost() const {
     std::vector<cost_type> current_time_at_processor(instance->getArchitecture().numberOfProcessors(), 0);
-    std::vector<cost_type> time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), std::numeric_limits<cost_type>::max());
-    if(need_to_load_inputs)
-        for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-            if(instance->getComputationalDag().in_degree(node) == 0)
+    std::vector<cost_type> time_when_node_gets_blue(instance->getComputationalDag().num_vertices(),
+                                                    std::numeric_limits<cost_type>::max());
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+            if (instance->getComputationalDag().in_degree(node) == 0) {
                 time_when_node_gets_blue[node] = 0;
+            }
+        }
+    }
 
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-    {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
         // compute phase
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
                 current_time_at_processor[proc] += instance->getComputationalDag().vertex_work_weight(computeStep.node);
+            }
+        }
 
         // communication phase - send up
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(vertex_idx node : nodes_sent_up[proc][step])
-            {
-                current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts();
-                if(time_when_node_gets_blue[node] > current_time_at_processor[proc])
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_sent_up[proc][step]) {
+                current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node)
+                                                   * instance->getArchitecture().communicationCosts();
+                if (time_when_node_gets_blue[node] > current_time_at_processor[proc]) {
                     time_when_node_gets_blue[node] = current_time_at_processor[proc];
+                }
             }
+        }
 
         // communication phase - send down
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(vertex_idx node : nodes_sent_down[proc][step])
-            {
-                if(current_time_at_processor[proc] < time_when_node_gets_blue[node])
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
+                if (current_time_at_processor[proc] < time_when_node_gets_blue[node]) {
                     current_time_at_processor[proc] = time_when_node_gets_blue[node];
-                current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts();
+                }
+                current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node)
+                                                   * instance->getArchitecture().communicationCosts();
             }
-
+        }
     }
 
     cost_type makespan = 0;
-    for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        if(current_time_at_processor[proc] > makespan)
+    for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+        if (current_time_at_processor[proc] > makespan) {
             makespan = current_time_at_processor[proc];
+        }
+    }
 
     return makespan;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void PebblingSchedule<Graph_t>::cleanSchedule() {
-
-    if(!isValid())
+    if (!isValid()) {
         return;
+    }
 
     // NOTE - this function removes unnecessary steps in most cases, but not all (some require e.g. multiple iterations)
 
-    std::vector<std::vector<std::deque<bool> > > needed(instance->numberOfVertices(), std::vector<std::deque<bool> >(instance->numberOfProcessors()));
-    std::vector<std::vector<bool > > keep_false(instance->numberOfVertices(), std::vector<bool >(instance->numberOfProcessors(), false));
-    std::vector<std::vector<bool > > has_red_after_cleaning(instance->numberOfVertices(), std::vector<bool >(instance->numberOfProcessors(), false));
-    
+    std::vector<std::vector<std::deque<bool>>> needed(instance->numberOfVertices(),
+                                                      std::vector<std::deque<bool>>(instance->numberOfProcessors()));
+    std::vector<std::vector<bool>> keep_false(instance->numberOfVertices(),
+                                              std::vector<bool>(instance->numberOfProcessors(), false));
+    std::vector<std::vector<bool>> has_red_after_cleaning(instance->numberOfVertices(),
+                                                          std::vector<bool>(instance->numberOfProcessors(), false));
+
     std::vector<bool> ever_needed_as_blue(instance->numberOfVertices(), false);
-    if(needs_blue_at_end.empty())
-    {
-        for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-            if(instance->getComputationalDag().out_degree(node) == 0)
+    if (needs_blue_at_end.empty()) {
+        for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+            if (instance->getComputationalDag().out_degree(node) == 0) {
                 ever_needed_as_blue[node] = true;
-    }
-    else
-    {
-        for(vertex_idx node : needs_blue_at_end)
+            }
+        }
+    } else {
+        for (vertex_idx node : needs_blue_at_end) {
             ever_needed_as_blue[node] = true;
+        }
     }
 
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-        for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc)     
-            for(vertex_idx node : nodes_sent_down[proc][step])
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
                 ever_needed_as_blue[node] = true;
+            }
+        }
+    }
 
-    if(!has_red_in_beginning.empty())
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(vertex_idx node : has_red_in_beginning[proc])
+    if (!has_red_in_beginning.empty()) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : has_red_in_beginning[proc]) {
                 has_red_after_cleaning[node][proc] = true;
-    
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-    {
+            }
+        }
+    }
+
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
         // compute phase
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
-            {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
                 vertex_idx node = computeStep.node;
                 needed[node][proc].emplace_back(false);
                 keep_false[node][proc] = has_red_after_cleaning[node][proc];
-                for(vertex_idx pred : instance->getComputationalDag().parents(node))
-                {
+                for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
                     has_red_after_cleaning[pred][proc] = true;
-                    if(!keep_false[pred][proc])
+                    if (!keep_false[pred][proc]) {
                         needed[pred][proc].back() = true;
+                    }
                 }
-                for(vertex_idx to_evict : computeStep.nodes_evicted_after)
+                for (vertex_idx to_evict : computeStep.nodes_evicted_after) {
                     has_red_after_cleaning[to_evict][proc] = false;
+                }
             }
+        }
 
         // send up phase
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(vertex_idx node : nodes_sent_up[proc][step])
-                if(ever_needed_as_blue[node])
-                {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_sent_up[proc][step]) {
+                if (ever_needed_as_blue[node]) {
                     has_red_after_cleaning[node][proc] = true;
-                    if(!keep_false[node][proc])
+                    if (!keep_false[node][proc]) {
                         needed[node][proc].back() = true;
+                    }
                 }
+            }
+        }
 
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(vertex_idx node : nodes_evicted_in_comm[proc][step])
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_evicted_in_comm[proc][step]) {
                 has_red_after_cleaning[node][proc] = false;
+            }
+        }
 
-        //send down phase
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)     
-            for(vertex_idx node : nodes_sent_down[proc][step])
-            {
+        // send down phase
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
                 needed[node][proc].emplace_back(false);
                 keep_false[node][proc] = has_red_after_cleaning[node][proc];
             }
+        }
     }
 
-    std::vector<std::vector<std::vector<compute_step> > > new_compute_steps_for_proc_superstep(instance->numberOfProcessors(), std::vector<std::vector<compute_step> >(number_of_supersteps));
-    std::vector<std::vector<std::vector<vertex_idx> > > new_nodes_evicted_in_comm(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
-    std::vector<std::vector<std::vector<vertex_idx> > > new_nodes_sent_down(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
-    std::vector<std::vector<std::vector<vertex_idx> > > new_nodes_sent_up(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
-
-    std::vector<std::vector<bool> > has_red(instance->numberOfVertices(), std::vector<bool>(instance->numberOfProcessors(), false));
-    if(!has_red_in_beginning.empty())
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(vertex_idx node : has_red_in_beginning[proc])
+    std::vector<std::vector<std::vector<compute_step>>> new_compute_steps_for_proc_superstep(
+        instance->numberOfProcessors(), std::vector<std::vector<compute_step>>(number_of_supersteps));
+    std::vector<std::vector<std::vector<vertex_idx>>> new_nodes_evicted_in_comm(
+        instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
+    std::vector<std::vector<std::vector<vertex_idx>>> new_nodes_sent_down(
+        instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
+    std::vector<std::vector<std::vector<vertex_idx>>> new_nodes_sent_up(
+        instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
+
+    std::vector<std::vector<bool>> has_red(instance->numberOfVertices(), std::vector<bool>(instance->numberOfProcessors(), false));
+    if (!has_red_in_beginning.empty()) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : has_red_in_beginning[proc]) {
                 has_red[node][proc] = true;
-    
+            }
+        }
+    }
+
     std::vector<bool> has_blue(instance->numberOfVertices());
-    std::vector<cost_type> time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), std::numeric_limits<cost_type>::max());
-    if(need_to_load_inputs)
-        for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-            if(instance->getComputationalDag().in_degree(node) == 0)
-            {
+    std::vector<cost_type> time_when_node_gets_blue(instance->getComputationalDag().num_vertices(),
+                                                    std::numeric_limits<cost_type>::max());
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+            if (instance->getComputationalDag().in_degree(node) == 0) {
                 has_blue[node] = true;
                 time_when_node_gets_blue[node] = 0;
             }
+        }
+    }
 
     std::vector<cost_type> current_time_at_processor(instance->getArchitecture().numberOfProcessors(), 0);
 
-    for(unsigned superstep = 0; superstep < number_of_supersteps; ++superstep)
-    {
+    for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) {
         // compute phase
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
             std::vector<bool> step_remains(compute_steps_for_proc_superstep[proc][superstep].size(), false);
-            std::vector<std::vector<vertex_idx> > new_evict_after(compute_steps_for_proc_superstep[proc][superstep].size());
-            
+            std::vector<std::vector<vertex_idx>> new_evict_after(compute_steps_for_proc_superstep[proc][superstep].size());
+
             unsigned new_stepIndex = 0;
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
 
-                if(needed[node][proc].front())
-                {
+                if (needed[node][proc].front()) {
                     new_compute_steps_for_proc_superstep[proc][superstep].emplace_back(node, std::vector<vertex_idx>());
                     step_remains[stepIndex] = true;
                     has_red[node][proc] = true;
@@ -476,85 +520,88 @@ void PebblingSchedule<Graph_t>::cleanSchedule() {
 
                 needed[node][proc].pop_front();
 
-                for(vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after)
-                {
-                    if(has_red[to_evict][proc])
+                for (vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) {
+                    if (has_red[to_evict][proc]) {
                         new_evict_after[stepIndex].push_back(to_evict);
+                    }
                     has_red[to_evict][proc] = false;
                 }
             }
 
             // go backwards to fix cache eviction steps
             std::vector<vertex_idx> to_evict;
-            for (size_t stepIndex = compute_steps_for_proc_superstep[proc][superstep].size() - 1; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); --stepIndex)
-            {
-                for(vertex_idx node : new_evict_after[stepIndex])
+            for (size_t stepIndex = compute_steps_for_proc_superstep[proc][superstep].size() - 1;
+                 stepIndex < compute_steps_for_proc_superstep[proc][superstep].size();
+                 --stepIndex) {
+                for (vertex_idx node : new_evict_after[stepIndex]) {
                     to_evict.push_back(node);
+                }
 
-                if(step_remains[stepIndex])
-                {
-                    new_compute_steps_for_proc_superstep[proc][superstep][new_stepIndex-1].nodes_evicted_after = to_evict;
+                if (step_remains[stepIndex]) {
+                    new_compute_steps_for_proc_superstep[proc][superstep][new_stepIndex - 1].nodes_evicted_after = to_evict;
                     to_evict.clear();
                     --new_stepIndex;
                 }
             }
-            if(!to_evict.empty() && superstep>=1)
-                for(vertex_idx node : to_evict)
-                {
-                    auto itr = std::find(new_nodes_sent_down[proc][superstep-1].begin(), new_nodes_sent_down[proc][superstep-1].end(), node);
-                    if(itr == new_nodes_sent_down[proc][superstep-1].end())
-                        new_nodes_evicted_in_comm[proc][superstep-1].push_back(node);
-                    else
-                        new_nodes_sent_down[proc][superstep-1].erase(itr);
+            if (!to_evict.empty() && superstep >= 1) {
+                for (vertex_idx node : to_evict) {
+                    auto itr = std::find(
+                        new_nodes_sent_down[proc][superstep - 1].begin(), new_nodes_sent_down[proc][superstep - 1].end(), node);
+                    if (itr == new_nodes_sent_down[proc][superstep - 1].end()) {
+                        new_nodes_evicted_in_comm[proc][superstep - 1].push_back(node);
+                    } else {
+                        new_nodes_sent_down[proc][superstep - 1].erase(itr);
+                    }
                 }
+            }
         }
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
             // send up phase
-            for(vertex_idx node : nodes_sent_up[proc][superstep])
-            {
-                if(!ever_needed_as_blue[node])
+            for (vertex_idx node : nodes_sent_up[proc][superstep]) {
+                if (!ever_needed_as_blue[node]) {
                     continue;
+                }
 
-                cost_type new_time_at_processor = current_time_at_processor[proc] + instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts();
+                cost_type new_time_at_processor = current_time_at_processor[proc]
+                                                  + instance->getComputationalDag().vertex_comm_weight(node)
+                                                        * instance->getArchitecture().communicationCosts();
 
                 // only copy send up step if it is not obsolete in at least one of the two cases (sync or async schedule)
-                if(!has_blue[node] || new_time_at_processor < time_when_node_gets_blue[node])
-                {
+                if (!has_blue[node] || new_time_at_processor < time_when_node_gets_blue[node]) {
                     new_nodes_sent_up[proc][superstep].push_back(node);
                     has_blue[node] = true;
                     current_time_at_processor[proc] = new_time_at_processor;
-                    if(time_when_node_gets_blue[node] > new_time_at_processor)
+                    if (time_when_node_gets_blue[node] > new_time_at_processor) {
                         time_when_node_gets_blue[node] = new_time_at_processor;
+                    }
                 }
             }
         }
 
         // comm phase evict
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-            for(vertex_idx node : nodes_evicted_in_comm[proc][superstep])
-                if(has_red[node][proc])
-                {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_evicted_in_comm[proc][superstep]) {
+                if (has_red[node][proc]) {
                     new_nodes_evicted_in_comm[proc][superstep].push_back(node);
                     has_red[node][proc] = false;
                 }
+            }
+        }
 
-        for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc)
-        {
-            //send down phase     
-            for(vertex_idx node : nodes_sent_down[proc][superstep])
-            {
-                if(needed[node][proc].front())
-                {
+        for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) {
+            // send down phase
+            for (vertex_idx node : nodes_sent_down[proc][superstep]) {
+                if (needed[node][proc].front()) {
                     new_nodes_sent_down[proc][superstep].push_back(node);
                     has_red[node][proc] = true;
-                    if(current_time_at_processor[proc] < time_when_node_gets_blue[node])
+                    if (current_time_at_processor[proc] < time_when_node_gets_blue[node]) {
                         current_time_at_processor[proc] = time_when_node_gets_blue[node];
-                    current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts();
+                    }
+                    current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node)
+                                                       * instance->getArchitecture().communicationCosts();
                 }
                 needed[node][proc].pop_front();
             }
-
         }
     }
 
@@ -564,15 +611,13 @@ void PebblingSchedule<Graph_t>::cleanSchedule() {
     nodes_sent_up = new_nodes_sent_up;
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::ConvertFromBsp(const BspSchedule<Graph_t> &schedule, CACHE_EVICTION_STRATEGY evict_rule)
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::ConvertFromBsp(const BspSchedule<Graph_t> &schedule, CACHE_EVICTION_STRATEGY evict_rule) {
     instance = &schedule.getInstance();
 
     // check if conversion possible at all
-    if(!hasValidSolution(schedule.getInstance(), external_sources))
-    {
-        std::cout<<"Conversion failed."<<std::endl;
+    if (!hasValidSolution(schedule.getInstance(), external_sources)) {
+        std::cout << "Conversion failed." << std::endl;
         return;
     }
 
@@ -580,235 +625,247 @@ void PebblingSchedule<Graph_t>::ConvertFromBsp(const BspSchedule<Graph_t> &sched
     SplitSupersteps(schedule);
 
     // track memory
-    SetMemoryMovement(evict_rule);   
+    SetMemoryMovement(evict_rule);
 }
 
-template<typename Graph_t>
-bool PebblingSchedule<Graph_t>::hasValidSolution(const BspInstance<Graph_t> &instance, const std::set<vertex_idx>& external_sources)
-{
+template <typename Graph_t>
+bool PebblingSchedule<Graph_t>::hasValidSolution(const BspInstance<Graph_t> &instance,
+                                                 const std::set<vertex_idx> &external_sources) {
     std::vector<memweight_type> memory_required = minimumMemoryRequiredPerNodeType(instance);
     std::vector<bool> has_enough_memory(instance.getComputationalDag().num_vertex_types(), true);
-    for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node)
-        if(external_sources.find(node) == external_sources.end())
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) {
+        if (external_sources.find(node) == external_sources.end()) {
             has_enough_memory[instance.getComputationalDag().vertex_type(node)] = false;
+        }
+    }
 
-    for(v_type_t<Graph_t> node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type)
-        for(unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc)
-            if(instance.isCompatibleType(node_type, instance.getArchitecture().processorType(proc)) &&
-                instance.getArchitecture().memoryBound(proc) >= memory_required[node_type])
-            {
+    for (v_type_t<Graph_t> node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) {
+        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+            if (instance.isCompatibleType(node_type, instance.getArchitecture().processorType(proc))
+                && instance.getArchitecture().memoryBound(proc) >= memory_required[node_type]) {
                 has_enough_memory[node_type] = true;
                 break;
             }
+        }
+    }
 
-    for(v_type_t<Graph_t> node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type)
-        if(!has_enough_memory[node_type])
-        {
-            std::cout<<"No valid solution exists. Minimum memory required for node type "<<node_type<<" is "<<memory_required[node_type]<<std::endl;
+    for (v_type_t<Graph_t> node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) {
+        if (!has_enough_memory[node_type]) {
+            std::cout << "No valid solution exists. Minimum memory required for node type " << node_type << " is "
+                      << memory_required[node_type] << std::endl;
             return false;
         }
+    }
     return true;
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::SplitSupersteps(const BspSchedule<Graph_t> &schedule)
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::SplitSupersteps(const BspSchedule<Graph_t> &schedule) {
     // get DFS topological order in each superstep
-    std::vector<std::vector<std::vector<vertex_idx> > > top_orders = computeTopOrdersDFS(schedule);
+    std::vector<std::vector<std::vector<vertex_idx>>> top_orders = computeTopOrdersDFS(schedule);
 
     std::vector<unsigned> top_order_idx(instance->getComputationalDag().num_vertices(), 0);
-    for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        for(unsigned step=0; step<schedule.numberOfSupersteps(); ++step)
-            for(unsigned idx =0; idx < top_orders[proc][step].size(); ++idx)
+    for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+        for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) {
+            for (unsigned idx = 0; idx < top_orders[proc][step].size(); ++idx) {
                 top_order_idx[top_orders[proc][step][idx]] = idx;
+            }
+        }
+    }
 
     // split supersteps as needed
     std::vector<unsigned> new_superstep_ID(instance->getComputationalDag().num_vertices());
     unsigned superstep_index = 0;
-    for(unsigned step=0; step<schedule.numberOfSupersteps(); ++step)
-    {
+    for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) {
         unsigned max_segments_in_superstep = 0;
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
-            if(top_orders[proc][step].empty())
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            if (top_orders[proc][step].empty()) {
                 continue;
+            }
 
             // the superstep will be split into smaller segments
-            std::vector<std::pair<unsigned, unsigned> > segments;
+            std::vector<std::pair<unsigned, unsigned>> segments;
             unsigned start_idx = 0;
-            while(start_idx < top_orders[proc][step].size())
-            {
+            while (start_idx < top_orders[proc][step].size()) {
                 // binary search for largest segment that still statisfies mem constraint
                 bool doubling_phase = true;
-                unsigned end_lower_bound = start_idx, end_upper_bound = static_cast<unsigned>(top_orders[proc][step].size()-1);
-                while(end_lower_bound < end_upper_bound)
-                {
+                unsigned end_lower_bound = start_idx, end_upper_bound = static_cast<unsigned>(top_orders[proc][step].size() - 1);
+                while (end_lower_bound < end_upper_bound) {
                     unsigned end_current;
-                    
-                    if(doubling_phase)
-                    {
-                        if(end_lower_bound == start_idx)
+
+                    if (doubling_phase) {
+                        if (end_lower_bound == start_idx) {
                             end_current = start_idx + 1;
-                        else
-                            end_current = std::min(start_idx + 2* (end_lower_bound - start_idx),
-                                                static_cast<unsigned>( top_orders[proc][step].size() ) - 1);
-                    }
-                    else
+                        } else {
+                            end_current = std::min(start_idx + 2 * (end_lower_bound - start_idx),
+                                                   static_cast<unsigned>(top_orders[proc][step].size()) - 1);
+                        }
+                    } else {
                         end_current = end_lower_bound + (end_upper_bound - end_lower_bound + 1) / 2;
+                    }
 
                     // check if this segment is valid
                     bool valid = true;
 
                     std::map<vertex_idx, bool> neededAfter;
-                    for(unsigned idx = start_idx; idx <= end_current; ++idx)
-                    {
+                    for (unsigned idx = start_idx; idx <= end_current; ++idx) {
                         vertex_idx node = top_orders[proc][step][idx];
                         neededAfter[node] = false;
-                        if(needs_blue_at_end.empty())
+                        if (needs_blue_at_end.empty()) {
                             neededAfter[node] = (instance->getComputationalDag().out_degree(node) == 0);
-                        else
+                        } else {
                             neededAfter[node] = (needs_blue_at_end.find(node) != needs_blue_at_end.end());
-                        for(vertex_idx succ : instance->getComputationalDag().children(node))
-                        {
-                            if(schedule.assignedSuperstep(succ)>step)
+                        }
+                        for (vertex_idx succ : instance->getComputationalDag().children(node)) {
+                            if (schedule.assignedSuperstep(succ) > step) {
                                 neededAfter[node] = true;
-                            if(schedule.assignedSuperstep(succ) == step && top_order_idx[succ] <= end_current)
+                            }
+                            if (schedule.assignedSuperstep(succ) == step && top_order_idx[succ] <= end_current) {
                                 neededAfter[node] = true;
+                            }
                         }
-
                     }
 
                     std::map<vertex_idx, vertex_idx> lastUsedBy;
                     std::set<vertex_idx> values_needed;
-                    for(unsigned idx = start_idx; idx <= end_current; ++idx)
-                    {
+                    for (unsigned idx = start_idx; idx <= end_current; ++idx) {
                         vertex_idx node = top_orders[proc][step][idx];
-                        for(vertex_idx pred : instance->getComputationalDag().parents(node))
-                        {
-                            if(schedule.assignedSuperstep(pred)<step || (schedule.assignedSuperstep(pred)==step && !neededAfter[pred]))
+                        for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
+                            if (schedule.assignedSuperstep(pred) < step
+                                || (schedule.assignedSuperstep(pred) == step && !neededAfter[pred])) {
                                 lastUsedBy[pred] = node;
-                            if(schedule.assignedSuperstep(pred)<step || (schedule.assignedSuperstep(pred)==step && top_order_idx[pred] < start_idx)
-                                || (need_to_load_inputs && instance->getComputationalDag().in_degree(pred)==0) 
-                                || external_sources.find(pred) != external_sources.end() )
+                            }
+                            if (schedule.assignedSuperstep(pred) < step
+                                || (schedule.assignedSuperstep(pred) == step && top_order_idx[pred] < start_idx)
+                                || (need_to_load_inputs && instance->getComputationalDag().in_degree(pred) == 0)
+                                || external_sources.find(pred) != external_sources.end()) {
                                 values_needed.insert(pred);
+                            }
                         }
                     }
 
                     memweight_type mem_needed = 0;
-                    for(vertex_idx node : values_needed)
+                    for (vertex_idx node : values_needed) {
                         mem_needed += instance->getComputationalDag().vertex_mem_weight(node);
+                    }
 
-
-                    for(unsigned idx = start_idx; idx <= end_current; ++idx)
-                    {
+                    for (unsigned idx = start_idx; idx <= end_current; ++idx) {
                         vertex_idx node = top_orders[proc][step][idx];
 
-                        if(need_to_load_inputs && instance->getComputationalDag().in_degree(node) == 0)
+                        if (need_to_load_inputs && instance->getComputationalDag().in_degree(node) == 0) {
                             continue;
+                        }
 
                         mem_needed += instance->getComputationalDag().vertex_mem_weight(node);
-                        if(mem_needed > instance->getArchitecture().memoryBound(proc))
-                        {
+                        if (mem_needed > instance->getArchitecture().memoryBound(proc)) {
                             valid = false;
                             break;
                         }
 
-                        for(vertex_idx pred : instance->getComputationalDag().parents(node))
-                            if(lastUsedBy[pred] == node)
+                        for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
+                            if (lastUsedBy[pred] == node) {
                                 mem_needed -= instance->getComputationalDag().vertex_mem_weight(pred);
+                            }
+                        }
                     }
 
-                    if(valid)
-                    {
+                    if (valid) {
                         end_lower_bound = end_current;
-                        if(end_current == top_orders[proc][step].size()-1)
-                        {
+                        if (end_current == top_orders[proc][step].size() - 1) {
                             doubling_phase = false;
                             end_upper_bound = end_current;
                         }
-                    }
-                    else
-                    {
+                    } else {
                         doubling_phase = false;
                         end_upper_bound = end_current - 1;
                     }
-
                 }
                 segments.emplace_back(start_idx, end_lower_bound);
                 start_idx = end_lower_bound + 1;
             }
 
             unsigned step_idx = 0;
-            for(auto segment : segments)
-            {
-                for(unsigned idx = segment.first; idx <= segment.second; ++idx)
+            for (auto segment : segments) {
+                for (unsigned idx = segment.first; idx <= segment.second; ++idx) {
                     new_superstep_ID[top_orders[proc][step][idx]] = superstep_index + step_idx;
+                }
 
                 ++step_idx;
             }
 
-            if(step_idx>max_segments_in_superstep)
+            if (step_idx > max_segments_in_superstep) {
                 max_segments_in_superstep = step_idx;
+            }
         }
         superstep_index += max_segments_in_superstep;
     }
 
     std::vector<unsigned> reindex_to_shrink(superstep_index);
     std::vector<bool> has_compute(superstep_index, false);
-    for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-        if(!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0)
+    for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+        if (!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) {
             has_compute[new_superstep_ID[node]] = true;
-    
+        }
+    }
+
     unsigned current_index = 0;
-    for(unsigned superstep = 0; superstep < superstep_index; ++superstep)
-        if(has_compute[superstep])
-        {
+    for (unsigned superstep = 0; superstep < superstep_index; ++superstep) {
+        if (has_compute[superstep]) {
             reindex_to_shrink[superstep] = current_index;
             ++current_index;
         }
+    }
 
     unsigned offset = need_to_load_inputs ? 1 : 0;
-    updateNumberOfSupersteps(current_index+offset);
-    std::cout<<schedule.numberOfSupersteps()<<" -> "<<number_of_supersteps<<std::endl;
+    updateNumberOfSupersteps(current_index + offset);
+    std::cout << schedule.numberOfSupersteps() << " -> " << number_of_supersteps << std::endl;
 
     // TODO: might not need offset for first step when beginning with red pebbles
 
-    for(unsigned step=0; step<schedule.numberOfSupersteps(); ++step)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(vertex_idx node : top_orders[proc][step])
-                if(!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0)
-                    compute_steps_for_proc_superstep[proc][reindex_to_shrink[new_superstep_ID[node]]+offset].emplace_back(node);
-    
+    for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : top_orders[proc][step]) {
+                if (!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) {
+                    compute_steps_for_proc_superstep[proc][reindex_to_shrink[new_superstep_ID[node]] + offset].emplace_back(node);
+                }
+            }
+        }
+    }
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule)
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule) {
     const size_t N = instance->getComputationalDag().num_vertices();
 
     std::vector<memweight_type> mem_used(instance->numberOfProcessors(), 0);
-    std::vector<std::set<vertex_idx> > in_mem(instance->numberOfProcessors());
+    std::vector<std::set<vertex_idx>> in_mem(instance->numberOfProcessors());
 
     std::vector<bool> in_slow_mem(N, false);
-    if(need_to_load_inputs)
-        for(vertex_idx node=0; node<N; ++node)
-            if(instance->getComputationalDag().in_degree(node) == 0)
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < N; ++node) {
+            if (instance->getComputationalDag().in_degree(node) == 0) {
                 in_slow_mem[node] = true;
+            }
+        }
+    }
+
+    std::vector<std::set<std::pair<std::pair<unsigned, unsigned>, vertex_idx>>> evictable(instance->numberOfProcessors());
+    std::vector<std::set<vertex_idx>> non_evictable(instance->numberOfProcessors());
 
-    std::vector<std::set<std::pair<std::pair<unsigned, unsigned>, vertex_idx>> > evictable(instance->numberOfProcessors());
-    std::vector<std::set<vertex_idx> > non_evictable(instance->numberOfProcessors());
-        
     // iterator to its position in "evictable" - for efficient delete
-    std::vector<std::vector<decltype(evictable[0].begin()) > > place_in_evictable(N,
-            std::vector<decltype(evictable[0].begin())>(instance->numberOfProcessors()));
-    for(vertex_idx node=0; node<N; ++node)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
+    std::vector<std::vector<decltype(evictable[0].begin())>> place_in_evictable(
+        N, std::vector<decltype(evictable[0].begin())>(instance->numberOfProcessors()));
+    for (vertex_idx node = 0; node < N; ++node) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             place_in_evictable[node][proc] = evictable[proc].end();
+        }
+    }
 
     // utility for LRU eviction strategy
-    std::vector<std::vector<unsigned> > node_last_used_on_proc;
-    if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED)
+    std::vector<std::vector<unsigned>> node_last_used_on_proc;
+    if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) {
         node_last_used_on_proc.resize(N, std::vector<unsigned>(instance->numberOfProcessors(), 0));
+    }
     std::vector<unsigned> total_step_count_on_proc(instance->numberOfProcessors(), 0);
 
     // select a representative compute step for each node, in case of being computed multiple times
@@ -816,135 +873,139 @@ void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_
     // its representative step, if the value in question is ever needed on another processor/superster
     // without being recomputed there - otherwise, it would be even hard to decide whether a solution exists)
     std::vector<unsigned> selected_processor(N);
-    std::vector<std::pair<unsigned, unsigned> > selected_step(N, std::make_pair(number_of_supersteps, 0));
-    for(unsigned superstep=0; superstep<number_of_supersteps; ++superstep)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+    std::vector<std::pair<unsigned, unsigned>> selected_step(N, std::make_pair(number_of_supersteps, 0));
+    for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
-                if(selected_step[node].first > superstep || (selected_step[node].first == superstep && selected_step[node].second < stepIndex))
-                {
+                if (selected_step[node].first > superstep
+                    || (selected_step[node].first == superstep && selected_step[node].second < stepIndex)) {
                     selected_processor[node] = proc;
                     selected_step[node] = std::make_pair(superstep, stepIndex);
                 }
             }
+        }
+    }
 
     // check if the node needs to be kept until the end of its representative superstep
     std::vector<bool> must_be_preserved(N, false);
     std::vector<bool> computed_in_current_superstep(N, false);
-    for(unsigned superstep=0; superstep<number_of_supersteps; ++superstep)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+    for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
                 computed_in_current_superstep[node] = true;
-                for(vertex_idx pred : instance->getComputationalDag().parents(node))
-                    if(!computed_in_current_superstep[pred])
+                for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
+                    if (!computed_in_current_superstep[pred]) {
                         must_be_preserved[pred] = true;
+                    }
+                }
             }
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 computed_in_current_superstep[compute_steps_for_proc_superstep[proc][superstep][stepIndex].node] = false;
+            }
         }
-    if(needs_blue_at_end.empty())
-    {
-        for(vertex_idx node = 0; node < N; ++node)
-            if(instance->getComputationalDag().out_degree(node) == 0)
-                must_be_preserved[node] = true;
     }
-    else
-    {
-        for(vertex_idx node : needs_blue_at_end)
+    if (needs_blue_at_end.empty()) {
+        for (vertex_idx node = 0; node < N; ++node) {
+            if (instance->getComputationalDag().out_degree(node) == 0) {
+                must_be_preserved[node] = true;
+            }
+        }
+    } else {
+        for (vertex_idx node : needs_blue_at_end) {
             must_be_preserved[node] = true;
+        }
     }
 
     // superstep-step pairs where a node is required (on a given proc) - opening a separate queue after each time it's recomputed
-    std::vector<std::vector<std::deque<std::deque<std::pair<unsigned, unsigned> > > > > node_used_at_proc_lists(N, std::vector<std::deque<std::deque<std::pair<unsigned, unsigned> > > >(instance->numberOfProcessors(), std::deque<std::deque<std::pair<unsigned, unsigned> > >(1)));
-    for(unsigned superstep=0; superstep<number_of_supersteps; ++superstep)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+    std::vector<std::vector<std::deque<std::deque<std::pair<unsigned, unsigned>>>>> node_used_at_proc_lists(
+        N,
+        std::vector<std::deque<std::deque<std::pair<unsigned, unsigned>>>>(
+            instance->numberOfProcessors(), std::deque<std::deque<std::pair<unsigned, unsigned>>>(1)));
+    for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
-                for(vertex_idx pred : instance->getComputationalDag().parents(node))
+                for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
                     node_used_at_proc_lists[pred][proc].back().emplace_back(superstep, stepIndex);
-                
+                }
+
                 node_used_at_proc_lists[node][proc].emplace_back();
             }
+        }
+    }
 
     // set up initial content of fast memories
-    if(!has_red_in_beginning.empty())
-    {
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
+    if (!has_red_in_beginning.empty()) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             in_mem = has_red_in_beginning;
-            for(vertex_idx node : in_mem[proc])
-            {
+            for (vertex_idx node : in_mem[proc]) {
                 mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
 
                 std::pair<unsigned, unsigned> prio;
-                if(evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT)
+                if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) {
                     prio = node_used_at_proc_lists[node][proc].front().front();
-                else if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED)
+                } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) {
                     prio = std::make_pair(UINT_MAX - node_last_used_on_proc[node][proc], static_cast<unsigned>(node));
-                else if(evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID)
+                } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) {
                     prio = std::make_pair(static_cast<unsigned>(node), 0);
+                }
 
                 place_in_evictable[node][proc] = evictable[proc].emplace(prio, node).first;
             }
         }
     }
-    
+
     // iterate through schedule
-    for(unsigned superstep=0; superstep<number_of_supersteps; ++superstep)
-    {
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
-            if(compute_steps_for_proc_superstep[proc][superstep].empty())
+    for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            if (compute_steps_for_proc_superstep[proc][superstep].empty()) {
                 continue;
+            }
 
             // before compute phase, evict data in comm phase of previous superstep
             std::set<vertex_idx> new_values_needed;
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
                 computed_in_current_superstep[node] = true;
-                for(vertex_idx pred : instance->getComputationalDag().parents(node))
-                    if(!computed_in_current_superstep[pred])
-                    {
+                for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
+                    if (!computed_in_current_superstep[pred]) {
                         non_evictable[proc].insert(pred);
 
-                        if(place_in_evictable[pred][proc] != evictable[proc].end())
-                        {
+                        if (place_in_evictable[pred][proc] != evictable[proc].end()) {
                             evictable[proc].erase(place_in_evictable[pred][proc]);
                             place_in_evictable[pred][proc] = evictable[proc].end();
                         }
 
-                        if(in_mem[proc].find(pred) == in_mem[proc].end())
+                        if (in_mem[proc].find(pred) == in_mem[proc].end()) {
                             new_values_needed.insert(pred);
+                        }
                     }
+                }
             }
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 computed_in_current_superstep[compute_steps_for_proc_superstep[proc][superstep][stepIndex].node] = false;
-            
-            for(vertex_idx node : new_values_needed)
-            {
+            }
+
+            for (vertex_idx node : new_values_needed) {
                 in_mem[proc].insert(node);
                 mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
-                nodes_sent_down[proc][superstep-1].push_back(node);
-                if(!in_slow_mem[node])
-                {
+                nodes_sent_down[proc][superstep - 1].push_back(node);
+                if (!in_slow_mem[node]) {
                     in_slow_mem[node] = true;
                     nodes_sent_up[selected_processor[node]][selected_step[node].first].push_back(node);
                 }
             }
 
-            memweight_type first_node_weight = instance->getComputationalDag().vertex_mem_weight(compute_steps_for_proc_superstep[proc][superstep][0].node);
+            memweight_type first_node_weight
+                = instance->getComputationalDag().vertex_mem_weight(compute_steps_for_proc_superstep[proc][superstep][0].node);
 
-            while(mem_used[proc] + first_node_weight > instance->getArchitecture().memoryBound(proc)) // no sliding pebbles for now
+            while (mem_used[proc] + first_node_weight
+                   > instance->getArchitecture().memoryBound(proc))    // no sliding pebbles for now
             {
-                if(evictable[proc].empty())
-                {
-                    std::cout<<"ERROR: Cannot create valid memory movement for these superstep lists."<<std::endl;
+                if (evictable[proc].empty()) {
+                    std::cout << "ERROR: Cannot create valid memory movement for these superstep lists." << std::endl;
                     return;
                 }
                 vertex_idx evicted = (--evictable[proc].end())->second;
@@ -954,26 +1015,22 @@ void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_
                 mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(evicted);
                 in_mem[proc].erase(evicted);
 
-                nodes_evicted_in_comm[proc][superstep-1].push_back(evicted);
+                nodes_evicted_in_comm[proc][superstep - 1].push_back(evicted);
             }
 
             // indicates if the node will be needed after (and thus cannot be deleted during) this compute phase
-            std::map<vertex_idx, bool> needed_after;            
+            std::map<vertex_idx, bool> needed_after;
 
             // during compute phase
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
                 memweight_type node_weight = instance->getComputationalDag().vertex_mem_weight(node);
 
-                if(stepIndex > 0)
-                {
-                    //evict nodes to make space
-                    while(mem_used[proc] + node_weight > instance->getArchitecture().memoryBound(proc))
-                    {
-                        if(evictable[proc].empty())
-                        {
-                            std::cout<<"ERROR: Cannot create valid memory movement for these superstep lists."<<std::endl;
+                if (stepIndex > 0) {
+                    // evict nodes to make space
+                    while (mem_used[proc] + node_weight > instance->getArchitecture().memoryBound(proc)) {
+                        if (evictable[proc].empty()) {
+                            std::cout << "ERROR: Cannot create valid memory movement for these superstep lists." << std::endl;
                             return;
                         }
                         vertex_idx evicted = (--evictable[proc].end())->second;
@@ -981,9 +1038,9 @@ void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_
                         place_in_evictable[evicted][proc] = evictable[proc].end();
 
                         mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(evicted);
-                        in_mem[proc].erase(evicted);              
+                        in_mem[proc].erase(evicted);
 
-                        compute_steps_for_proc_superstep[proc][superstep][stepIndex-1].nodes_evicted_after.push_back(evicted);
+                        compute_steps_for_proc_superstep[proc][superstep][stepIndex - 1].nodes_evicted_after.push_back(evicted);
                     }
                 }
 
@@ -992,83 +1049,79 @@ void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_
 
                 non_evictable[proc].insert(node);
 
-                if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) // update usage times for LRU strategy
+                if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED)    // update usage times for LRU strategy
                 {
                     ++total_step_count_on_proc[proc];
                     node_last_used_on_proc[node][proc] = total_step_count_on_proc[proc];
-                    for(vertex_idx pred : instance->getComputationalDag().parents(node))
+                    for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
                         node_last_used_on_proc[pred][proc] = total_step_count_on_proc[proc];
+                    }
                 }
 
-                if(selected_processor[node] == proc && selected_step[node] == std::make_pair(superstep, stepIndex) && must_be_preserved[node])
+                if (selected_processor[node] == proc && selected_step[node] == std::make_pair(superstep, stepIndex)
+                    && must_be_preserved[node]) {
                     needed_after[node] = true;
-                else
+                } else {
                     needed_after[node] = false;
+                }
 
                 node_used_at_proc_lists[node][proc].pop_front();
-                
-                for(vertex_idx pred : instance->getComputationalDag().parents(node))
-                {
+
+                for (vertex_idx pred : instance->getComputationalDag().parents(node)) {
                     node_used_at_proc_lists[pred][proc].front().pop_front();
 
-                    if(needed_after[pred])
+                    if (needed_after[pred]) {
                         continue;
+                    }
 
                     // autoevict
-                    if(node_used_at_proc_lists[pred][proc].front().empty())
-                    {
+                    if (node_used_at_proc_lists[pred][proc].front().empty()) {
                         in_mem[proc].erase(pred);
                         non_evictable[proc].erase(pred);
                         mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(pred);
-                        compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after.push_back(pred);            
-                    }
-                    else if(node_used_at_proc_lists[pred][proc].front().front().first > superstep)
-                    {
+                        compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after.push_back(pred);
+                    } else if (node_used_at_proc_lists[pred][proc].front().front().first > superstep) {
                         non_evictable[proc].erase(pred);
 
                         std::pair<unsigned, unsigned> prio;
-                        if(evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT)
+                        if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) {
                             prio = node_used_at_proc_lists[pred][proc].front().front();
-                        else if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED)
+                        } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) {
                             prio = std::make_pair(UINT_MAX - node_last_used_on_proc[pred][proc], static_cast<unsigned>(pred));
-                        else if(evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID)
+                        } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) {
                             prio = std::make_pair(static_cast<unsigned>(pred), 0);
+                        }
 
                         place_in_evictable[pred][proc] = evictable[proc].emplace(prio, pred).first;
                     }
                 }
-                
             }
 
             // after compute phase
-            for(vertex_idx node : non_evictable[proc])
-            {
-                if(node_used_at_proc_lists[node][proc].front().empty())
-                {
+            for (vertex_idx node : non_evictable[proc]) {
+                if (node_used_at_proc_lists[node][proc].front().empty()) {
                     mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node);
                     in_mem[proc].erase(node);
                     nodes_evicted_in_comm[proc][superstep].push_back(node);
-                    if((instance->getComputationalDag().out_degree(node) == 0 || needs_blue_at_end.find(node) != needs_blue_at_end.end())
-                        && !in_slow_mem[node])
-                    {
+                    if ((instance->getComputationalDag().out_degree(node) == 0
+                         || needs_blue_at_end.find(node) != needs_blue_at_end.end())
+                        && !in_slow_mem[node]) {
                         in_slow_mem[node] = true;
                         nodes_sent_up[proc][superstep].push_back(node);
                     }
-                }
-                else
-                {
+                } else {
                     std::pair<unsigned, unsigned> prio;
-                    if(evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT)
+                    if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) {
                         prio = node_used_at_proc_lists[node][proc].front().front();
-                    else if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED)
+                    } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) {
                         prio = std::make_pair(UINT_MAX - node_last_used_on_proc[node][proc], static_cast<unsigned>(node));
-                    else if(evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID)
+                    } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) {
                         prio = std::make_pair(static_cast<unsigned>(node), 0);
+                    }
 
                     place_in_evictable[node][proc] = evictable[proc].emplace(prio, node).first;
 
-                    if(needs_blue_at_end.find(node) != needs_blue_at_end.end() && !in_slow_mem[node])
-                    {
+                    if (needs_blue_at_end.find(node) != needs_blue_at_end.end() && !in_slow_mem[node]) {
                         in_slow_mem[node] = true;
                         nodes_sent_up[proc][superstep].push_back(node);
                     }
@@ -1077,94 +1130,95 @@ void PebblingSchedule<Graph_t>::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_
             non_evictable[proc].clear();
         }
     }
-
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::ResetToForesight()
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::ResetToForesight() {
     nodes_evicted_in_comm.clear();
-    nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
+    nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
 
     nodes_sent_down.clear();
-    nodes_sent_down.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
+    nodes_sent_down.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
 
     nodes_sent_up.clear();
-    nodes_sent_up.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx> >(number_of_supersteps));
+    nodes_sent_up.resize(instance->numberOfProcessors(), std::vector<std::vector<vertex_idx>>(number_of_supersteps));
 
     SetMemoryMovement(CACHE_EVICTION_STRATEGY::FORESIGHT);
 }
 
-template<typename Graph_t>
-bool PebblingSchedule<Graph_t>::isValid() const
-{
+template <typename Graph_t>
+bool PebblingSchedule<Graph_t>::isValid() const {
     std::vector<memweight_type> mem_used(instance->numberOfProcessors(), 0);
-    std::vector<std::vector<vertex_idx> > in_fast_mem(instance->getComputationalDag().num_vertices(),
-        std::vector<vertex_idx>(instance->numberOfProcessors(), false));
+    std::vector<std::vector<vertex_idx>> in_fast_mem(instance->getComputationalDag().num_vertices(),
+                                                     std::vector<vertex_idx>(instance->numberOfProcessors(), false));
     std::vector<vertex_idx> in_slow_mem(instance->getComputationalDag().num_vertices(), false);
 
-    if(need_to_load_inputs)
-        for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-            if(instance->getComputationalDag().in_degree(node) == 0)
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+            if (instance->getComputationalDag().in_degree(node) == 0) {
                 in_slow_mem[node] = true;
-    
-    if(!has_red_in_beginning.empty())
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(vertex_idx node : has_red_in_beginning[proc])
-            {
+            }
+        }
+    }
+
+    if (!has_red_in_beginning.empty()) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : has_red_in_beginning[proc]) {
                 mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
                 in_fast_mem[node][proc] = true;
             }
+        }
+    }
 
-    for(unsigned step=0; step<number_of_supersteps; ++step)
-    {
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             // computation phase
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
-            {                
-                if(!instance->isCompatible(computeStep.node, proc))
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
+                if (!instance->isCompatible(computeStep.node, proc)) {
                     return false;
+                }
 
-                for(vertex_idx pred : instance->getComputationalDag().parents(computeStep.node))
-                    if(!in_fast_mem[pred][proc])
+                for (vertex_idx pred : instance->getComputationalDag().parents(computeStep.node)) {
+                    if (!in_fast_mem[pred][proc]) {
                         return false;
+                    }
+                }
 
-                if(need_to_load_inputs && instance->getComputationalDag().in_degree(computeStep.node) == 0)
+                if (need_to_load_inputs && instance->getComputationalDag().in_degree(computeStep.node) == 0) {
                     return false;
-                
-                if(!in_fast_mem[computeStep.node][proc])
-                {            
+                }
+
+                if (!in_fast_mem[computeStep.node][proc]) {
                     in_fast_mem[computeStep.node][proc] = true;
                     mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(computeStep.node);
                 }
 
-                if(mem_used[proc] > instance->getArchitecture().memoryBound(proc))
+                if (mem_used[proc] > instance->getArchitecture().memoryBound(proc)) {
                     return false;
+                }
 
-                for(vertex_idx to_remove : computeStep.nodes_evicted_after)
-                {
-                    if(!in_fast_mem[to_remove][proc])
+                for (vertex_idx to_remove : computeStep.nodes_evicted_after) {
+                    if (!in_fast_mem[to_remove][proc]) {
                         return false;
+                    }
 
                     in_fast_mem[to_remove][proc] = false;
                     mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_remove);
-
                 }
             }
 
-            //communication phase - sendup and eviction
-            for(vertex_idx node : nodes_sent_up[proc][step])
-            {
-                if(!in_fast_mem[node][proc])
+            // communication phase - sendup and eviction
+            for (vertex_idx node : nodes_sent_up[proc][step]) {
+                if (!in_fast_mem[node][proc]) {
                     return false;
-                
+                }
+
                 in_slow_mem[node] = true;
             }
-            for(vertex_idx node : nodes_evicted_in_comm[proc][step])
-            {
-                if(!in_fast_mem[node][proc])
+            for (vertex_idx node : nodes_evicted_in_comm[proc][step]) {
+                if (!in_fast_mem[node][proc]) {
                     return false;
+                }
 
                 in_fast_mem[node][proc] = false;
                 mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node);
@@ -1172,115 +1226,120 @@ bool PebblingSchedule<Graph_t>::isValid() const
         }
 
         // communication phase - senddown
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
-            for(vertex_idx node : nodes_sent_down[proc][step])
-            {
-                if(!in_slow_mem[node])
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
+                if (!in_slow_mem[node]) {
                     return false;
+                }
 
-                if(!in_fast_mem[node][proc])
-                {
+                if (!in_fast_mem[node][proc]) {
                     in_fast_mem[node][proc] = true;
                     mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
                 }
             }
         }
 
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            if(mem_used[proc] > instance->getArchitecture().memoryBound(proc))
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            if (mem_used[proc] > instance->getArchitecture().memoryBound(proc)) {
                 return false;
+            }
+        }
     }
 
-    if(needs_blue_at_end.empty())
-    {
-        for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-            if(instance->getComputationalDag().out_degree(node) == 0 && !in_slow_mem[node])
+    if (needs_blue_at_end.empty()) {
+        for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+            if (instance->getComputationalDag().out_degree(node) == 0 && !in_slow_mem[node]) {
                 return false;
-    }
-    else
-    {
-        for(vertex_idx node : needs_blue_at_end)
-            if(!in_slow_mem[node])
+            }
+        }
+    } else {
+        for (vertex_idx node : needs_blue_at_end) {
+            if (!in_slow_mem[node]) {
                 return false;
+            }
+        }
     }
 
     return true;
 }
 
-template<typename Graph_t>
-std::vector<v_memw_t<Graph_t> > PebblingSchedule<Graph_t>::minimumMemoryRequiredPerNodeType(const BspInstance<Graph_t>& instance, const std::set<vertex_idx>& external_sources)
-{
-    std::vector<v_memw_t<Graph_t> > max_needed(instance.getComputationalDag().num_vertex_types(), 0);
-    for(vertex_idx_t<Graph_t> node=0; node<instance.getComputationalDag().num_vertices(); ++node)
-    {
-        if(external_sources.find(node) != external_sources.end())
+template <typename Graph_t>
+std::vector<v_memw_t<Graph_t>> PebblingSchedule<Graph_t>::minimumMemoryRequiredPerNodeType(
+    const BspInstance<Graph_t> &instance, const std::set<vertex_idx> &external_sources) {
+    std::vector<v_memw_t<Graph_t>> max_needed(instance.getComputationalDag().num_vertex_types(), 0);
+    for (vertex_idx_t<Graph_t> node = 0; node < instance.getComputationalDag().num_vertices(); ++node) {
+        if (external_sources.find(node) != external_sources.end()) {
             continue;
+        }
 
         v_memw_t<Graph_t> needed = instance.getComputationalDag().vertex_mem_weight(node);
         const v_type_t<Graph_t> type = instance.getComputationalDag().vertex_type(node);
-        for(vertex_idx_t<Graph_t> pred : instance.getComputationalDag().parents(node))
+        for (vertex_idx_t<Graph_t> pred : instance.getComputationalDag().parents(node)) {
             needed += instance.getComputationalDag().vertex_mem_weight(pred);
-        
-        if(needed>max_needed[type])
-            max_needed[type]=needed;
+        }
+
+        if (needed > max_needed[type]) {
+            max_needed[type] = needed;
+        }
     }
     return max_needed;
 }
 
-template<typename Graph_t>
-std::vector<std::vector<std::vector<vertex_idx_t<Graph_t> > > > PebblingSchedule<Graph_t>::computeTopOrdersDFS(const BspSchedule<Graph_t> &schedule) const
-{
+template <typename Graph_t>
+std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> PebblingSchedule<Graph_t>::computeTopOrdersDFS(
+    const BspSchedule<Graph_t> &schedule) const {
     size_t n = schedule.getInstance().getComputationalDag().num_vertices();
     unsigned num_procs = schedule.getInstance().numberOfProcessors();
     unsigned num_supsteps = schedule.numberOfSupersteps();
 
-    std::vector<std::vector<std::vector<vertex_idx> > > top_orders(num_procs, std::vector<std::vector<vertex_idx> >(num_supsteps));
+    std::vector<std::vector<std::vector<vertex_idx>>> top_orders(num_procs, std::vector<std::vector<vertex_idx>>(num_supsteps));
 
-    std::vector<std::vector<std::deque<vertex_idx> > > Q(num_procs, std::vector<std::deque<vertex_idx> >(num_supsteps));
-    std::vector<std::vector<std::vector<vertex_idx> > > nodesUpdated(num_procs, std::vector<std::vector<vertex_idx> >(num_supsteps));
+    std::vector<std::vector<std::deque<vertex_idx>>> Q(num_procs, std::vector<std::deque<vertex_idx>>(num_supsteps));
+    std::vector<std::vector<std::vector<vertex_idx>>> nodesUpdated(num_procs, std::vector<std::vector<vertex_idx>>(num_supsteps));
     std::vector<unsigned> nr_pred(n);
     std::vector<unsigned> pred_done(n, 0);
-    for(vertex_idx node=0; node<n; ++node)
-    {
+    for (vertex_idx node = 0; node < n; ++node) {
         unsigned predecessors = 0;
-        for(vertex_idx pred : schedule.getInstance().getComputationalDag().parents(node))
-            if(external_sources.find(pred) == external_sources.end()
-            && schedule.assignedProcessor(node)==schedule.assignedProcessor(pred)
-            && schedule.assignedSuperstep(node)==schedule.assignedSuperstep(pred))
+        for (vertex_idx pred : schedule.getInstance().getComputationalDag().parents(node)) {
+            if (external_sources.find(pred) == external_sources.end()
+                && schedule.assignedProcessor(node) == schedule.assignedProcessor(pred)
+                && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(pred)) {
                 ++predecessors;
+            }
+        }
         nr_pred[node] = predecessors;
-        if(predecessors==0 && external_sources.find(node) == external_sources.end())
+        if (predecessors == 0 && external_sources.find(node) == external_sources.end()) {
             Q[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)].push_back(node);
+        }
     }
-    for(unsigned proc=0; proc<num_procs; ++proc)
-        for(unsigned step=0; step<num_supsteps; ++step)
-        {
-            while(!Q[proc][step].empty())
-            {
+    for (unsigned proc = 0; proc < num_procs; ++proc) {
+        for (unsigned step = 0; step < num_supsteps; ++step) {
+            while (!Q[proc][step].empty()) {
                 vertex_idx node = Q[proc][step].front();
                 Q[proc][step].pop_front();
                 top_orders[proc][step].push_back(node);
-                for(vertex_idx succ : schedule.getInstance().getComputationalDag().children(node))
-                    if(schedule.assignedProcessor(node)==schedule.assignedProcessor(succ)
-                    && schedule.assignedSuperstep(node)==schedule.assignedSuperstep(succ))
-                    {
+                for (vertex_idx succ : schedule.getInstance().getComputationalDag().children(node)) {
+                    if (schedule.assignedProcessor(node) == schedule.assignedProcessor(succ)
+                        && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(succ)) {
                         ++pred_done[succ];
-                        if(pred_done[succ]==nr_pred[succ])
+                        if (pred_done[succ] == nr_pred[succ]) {
                             Q[proc][step].push_front(succ);
+                        }
                     }
+                }
             }
         }
+    }
 
     return top_orders;
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::getDataForMultiprocessorPebbling(std::vector<std::vector<std::vector<vertex_idx> > >& computeSteps,
-                                            std::vector<std::vector<std::vector<vertex_idx> > >& sendUpSteps,
-                                            std::vector<std::vector<std::vector<vertex_idx> > >& sendDownSteps,
-                                            std::vector<std::vector<std::vector<vertex_idx> > >& nodesEvictedAfterStep) const
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::getDataForMultiprocessorPebbling(
+    std::vector<std::vector<std::vector<vertex_idx>>> &computeSteps,
+    std::vector<std::vector<std::vector<vertex_idx>>> &sendUpSteps,
+    std::vector<std::vector<std::vector<vertex_idx>>> &sendDownSteps,
+    std::vector<std::vector<std::vector<vertex_idx>>> &nodesEvictedAfterStep) const {
     computeSteps.clear();
     computeSteps.resize(instance->numberOfProcessors());
     sendUpSteps.clear();
@@ -1291,48 +1350,49 @@ void PebblingSchedule<Graph_t>::getDataForMultiprocessorPebbling(std::vector<std
     nodesEvictedAfterStep.resize(instance->numberOfProcessors());
 
     std::vector<memweight_type> mem_used(instance->numberOfProcessors(), 0);
-    std::vector<std::set<vertex_idx> > in_mem(instance->numberOfProcessors());
-    if(!has_red_in_beginning.empty())
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(vertex_idx node : has_red_in_beginning[proc])
-            {
+    std::vector<std::set<vertex_idx>> in_mem(instance->numberOfProcessors());
+    if (!has_red_in_beginning.empty()) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (vertex_idx node : has_red_in_beginning[proc]) {
                 in_mem[proc].insert(node);
                 mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
             }
+        }
+    }
 
     unsigned step = 0;
 
-    for(unsigned superstep=0; superstep<number_of_supersteps; ++superstep)
-    {
+    for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) {
         std::vector<unsigned> step_on_proc(instance->numberOfProcessors(), step);
         bool any_compute = false;
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            if(!compute_steps_for_proc_superstep[proc][superstep].empty())
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            if (!compute_steps_for_proc_superstep[proc][superstep].empty()) {
                 any_compute = true;
-        
-        if(any_compute)
-            for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            {
+            }
+        }
+
+        if (any_compute) {
+            for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
                 computeSteps[proc].emplace_back();
                 sendUpSteps[proc].emplace_back();
                 sendDownSteps[proc].emplace_back();
                 nodesEvictedAfterStep[proc].emplace_back();
             }
+        }
 
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             std::vector<vertex_idx> evict_list;
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex)
-            {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) {
                 vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node;
-                if(mem_used[proc] + instance->getComputationalDag().vertex_mem_weight(node) > instance->getArchitecture().memoryBound(proc))
-                {
-                    //open new step
+                if (mem_used[proc] + instance->getComputationalDag().vertex_mem_weight(node)
+                    > instance->getArchitecture().memoryBound(proc)) {
+                    // open new step
                     nodesEvictedAfterStep[proc][step_on_proc[proc]] = evict_list;
                     ++step_on_proc[proc];
-                    for(vertex_idx to_evict : evict_list)
+                    for (vertex_idx to_evict : evict_list) {
                         mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict);
-                    
+                    }
+
                     evict_list.clear();
                     computeSteps[proc].emplace_back();
                     sendUpSteps[proc].emplace_back();
@@ -1342,193 +1402,192 @@ void PebblingSchedule<Graph_t>::getDataForMultiprocessorPebbling(std::vector<std
 
                 computeSteps[proc][step_on_proc[proc]].emplace_back(node);
                 mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
-                for(vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after)
+                for (vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) {
                     evict_list.emplace_back(to_evict);
-                
+                }
             }
 
-            if(!evict_list.empty())
-            {
+            if (!evict_list.empty()) {
                 nodesEvictedAfterStep[proc][step_on_proc[proc]] = evict_list;
-                for(vertex_idx to_evict : evict_list)
+                for (vertex_idx to_evict : evict_list) {
                     mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict);
+                }
             }
-            
         }
-        if(any_compute)
-            for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
+        if (any_compute) {
+            for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
                 ++step_on_proc[proc];
+            }
+        }
 
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             step = std::max(step, step_on_proc[proc]);
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(; step_on_proc[proc]<step; ++step_on_proc[proc])
-            {
+        }
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (; step_on_proc[proc] < step; ++step_on_proc[proc]) {
                 computeSteps[proc].emplace_back();
                 sendUpSteps[proc].emplace_back();
                 sendDownSteps[proc].emplace_back();
                 nodesEvictedAfterStep[proc].emplace_back();
             }
-        
+        }
+
         bool any_send_up = false;
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            if(!nodes_sent_up[proc][superstep].empty() || !nodes_evicted_in_comm[proc][superstep].empty())
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            if (!nodes_sent_up[proc][superstep].empty() || !nodes_evicted_in_comm[proc][superstep].empty()) {
                 any_send_up = true;
-        
-        if(any_send_up)
-        {
-            for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            {
+            }
+        }
+
+        if (any_send_up) {
+            for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
                 computeSteps[proc].emplace_back();
                 sendUpSteps[proc].emplace_back(nodes_sent_up[proc][superstep]);
                 sendDownSteps[proc].emplace_back();
                 nodesEvictedAfterStep[proc].emplace_back(nodes_evicted_in_comm[proc][superstep]);
-                for(vertex_idx to_evict : nodes_evicted_in_comm[proc][superstep])
+                for (vertex_idx to_evict : nodes_evicted_in_comm[proc][superstep]) {
                     mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict);
+                }
                 ++step_on_proc[proc];
             }
             ++step;
         }
 
         bool any_send_down = false;
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            if(!nodes_sent_down[proc][superstep].empty())
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            if (!nodes_sent_down[proc][superstep].empty()) {
                 any_send_down = true;
+            }
+        }
 
-        if(any_send_down)
-        {
-            for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            {
+        if (any_send_down) {
+            for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
                 computeSteps[proc].emplace_back();
                 sendUpSteps[proc].emplace_back();
                 sendDownSteps[proc].emplace_back(nodes_sent_down[proc][superstep]);
-                for(vertex_idx send_down : nodes_sent_down[proc][superstep])
+                for (vertex_idx send_down : nodes_sent_down[proc][superstep]) {
                     mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(send_down);
+                }
                 nodesEvictedAfterStep[proc].emplace_back();
                 ++step_on_proc[proc];
             }
             ++step;
         }
-
     }
 }
 
-template<typename Graph_t>
-std::vector<std::set<vertex_idx_t<Graph_t> > > PebblingSchedule<Graph_t>::getMemContentAtEnd() const
-{
-    std::vector<std::set<vertex_idx> > mem_content(instance->numberOfProcessors());
-    if(!has_red_in_beginning.empty())
+template <typename Graph_t>
+std::vector<std::set<vertex_idx_t<Graph_t>>> PebblingSchedule<Graph_t>::getMemContentAtEnd() const {
+    std::vector<std::set<vertex_idx>> mem_content(instance->numberOfProcessors());
+    if (!has_red_in_beginning.empty()) {
         mem_content = has_red_in_beginning;
+    }
 
-    for(unsigned step=0; step<number_of_supersteps; ++step)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             // computation phase
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
-            {
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
                 mem_content[proc].insert(computeStep.node);
-                for(vertex_idx to_remove : computeStep.nodes_evicted_after)
+                for (vertex_idx to_remove : computeStep.nodes_evicted_after) {
                     mem_content[proc].erase(to_remove);
+                }
             }
 
-            //communication phase - eviction
-            for(vertex_idx node : nodes_evicted_in_comm[proc][step])
+            // communication phase - eviction
+            for (vertex_idx node : nodes_evicted_in_comm[proc][step]) {
                 mem_content[proc].erase(node);
+            }
 
             // communication phase - senddown
-            for(vertex_idx node : nodes_sent_down[proc][step])
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
                 mem_content[proc].insert(node);
+            }
         }
+    }
 
     return mem_content;
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::removeEvictStepsFromEnd()
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::removeEvictStepsFromEnd() {
     std::vector<memweight_type> mem_used(instance->numberOfProcessors(), 0);
     std::vector<memweight_type> bottleneck(instance->numberOfProcessors(), 0);
-    std::vector<std::set<vertex_idx> > fast_mem_end = getMemContentAtEnd();
-    for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-    {
-        for(vertex_idx node : fast_mem_end[proc])
+    std::vector<std::set<vertex_idx>> fast_mem_end = getMemContentAtEnd();
+    for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+        for (vertex_idx node : fast_mem_end[proc]) {
             mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
+        }
 
         bottleneck[proc] = instance->getArchitecture().memoryBound(proc) - mem_used[proc];
     }
 
-    for(unsigned step=number_of_supersteps; step>0;)
-    {
+    for (unsigned step = number_of_supersteps; step > 0;) {
         --step;
 
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             // communication phase - senddown
-            for(vertex_idx node : nodes_sent_down[proc][step])
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
                 mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node);
+            }
 
-            //communication phase - eviction
+            // communication phase - eviction
             std::vector<vertex_idx> remaining;
-            for(vertex_idx node : nodes_evicted_in_comm[proc][step])
-            {
+            for (vertex_idx node : nodes_evicted_in_comm[proc][step]) {
                 mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node);
-                if(instance->getComputationalDag().vertex_mem_weight(node) <= bottleneck[proc]
-                    && fast_mem_end[proc].find(node) == fast_mem_end[proc].end())
-                {
+                if (instance->getComputationalDag().vertex_mem_weight(node) <= bottleneck[proc]
+                    && fast_mem_end[proc].find(node) == fast_mem_end[proc].end()) {
                     fast_mem_end[proc].insert(node);
                     bottleneck[proc] -= instance->getComputationalDag().vertex_mem_weight(node);
-                }
-                else
+                } else {
                     remaining.push_back(node);
+                }
             }
             nodes_evicted_in_comm[proc][step] = remaining;
             bottleneck[proc] = std::min(bottleneck[proc], instance->getArchitecture().memoryBound(proc) - mem_used[proc]);
 
             // computation phase
-            for(unsigned stepIndex = static_cast<unsigned>(compute_steps_for_proc_superstep[proc][step].size()); stepIndex > 0;)
-            {
+            for (unsigned stepIndex = static_cast<unsigned>(compute_steps_for_proc_superstep[proc][step].size()); stepIndex > 0;) {
                 --stepIndex;
                 auto &computeStep = compute_steps_for_proc_superstep[proc][step][stepIndex];
 
                 std::vector<vertex_idx> remaining_2;
-                for(vertex_idx to_remove : computeStep.nodes_evicted_after)
-                {
+                for (vertex_idx to_remove : computeStep.nodes_evicted_after) {
                     mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(to_remove);
-                    if( instance->getComputationalDag().vertex_mem_weight(to_remove) <= bottleneck[proc]
-                        && fast_mem_end[proc].find(to_remove) == fast_mem_end[proc].end())
-                    {
+                    if (instance->getComputationalDag().vertex_mem_weight(to_remove) <= bottleneck[proc]
+                        && fast_mem_end[proc].find(to_remove) == fast_mem_end[proc].end()) {
                         fast_mem_end[proc].insert(to_remove);
                         bottleneck[proc] -= instance->getComputationalDag().vertex_mem_weight(to_remove);
-                    }
-                    else
+                    } else {
                         remaining_2.push_back(to_remove);
+                    }
                 }
                 computeStep.nodes_evicted_after = remaining_2;
                 bottleneck[proc] = std::min(bottleneck[proc], instance->getArchitecture().memoryBound(proc) - mem_used[proc]);
-                
+
                 mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(computeStep.node);
             }
         }
     }
 
-    if(!isValid())
-        std::cout<<"ERROR: eviction removal process created an invalid schedule."<<std::endl;
+    if (!isValid()) {
+        std::cout << "ERROR: eviction removal process created an invalid schedule." << std::endl;
+    }
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::CreateFromPartialPebblings(const BspInstance<Graph_t> &bsp_instance, 
-                                                    const std::vector<PebblingSchedule<Graph_t> >& pebblings,
-                                                    const std::vector<std::set<unsigned> >& processors_to_parts,
-                                                    const std::vector<std::map<vertex_idx, vertex_idx> >& original_node_id,
-                                                    const std::vector<std::map<unsigned, unsigned> >& original_proc_id,
-                                                    const std::vector<std::vector<std::set<vertex_idx> > >& has_reds_in_beginning)
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::CreateFromPartialPebblings(
+    const BspInstance<Graph_t> &bsp_instance,
+    const std::vector<PebblingSchedule<Graph_t>> &pebblings,
+    const std::vector<std::set<unsigned>> &processors_to_parts,
+    const std::vector<std::map<vertex_idx, vertex_idx>> &original_node_id,
+    const std::vector<std::map<unsigned, unsigned>> &original_proc_id,
+    const std::vector<std::vector<std::set<vertex_idx>>> &has_reds_in_beginning) {
     instance = &bsp_instance;
 
     unsigned nr_parts = static_cast<unsigned>(processors_to_parts.size());
 
-    std::vector<std::set<vertex_idx> > in_mem(instance->numberOfProcessors());
-    std::vector<std::tuple<vertex_idx, unsigned, unsigned> > force_evicts;
+    std::vector<std::set<vertex_idx>> in_mem(instance->numberOfProcessors());
+    std::vector<std::tuple<vertex_idx, unsigned, unsigned>> force_evicts;
 
     compute_steps_for_proc_superstep.clear();
     nodes_sent_up.clear();
@@ -1542,50 +1601,54 @@ void PebblingSchedule<Graph_t>::CreateFromPartialPebblings(const BspInstance<Gra
     std::vector<unsigned> supstep_idx(instance->numberOfProcessors(), 0);
 
     std::vector<unsigned> gets_blue_in_superstep(instance->numberOfVertices(), UINT_MAX);
-    for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-        if(instance->getComputationalDag().in_degree(node) == 0)
+    for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+        if (instance->getComputationalDag().in_degree(node) == 0) {
             gets_blue_in_superstep[node] = 0;
+        }
+    }
 
-    for(unsigned part = 0; part < nr_parts; ++part)
-    {
+    for (unsigned part = 0; part < nr_parts; ++part) {
         unsigned starting_step_index = 0;
 
         // find dependencies on previous subschedules
-        for(vertex_idx node = 0; node < pebblings[part].instance->numberOfVertices(); ++node)
-            if(pebblings[part].instance->getComputationalDag().in_degree(node) == 0)
+        for (vertex_idx node = 0; node < pebblings[part].instance->numberOfVertices(); ++node) {
+            if (pebblings[part].instance->getComputationalDag().in_degree(node) == 0) {
                 starting_step_index = std::max(starting_step_index, gets_blue_in_superstep[original_node_id[part].at(node)]);
+            }
+        }
 
         // sync starting points for the subset of processors
-        for(unsigned proc : processors_to_parts[part])
+        for (unsigned proc : processors_to_parts[part]) {
             starting_step_index = std::max(starting_step_index, supstep_idx[proc]);
-        for(unsigned proc : processors_to_parts[part])
-            while(supstep_idx[proc] < starting_step_index)
-            {
+        }
+        for (unsigned proc : processors_to_parts[part]) {
+            while (supstep_idx[proc] < starting_step_index) {
                 compute_steps_for_proc_superstep[proc].emplace_back();
                 nodes_sent_up[proc].emplace_back();
                 nodes_sent_down[proc].emplace_back();
                 nodes_evicted_in_comm[proc].emplace_back();
                 ++supstep_idx[proc];
             }
-        
+        }
+
         // check and update according to initial states of red pebbles
-        for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc)
-        {
+        for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) {
             unsigned proc_id = original_proc_id[part].at(proc);
             std::set<vertex_idx> needed_in_red, add_before, remove_before;
-            for(vertex_idx node : has_reds_in_beginning[part][proc])
-            {
+            for (vertex_idx node : has_reds_in_beginning[part][proc]) {
                 vertex_idx node_id = original_node_id[part].at(node);
                 needed_in_red.insert(node_id);
-                if(in_mem[proc_id].find(node_id) == in_mem[proc_id].end())
+                if (in_mem[proc_id].find(node_id) == in_mem[proc_id].end()) {
                     add_before.insert(node_id);
+                }
             }
-            for(vertex_idx node : in_mem[proc_id])
-                if(needed_in_red.find(node) == needed_in_red.end())
+            for (vertex_idx node : in_mem[proc_id]) {
+                if (needed_in_red.find(node) == needed_in_red.end()) {
                     remove_before.insert(node);
+                }
+            }
 
-            if((!add_before.empty() || !remove_before.empty()) && supstep_idx[proc_id] == 0)
-            {
+            if ((!add_before.empty() || !remove_before.empty()) && supstep_idx[proc_id] == 0) {
                 // this code is added just in case - this shouldn't happen in normal schedules
                 compute_steps_for_proc_superstep[proc_id].emplace_back();
                 nodes_sent_up[proc_id].emplace_back();
@@ -1594,22 +1657,19 @@ void PebblingSchedule<Graph_t>::CreateFromPartialPebblings(const BspInstance<Gra
                 ++supstep_idx[proc_id];
             }
 
-            for(vertex_idx node : add_before)
-            {
+            for (vertex_idx node : add_before) {
                 in_mem[proc_id].insert(node);
                 nodes_sent_down[proc_id].back().push_back(node);
             }
-            for(vertex_idx node : remove_before)
-            {
+            for (vertex_idx node : remove_before) {
                 in_mem[proc_id].erase(node);
                 nodes_evicted_in_comm[proc_id].back().push_back(node);
-                force_evicts.push_back(std::make_tuple(node, proc_id, nodes_evicted_in_comm[proc_id].size()-1));
-            } 
+                force_evicts.push_back(std::make_tuple(node, proc_id, nodes_evicted_in_comm[proc_id].size() - 1));
+            }
         }
-        
-        for(unsigned supstep = 0; supstep < pebblings[part].numberOfSupersteps(); ++supstep)
-            for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc)
-            {
+
+        for (unsigned supstep = 0; supstep < pebblings[part].numberOfSupersteps(); ++supstep) {
+            for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) {
                 unsigned proc_id = original_proc_id[part].at(proc);
                 compute_steps_for_proc_superstep[proc_id].emplace_back();
                 nodes_sent_up[proc_id].emplace_back();
@@ -1617,52 +1677,50 @@ void PebblingSchedule<Graph_t>::CreateFromPartialPebblings(const BspInstance<Gra
                 nodes_evicted_in_comm[proc_id].emplace_back();
 
                 // copy schedule with translated indeces
-                for(const compute_step& computeStep : pebblings[part].GetComputeStepsForProcSuperstep(proc, supstep))
-                {
+                for (const compute_step &computeStep : pebblings[part].GetComputeStepsForProcSuperstep(proc, supstep)) {
                     compute_steps_for_proc_superstep[proc_id].back().emplace_back();
                     compute_steps_for_proc_superstep[proc_id].back().back().node = original_node_id[part].at(computeStep.node);
                     in_mem[proc_id].insert(original_node_id[part].at(computeStep.node));
-                    
-                    for(vertex_idx local_id : computeStep.nodes_evicted_after)
-                    {
-                        compute_steps_for_proc_superstep[proc_id].back().back().nodes_evicted_after.push_back(original_node_id[part].at(local_id));
+
+                    for (vertex_idx local_id : computeStep.nodes_evicted_after) {
+                        compute_steps_for_proc_superstep[proc_id].back().back().nodes_evicted_after.push_back(
+                            original_node_id[part].at(local_id));
                         in_mem[proc_id].erase(original_node_id[part].at(local_id));
+                    }
                 }
-                }
-                for(vertex_idx node : pebblings[part].GetNodesSentUp(proc, supstep))
-                {
+                for (vertex_idx node : pebblings[part].GetNodesSentUp(proc, supstep)) {
                     vertex_idx node_id = original_node_id[part].at(node);
                     nodes_sent_up[proc_id].back().push_back(node_id);
                     gets_blue_in_superstep[node_id] = std::min(gets_blue_in_superstep[node_id], supstep_idx[proc_id]);
                 }
-                for(vertex_idx node : pebblings[part].GetNodesEvictedInComm(proc, supstep))
-                {
+                for (vertex_idx node : pebblings[part].GetNodesEvictedInComm(proc, supstep)) {
                     nodes_evicted_in_comm[proc_id].back().push_back(original_node_id[part].at(node));
                     in_mem[proc_id].erase(original_node_id[part].at(node));
                 }
-                for(vertex_idx node : pebblings[part].GetNodesSentDown(proc, supstep))
-                {
+                for (vertex_idx node : pebblings[part].GetNodesSentDown(proc, supstep)) {
                     nodes_sent_down[proc_id].back().push_back(original_node_id[part].at(node));
                     in_mem[proc_id].insert(original_node_id[part].at(node));
                 }
 
                 ++supstep_idx[proc_id];
-            }    
+            }
+        }
     }
 
     // padding supersteps in the end
     unsigned max_step_index = 0;
-    for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc)
+    for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
         max_step_index = std::max(max_step_index, supstep_idx[proc]);
-    for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc)
-        while(supstep_idx[proc] < max_step_index)
-        {
+    }
+    for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+        while (supstep_idx[proc] < max_step_index) {
             compute_steps_for_proc_superstep[proc].emplace_back();
             nodes_sent_up[proc].emplace_back();
             nodes_sent_down[proc].emplace_back();
             nodes_evicted_in_comm[proc].emplace_back();
             ++supstep_idx[proc];
         }
+    }
     number_of_supersteps = max_step_index;
     need_to_load_inputs = true;
 
@@ -1670,12 +1728,11 @@ void PebblingSchedule<Graph_t>::CreateFromPartialPebblings(const BspInstance<Gra
     TryToMergeSupersteps();
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::FixForceEvicts(const std::vector<std::tuple<vertex_idx, unsigned, unsigned> > force_evict_node_proc_step)
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::FixForceEvicts(
+    const std::vector<std::tuple<vertex_idx, unsigned, unsigned>> force_evict_node_proc_step) {
     // Some values were evicted only because they weren't present in the next part - see if we can undo those evictions
-    for(auto force_evict : force_evict_node_proc_step)
-    {
+    for (auto force_evict : force_evict_node_proc_step) {
         vertex_idx node = std::get<0>(force_evict);
         unsigned proc = std::get<1>(force_evict);
         unsigned superstep = std::get<2>(force_evict);
@@ -1684,79 +1741,82 @@ void PebblingSchedule<Graph_t>::FixForceEvicts(const std::vector<std::tuple<vert
         bool next_in_comm = false;
         std::pair<unsigned, unsigned> where;
 
-        for(unsigned find_supstep = superstep + 1; find_supstep < numberOfSupersteps(); ++find_supstep)
-        {
-            for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][find_supstep].size(); ++stepIndex)
-                if(compute_steps_for_proc_superstep[proc][find_supstep][stepIndex].node == node)
-                {
+        for (unsigned find_supstep = superstep + 1; find_supstep < numberOfSupersteps(); ++find_supstep) {
+            for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][find_supstep].size(); ++stepIndex) {
+                if (compute_steps_for_proc_superstep[proc][find_supstep][stepIndex].node == node) {
                     next_in_comp = true;
                     where = std::make_pair(find_supstep, stepIndex);
                     break;
                 }
-            if(next_in_comp)
+            }
+            if (next_in_comp) {
                 break;
-            for(vertex_idx send_down : nodes_sent_down[proc][find_supstep])
-                if(send_down == node)
-                {
+            }
+            for (vertex_idx send_down : nodes_sent_down[proc][find_supstep]) {
+                if (send_down == node) {
                     next_in_comm = true;
                     where = std::make_pair(find_supstep, 0);
                     break;
                 }
-            if(next_in_comm)
+            }
+            if (next_in_comm) {
                 break;
+            }
         }
 
         // check new schedule for validity
-        if(!next_in_comp && !next_in_comm)
+        if (!next_in_comp && !next_in_comm) {
             continue;
-        
+        }
+
         PebblingSchedule<Graph_t> test_schedule = *this;
-        for(auto itr = test_schedule.nodes_evicted_in_comm[proc][superstep].begin(); itr != test_schedule.nodes_evicted_in_comm[proc][superstep].end(); ++itr)
-            if(*itr == node)
-            {
+        for (auto itr = test_schedule.nodes_evicted_in_comm[proc][superstep].begin();
+             itr != test_schedule.nodes_evicted_in_comm[proc][superstep].end();
+             ++itr) {
+            if (*itr == node) {
                 test_schedule.nodes_evicted_in_comm[proc][superstep].erase(itr);
                 break;
             }
+        }
 
-        if(next_in_comp)
-        {            
-            for(auto itr = test_schedule.compute_steps_for_proc_superstep[proc][where.first].begin(); itr != test_schedule.compute_steps_for_proc_superstep[proc][where.first].end(); ++itr)
-                if(itr->node == node)
-                {
-                    if(where.second > 0)
-                    {
+        if (next_in_comp) {
+            for (auto itr = test_schedule.compute_steps_for_proc_superstep[proc][where.first].begin();
+                 itr != test_schedule.compute_steps_for_proc_superstep[proc][where.first].end();
+                 ++itr) {
+                if (itr->node == node) {
+                    if (where.second > 0) {
                         auto previous_step = itr;
                         --previous_step;
-                        for(vertex_idx to_evict : itr->nodes_evicted_after)
+                        for (vertex_idx to_evict : itr->nodes_evicted_after) {
                             previous_step->nodes_evicted_after.push_back(to_evict);
-                    }
-                    else
-                    {
-                        for(vertex_idx to_evict : itr->nodes_evicted_after)
-                            test_schedule.nodes_evicted_in_comm[proc][where.first-1].push_back(to_evict);
+                        }
+                    } else {
+                        for (vertex_idx to_evict : itr->nodes_evicted_after) {
+                            test_schedule.nodes_evicted_in_comm[proc][where.first - 1].push_back(to_evict);
+                        }
                     }
                     test_schedule.compute_steps_for_proc_superstep[proc][where.first].erase(itr);
                     break;
                 }
+            }
 
-            if(test_schedule.isValid())
-            {
+            if (test_schedule.isValid()) {
                 nodes_evicted_in_comm[proc][superstep] = test_schedule.nodes_evicted_in_comm[proc][superstep];
-                compute_steps_for_proc_superstep[proc][where.first] = test_schedule.compute_steps_for_proc_superstep[proc][where.first];
-                nodes_evicted_in_comm[proc][where.first-1] = test_schedule.nodes_evicted_in_comm[proc][where.first-1];
+                compute_steps_for_proc_superstep[proc][where.first]
+                    = test_schedule.compute_steps_for_proc_superstep[proc][where.first];
+                nodes_evicted_in_comm[proc][where.first - 1] = test_schedule.nodes_evicted_in_comm[proc][where.first - 1];
             }
-        }
-        else if(next_in_comm)
-        {
-            for(auto itr = test_schedule.nodes_sent_down[proc][where.first].begin(); itr != test_schedule.nodes_sent_down[proc][where.first].end(); ++itr)
-                if(*itr == node)
-                {
+        } else if (next_in_comm) {
+            for (auto itr = test_schedule.nodes_sent_down[proc][where.first].begin();
+                 itr != test_schedule.nodes_sent_down[proc][where.first].end();
+                 ++itr) {
+                if (*itr == node) {
                     test_schedule.nodes_sent_down[proc][where.first].erase(itr);
                     break;
                 }
-                
-            if(test_schedule.isValid())
-            {
+            }
+
+            if (test_schedule.isValid()) {
                 nodes_evicted_in_comm[proc][superstep] = test_schedule.nodes_evicted_in_comm[proc][superstep];
                 nodes_sent_down[proc][where.first] = test_schedule.nodes_sent_down[proc][where.first];
             }
@@ -1764,61 +1824,54 @@ void PebblingSchedule<Graph_t>::FixForceEvicts(const std::vector<std::tuple<vert
     }
 }
 
-template<typename Graph_t>
-void PebblingSchedule<Graph_t>::TryToMergeSupersteps()
-{
+template <typename Graph_t>
+void PebblingSchedule<Graph_t>::TryToMergeSupersteps() {
     std::vector<bool> is_removed(number_of_supersteps, false);
 
-    for(unsigned step = 1; step < number_of_supersteps; ++step)
-    {
-        if(is_removed[step])
+    for (unsigned step = 1; step < number_of_supersteps; ++step) {
+        if (is_removed[step]) {
             continue;
+        }
 
         unsigned prev_step = step - 1;
-        while(is_removed[prev_step])
+        while (is_removed[prev_step]) {
             --prev_step;
+        }
 
-        for(unsigned next_step = step + 1; next_step < number_of_supersteps; ++next_step)
-        {
+        for (unsigned next_step = step + 1; next_step < number_of_supersteps; ++next_step) {
             // Try to merge step and next_step
             PebblingSchedule test_schedule = *this;
 
-            for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc)
-            {
+            for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
                 test_schedule.compute_steps_for_proc_superstep[proc][step].insert(
-                        test_schedule.compute_steps_for_proc_superstep[proc][step].end(),
-                        test_schedule.compute_steps_for_proc_superstep[proc][next_step].begin(),
-                        test_schedule.compute_steps_for_proc_superstep[proc][next_step].end());
+                    test_schedule.compute_steps_for_proc_superstep[proc][step].end(),
+                    test_schedule.compute_steps_for_proc_superstep[proc][next_step].begin(),
+                    test_schedule.compute_steps_for_proc_superstep[proc][next_step].end());
                 test_schedule.compute_steps_for_proc_superstep[proc][next_step].clear();
-                
-                test_schedule.nodes_sent_up[proc][step].insert(
-                        test_schedule.nodes_sent_up[proc][step].end(),
-                        test_schedule.nodes_sent_up[proc][next_step].begin(),
-                        test_schedule.nodes_sent_up[proc][next_step].end());
+
+                test_schedule.nodes_sent_up[proc][step].insert(test_schedule.nodes_sent_up[proc][step].end(),
+                                                               test_schedule.nodes_sent_up[proc][next_step].begin(),
+                                                               test_schedule.nodes_sent_up[proc][next_step].end());
                 test_schedule.nodes_sent_up[proc][next_step].clear();
 
-                test_schedule.nodes_sent_down[proc][prev_step].insert(
-                        test_schedule.nodes_sent_down[proc][prev_step].end(),
-                        test_schedule.nodes_sent_down[proc][step].begin(),
-                        test_schedule.nodes_sent_down[proc][step].end());
+                test_schedule.nodes_sent_down[proc][prev_step].insert(test_schedule.nodes_sent_down[proc][prev_step].end(),
+                                                                      test_schedule.nodes_sent_down[proc][step].begin(),
+                                                                      test_schedule.nodes_sent_down[proc][step].end());
                 test_schedule.nodes_sent_down[proc][step].clear();
 
                 test_schedule.nodes_evicted_in_comm[proc][step].insert(
-                        test_schedule.nodes_evicted_in_comm[proc][step].end(),
-                        test_schedule.nodes_evicted_in_comm[proc][next_step].begin(),
-                        test_schedule.nodes_evicted_in_comm[proc][next_step].end());
+                    test_schedule.nodes_evicted_in_comm[proc][step].end(),
+                    test_schedule.nodes_evicted_in_comm[proc][next_step].begin(),
+                    test_schedule.nodes_evicted_in_comm[proc][next_step].end());
                 test_schedule.nodes_evicted_in_comm[proc][next_step].clear();
-
             }
 
-            if(test_schedule.isValid())
-            {
+            if (test_schedule.isValid()) {
                 is_removed[next_step] = true;
-                for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc)
-                {
+                for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
                     compute_steps_for_proc_superstep[proc][step] = test_schedule.compute_steps_for_proc_superstep[proc][step];
                     compute_steps_for_proc_superstep[proc][next_step].clear();
-                    
+
                     nodes_sent_up[proc][step] = test_schedule.nodes_sent_up[proc][step];
                     nodes_sent_up[proc][next_step].clear();
 
@@ -1829,31 +1882,33 @@ void PebblingSchedule<Graph_t>::TryToMergeSupersteps()
                     nodes_evicted_in_comm[proc][step] = test_schedule.nodes_evicted_in_comm[proc][step];
                     nodes_evicted_in_comm[proc][next_step].clear();
                 }
-            }
-            else
+            } else {
                 break;
+            }
         }
     }
 
     unsigned new_nr_supersteps = 0;
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-        if(!is_removed[step])
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        if (!is_removed[step]) {
             ++new_nr_supersteps;
-    
-    if(new_nr_supersteps == number_of_supersteps)
+        }
+    }
+
+    if (new_nr_supersteps == number_of_supersteps) {
         return;
+    }
 
     PebblingSchedule<Graph_t> shortened_schedule = *this;
     shortened_schedule.updateNumberOfSupersteps(new_nr_supersteps);
 
     unsigned new_index = 0;
-    for(unsigned step = 0; step < number_of_supersteps; ++step)
-    {
-        if(is_removed[step])
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        if (is_removed[step]) {
             continue;
+        }
 
-        for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             shortened_schedule.compute_steps_for_proc_superstep[proc][new_index] = compute_steps_for_proc_superstep[proc][step];
             shortened_schedule.nodes_sent_up[proc][new_index] = nodes_sent_up[proc][step];
             shortened_schedule.nodes_sent_down[proc][new_index] = nodes_sent_down[proc][step];
@@ -1862,53 +1917,61 @@ void PebblingSchedule<Graph_t>::TryToMergeSupersteps()
 
         ++new_index;
     }
-    
-    *this = shortened_schedule;
 
-    if(!isValid())
-        std::cout<<"ERROR: schedule is not valid after superstep merging."<<std::endl;
+    *this = shortened_schedule;
 
+    if (!isValid()) {
+        std::cout << "ERROR: schedule is not valid after superstep merging." << std::endl;
+    }
 }
 
-template<typename Graph_t>
-PebblingSchedule<Graph_t> PebblingSchedule<Graph_t>::ExpandMemSchedule(const BspInstance<Graph_t>& original_instance, const std::vector<vertex_idx> mapping_to_coarse) const
-{
-    std::map<vertex_idx, std::set<vertex_idx> > original_vertices_for_coarse_ID;
-    for(vertex_idx node = 0; node < original_instance.numberOfVertices(); ++node)
+template <typename Graph_t>
+PebblingSchedule<Graph_t> PebblingSchedule<Graph_t>::ExpandMemSchedule(const BspInstance<Graph_t> &original_instance,
+                                                                       const std::vector<vertex_idx> mapping_to_coarse) const {
+    std::map<vertex_idx, std::set<vertex_idx>> original_vertices_for_coarse_ID;
+    for (vertex_idx node = 0; node < original_instance.numberOfVertices(); ++node) {
         original_vertices_for_coarse_ID[mapping_to_coarse[node]].insert(node);
+    }
 
     PebblingSchedule<Graph_t> fine_schedule;
     fine_schedule.instance = &original_instance;
     fine_schedule.updateNumberOfSupersteps(number_of_supersteps);
 
-    for(unsigned step=0; step<number_of_supersteps; ++step)
-    {
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-        {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
             // computation phase
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
-            {
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
                 vertex_idx node = computeStep.node;
-                for(vertex_idx original_node : original_vertices_for_coarse_ID[node])
+                for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) {
                     fine_schedule.compute_steps_for_proc_superstep[proc][step].emplace_back(original_node);
+                }
 
-                for(vertex_idx to_remove : computeStep.nodes_evicted_after)
-                    for(vertex_idx original_node : original_vertices_for_coarse_ID[to_remove])
-                        fine_schedule.compute_steps_for_proc_superstep[proc][step].back().nodes_evicted_after.push_back(original_node);
+                for (vertex_idx to_remove : computeStep.nodes_evicted_after) {
+                    for (vertex_idx original_node : original_vertices_for_coarse_ID[to_remove]) {
+                        fine_schedule.compute_steps_for_proc_superstep[proc][step].back().nodes_evicted_after.push_back(
+                            original_node);
+                    }
+                }
             }
 
-            //communication phase
-            for(vertex_idx node : nodes_sent_up[proc][step])
-                for(vertex_idx original_node : original_vertices_for_coarse_ID[node])
+            // communication phase
+            for (vertex_idx node : nodes_sent_up[proc][step]) {
+                for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) {
                     fine_schedule.nodes_sent_up[proc][step].push_back(original_node);
-            
-            for(vertex_idx node : nodes_evicted_in_comm[proc][step])
-                for(vertex_idx original_node : original_vertices_for_coarse_ID[node])
+                }
+            }
+
+            for (vertex_idx node : nodes_evicted_in_comm[proc][step]) {
+                for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) {
                     fine_schedule.nodes_evicted_in_comm[proc][step].push_back(original_node);
+                }
+            }
 
-            for(vertex_idx node : nodes_sent_down[proc][step])
-                for(vertex_idx original_node : original_vertices_for_coarse_ID[node])
+            for (vertex_idx node : nodes_sent_down[proc][step]) {
+                for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) {
                     fine_schedule.nodes_sent_down[proc][step].push_back(original_node);
+                }
+            }
         }
     }
 
@@ -1916,49 +1979,46 @@ PebblingSchedule<Graph_t> PebblingSchedule<Graph_t>::ExpandMemSchedule(const Bsp
     return fine_schedule;
 }
 
-template<typename Graph_t>
-BspSchedule<Graph_t> PebblingSchedule<Graph_t>::ConvertToBsp() const
-{
-    std::vector<unsigned> node_to_proc(instance->numberOfVertices(), UINT_MAX), node_to_supstep(instance->numberOfVertices(), UINT_MAX);
+template <typename Graph_t>
+BspSchedule<Graph_t> PebblingSchedule<Graph_t>::ConvertToBsp() const {
+    std::vector<unsigned> node_to_proc(instance->numberOfVertices(), UINT_MAX),
+        node_to_supstep(instance->numberOfVertices(), UINT_MAX);
 
-    for(unsigned step=0; step<number_of_supersteps; ++step)
-        for(unsigned proc=0; proc<instance->numberOfProcessors(); ++proc)
-            for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step])
-            {
-                const vertex_idx& node = computeStep.node;             
-                if(node_to_proc[node] == UINT_MAX)
-                {
+    for (unsigned step = 0; step < number_of_supersteps; ++step) {
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) {
+            for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) {
+                const vertex_idx &node = computeStep.node;
+                if (node_to_proc[node] == UINT_MAX) {
                     node_to_proc[node] = proc;
                     node_to_supstep[node] = step;
                 }
             }
-    if(need_to_load_inputs)
-        for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node)
-            if(instance->getComputationalDag().in_degree(node) == 0)
-            {
+        }
+    }
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) {
+            if (instance->getComputationalDag().in_degree(node) == 0) {
                 unsigned min_superstep = UINT_MAX, proc_chosen = 0;
-                for(vertex_idx succ : instance->getComputationalDag().children(node))
-                    if(node_to_supstep[succ] < min_superstep)
-                    {
+                for (vertex_idx succ : instance->getComputationalDag().children(node)) {
+                    if (node_to_supstep[succ] < min_superstep) {
                         min_superstep = node_to_supstep[succ];
                         proc_chosen = node_to_proc[succ];
                     }
+                }
                 node_to_supstep[node] = min_superstep;
                 node_to_proc[node] = proc_chosen;
             }
+        }
+    }
 
     BspSchedule<Graph_t> schedule(*instance, node_to_proc, node_to_supstep);
-    if(schedule.satisfiesPrecedenceConstraints() && schedule.satisfiesNodeTypeConstraints())
-    {
+    if (schedule.satisfiesPrecedenceConstraints() && schedule.satisfiesNodeTypeConstraints()) {
         schedule.setAutoCommunicationSchedule();
         return schedule;
-    }
-    else
-    {
-        std::cout<<"ERROR: no direct conversion to Bsp schedule exists, using dummy schedule instead."<<std::endl;
+    } else {
+        std::cout << "ERROR: no direct conversion to Bsp schedule exists, using dummy schedule instead." << std::endl;
         return BspSchedule<Graph_t>(*instance);
     }
 }
 
-}
-
+}    // namespace osp
diff --git a/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp b/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp
index 04931cec..71044414 100644
--- a/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp
+++ b/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp
@@ -13,17 +13,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-#include <coptcpp_pch.h>
 #include <callbackbase.h>
+#include <coptcpp_pch.h>
 
 struct COPTEnv {
     static Envr getInstance() {
         static Envr env;
         return env;
     }
-};
\ No newline at end of file
+};
diff --git a/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp b/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp
index bb40a004..dce14592 100644
--- a/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp
+++ b/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp
@@ -13,24 +13,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp"
 #include "callbackbase.h"
 #include "coptcpp_pch.h"
 #include "osp/bsp/scheduler/Scheduler.hpp"
-#include "osp/pebbling/PebblingSchedule.hpp"
 #include "osp/graph_algorithms/computational_dag_util.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
+#include "osp/pebbling/PebblingSchedule.hpp"
+#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class MultiProcessorPebbling : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
 
   private:
@@ -44,7 +43,6 @@ class MultiProcessorPebbling : public Scheduler<Graph_t> {
     bool write_solutions_found;
 
     class WriteSolutionCallback : public CallbackBase {
-
       private:
         unsigned counter;
         unsigned max_number_solution;
@@ -53,14 +51,16 @@ class MultiProcessorPebbling : public Scheduler<Graph_t> {
 
       public:
         WriteSolutionCallback()
-            : counter(0), max_number_solution(500), best_obj(COPT_INFINITY), write_solutions_path_cb(""),
+            : counter(0),
+              max_number_solution(500),
+              best_obj(COPT_INFINITY),
+              write_solutions_path_cb(""),
               solution_file_prefix_cb("") {}
 
         std::string write_solutions_path_cb;
         std::string solution_file_prefix_cb;
 
         void callback() override;
-
     };
 
     WriteSolutionCallback solution_callback;
@@ -98,21 +98,21 @@ class MultiProcessorPebbling : public Scheduler<Graph_t> {
     unsigned compute_steps_per_cycle = 3;
     bool need_to_load_inputs = true;
     std::set<vertex_idx> needs_blue_at_end;
-    std::vector<std::set<vertex_idx> > has_red_in_beginning;
+    std::vector<std::set<vertex_idx>> has_red_in_beginning;
     bool verbose = false;
 
     void constructPebblingScheduleFromSolution(PebblingSchedule<Graph_t> &schedule);
 
     void setInitialSolution(const BspInstance<Graph_t> &instance,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& computeSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& sendUpSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& sendDownSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& nodesEvictedAfterStep);
+                            const std::vector<std::vector<std::vector<vertex_idx>>> &computeSteps,
+                            const std::vector<std::vector<std::vector<vertex_idx>>> &sendUpSteps,
+                            const std::vector<std::vector<std::vector<vertex_idx>>> &sendDownSteps,
+                            const std::vector<std::vector<std::vector<vertex_idx>>> &nodesEvictedAfterStep);
 
     unsigned computeMaxTimeForInitialSolution(const BspInstance<Graph_t> &instance,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& computeSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& sendUpSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& sendDownSteps) const;
+                                              const std::vector<std::vector<std::vector<vertex_idx>>> &computeSteps,
+                                              const std::vector<std::vector<std::vector<vertex_idx>>> &sendUpSteps,
+                                              const std::vector<std::vector<std::vector<vertex_idx>>> &sendDownSteps) const;
 
     void setupBaseVariablesConstraints(const BspInstance<Graph_t> &instance);
 
@@ -135,7 +135,9 @@ class MultiProcessorPebbling : public Scheduler<Graph_t> {
 
     virtual RETURN_STATUS computePebbling(PebblingSchedule<Graph_t> &schedule, bool use_async = false);
 
-    virtual RETURN_STATUS computePebblingWithInitialSolution(const PebblingSchedule<Graph_t>& initial_solution, PebblingSchedule<Graph_t> &out_schedule, bool use_async = false);
+    virtual RETURN_STATUS computePebblingWithInitialSolution(const PebblingSchedule<Graph_t> &initial_solution,
+                                                             PebblingSchedule<Graph_t> &out_schedule,
+                                                             bool use_async = false);
 
     /**
      * @brief Enables writing intermediate solutions.
@@ -193,25 +195,48 @@ class MultiProcessorPebbling : public Scheduler<Graph_t> {
 
     // getters and setters for problem parameters
     inline bool allowsSlidingPebbles() const { return slidingPebbles; }
+
     inline bool allowsMergingSteps() const { return mergeSteps; }
+
     inline bool isUpAndDownCostSummed() const { return up_and_down_cost_summed; }
+
     inline bool allowsRecomputation() const { return allows_recomputation; }
+
     inline bool hasRestrictedStepTypes() const { return restrict_step_types; }
+
     inline bool needsToLoadInputs() const { return need_to_load_inputs; }
+
     inline unsigned getComputeStepsPerCycle() const { return compute_steps_per_cycle; }
+
     inline unsigned getMaxTime() const { return max_time; }
 
-    inline void setSlidingPebbles (const bool slidingPebbles_) {slidingPebbles = slidingPebbles_; }
-    inline void setMergingSteps (const bool mergeSteps_) {mergeSteps = mergeSteps_; }
-    inline void setUpAndDownCostSummed (const bool is_summed_) {up_and_down_cost_summed = is_summed_; }
+    inline void setSlidingPebbles(const bool slidingPebbles_) { slidingPebbles = slidingPebbles_; }
+
+    inline void setMergingSteps(const bool mergeSteps_) { mergeSteps = mergeSteps_; }
+
+    inline void setUpAndDownCostSummed(const bool is_summed_) { up_and_down_cost_summed = is_summed_; }
+
     inline void setRecomputation(const bool allow_recompute_) { allows_recomputation = allow_recompute_; }
-    inline void setRestrictStepTypes(const bool restrict_) { restrict_step_types = restrict_; if(restrict_){mergeSteps = true;} }
-    inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_;}
-    inline void setComputeStepsPerCycle (const unsigned steps_per_cycle_) {compute_steps_per_cycle = steps_per_cycle_; }
-    inline void setMaxTime (const unsigned max_time_) {max_time = max_time_; }
-    inline void setNeedsBlueAtEnd (const std::set<vertex_idx>& needs_blue_) {needs_blue_at_end = needs_blue_; }
-    inline void setHasRedInBeginning (const std::vector<std::set<vertex_idx> >& has_red_) {has_red_in_beginning = has_red_; }
-    inline void setVerbose (const bool verbose_) {verbose = verbose_; }
+
+    inline void setRestrictStepTypes(const bool restrict_) {
+        restrict_step_types = restrict_;
+        if (restrict_) {
+            mergeSteps = true;
+        }
+    }
+
+    inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_; }
+
+    inline void setComputeStepsPerCycle(const unsigned steps_per_cycle_) { compute_steps_per_cycle = steps_per_cycle_; }
+
+    inline void setMaxTime(const unsigned max_time_) { max_time = max_time_; }
+
+    inline void setNeedsBlueAtEnd(const std::set<vertex_idx> &needs_blue_) { needs_blue_at_end = needs_blue_; }
+
+    inline void setHasRedInBeginning(const std::vector<std::set<vertex_idx>> &has_red_) { has_red_in_beginning = has_red_; }
+
+    inline void setVerbose(const bool verbose_) { verbose = verbose_; }
+
     inline void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; }
 
     bool hasEmptyStep(const BspInstance<Graph_t> &instance);
@@ -219,11 +244,11 @@ class MultiProcessorPebbling : public Scheduler<Graph_t> {
 
 // implementation
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::solveILP() {
-
-    if(!verbose)
+    if (!verbose) {
         model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0);
+    }
 
     model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds);
     model.SetIntParam(COPT_INTPARAM_THREADS, 128);
@@ -241,11 +266,11 @@ void MultiProcessorPebbling<Graph_t>::solveILP() {
     model.Solve();
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS MultiProcessorPebbling<Graph_t>::computeSchedule(BspSchedule<Graph_t> &schedule) {
-
-    if(max_time == 0)
+    if (max_time == 0) {
         max_time = 2 * static_cast<unsigned>(schedule.getInstance().numberOfVertices());
+    }
 
     setupBaseVariablesConstraints(schedule.getInstance());
     setupSyncPhaseVariablesConstraints(schedule.getInstance());
@@ -254,17 +279,13 @@ RETURN_STATUS MultiProcessorPebbling<Graph_t>::computeSchedule(BspSchedule<Graph
     solveILP();
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         return RETURN_STATUS::OSP_SUCCESS;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             return RETURN_STATUS::BEST_FOUND;
 
         } else {
@@ -273,14 +294,14 @@ RETURN_STATUS MultiProcessorPebbling<Graph_t>::computeSchedule(BspSchedule<Graph
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS MultiProcessorPebbling<Graph_t>::computeSynchPebbling(PebblingSchedule<Graph_t> &schedule) {
+    const BspInstance<Graph_t> &instance = schedule.getInstance();
 
-    const BspInstance<Graph_t>& instance = schedule.getInstance();
-    
-    if(max_time == 0)
+    if (max_time == 0) {
         max_time = 2 * static_cast<unsigned>(instance.numberOfVertices());
-    
+    }
+
     mergeSteps = false;
 
     setupBaseVariablesConstraints(instance);
@@ -290,18 +311,14 @@ RETURN_STATUS MultiProcessorPebbling<Graph_t>::computeSynchPebbling(PebblingSche
     solveILP();
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         constructPebblingScheduleFromSolution(schedule);
         return RETURN_STATUS::OSP_SUCCESS;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             constructPebblingScheduleFromSolution(schedule);
             return RETURN_STATUS::OSP_SUCCESS;
 
@@ -311,40 +328,35 @@ RETURN_STATUS MultiProcessorPebbling<Graph_t>::computeSynchPebbling(PebblingSche
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS MultiProcessorPebbling<Graph_t>::computePebbling(PebblingSchedule<Graph_t> &schedule, bool use_async) {
+    const BspInstance<Graph_t> &instance = schedule.getInstance();
 
-    const BspInstance<Graph_t>& instance = schedule.getInstance();
-    
-    if(max_time == 0)
+    if (max_time == 0) {
         max_time = 2 * static_cast<unsigned>(instance.numberOfVertices());
+    }
 
     synchronous = !use_async;
 
     setupBaseVariablesConstraints(instance);
-    if(synchronous)
-    {
+    if (synchronous) {
         setupSyncPhaseVariablesConstraints(instance);
         setupBspVariablesConstraintsObjective(instance);
-    }
-    else
+    } else {
         setupAsyncVariablesConstraintsObjective(instance);
+    }
 
     solveILP();
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         constructPebblingScheduleFromSolution(schedule);
         return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             constructPebblingScheduleFromSolution(schedule);
             return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR;
 
@@ -354,54 +366,52 @@ RETURN_STATUS MultiProcessorPebbling<Graph_t>::computePebbling(PebblingSchedule<
     }
 }
 
-template<typename Graph_t>
-RETURN_STATUS MultiProcessorPebbling<Graph_t>::computePebblingWithInitialSolution(const PebblingSchedule<Graph_t>& initial_solution, PebblingSchedule<Graph_t> &out_schedule, bool use_async)
-{
-    const BspInstance<Graph_t>& instance = initial_solution.getInstance();
+template <typename Graph_t>
+RETURN_STATUS MultiProcessorPebbling<Graph_t>::computePebblingWithInitialSolution(const PebblingSchedule<Graph_t> &initial_solution,
+                                                                                  PebblingSchedule<Graph_t> &out_schedule,
+                                                                                  bool use_async) {
+    const BspInstance<Graph_t> &instance = initial_solution.getInstance();
 
-    std::vector<std::vector<std::vector<vertex_idx> > > computeSteps;
-    std::vector<std::vector<std::vector<vertex_idx> > > sendUpSteps;
-    std::vector<std::vector<std::vector<vertex_idx> > > sendDownSteps;
-    std::vector<std::vector<std::vector<vertex_idx> > > nodesEvictedAfterStep;
+    std::vector<std::vector<std::vector<vertex_idx>>> computeSteps;
+    std::vector<std::vector<std::vector<vertex_idx>>> sendUpSteps;
+    std::vector<std::vector<std::vector<vertex_idx>>> sendDownSteps;
+    std::vector<std::vector<std::vector<vertex_idx>>> nodesEvictedAfterStep;
 
     synchronous = !use_async;
-    
+
     initial_solution.getDataForMultiprocessorPebbling(computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep);
 
     max_time = computeMaxTimeForInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps);
 
-    if(verbose)
-        std::cout<<"Max time set at "<<max_time<<std::endl;
+    if (verbose) {
+        std::cout << "Max time set at " << max_time << std::endl;
+    }
 
     setupBaseVariablesConstraints(instance);
-    if(synchronous)
-    {
+    if (synchronous) {
         setupSyncPhaseVariablesConstraints(instance);
         setupBspVariablesConstraintsObjective(instance);
-    }
-    else
+    } else {
         setupAsyncVariablesConstraintsObjective(instance);
+    }
 
     setInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep);
 
-    if(verbose)
-        std::cout<<"Initial solution set."<<std::endl;
+    if (verbose) {
+        std::cout << "Initial solution set." << std::endl;
+    }
 
     solveILP();
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         constructPebblingScheduleFromSolution(out_schedule);
         return out_schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             constructPebblingScheduleFromSolution(out_schedule);
             return out_schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR;
 
@@ -411,47 +421,38 @@ RETURN_STATUS MultiProcessorPebbling<Graph_t>::computePebblingWithInitialSolutio
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::setupBaseVariablesConstraints(const BspInstance<Graph_t> &instance) {
-
     /*
         Variables
     */
-    compute = std::vector<std::vector<VarArray>>(instance.numberOfVertices(),
-                                                 std::vector<VarArray>(instance.numberOfProcessors()));
+    compute = std::vector<std::vector<VarArray>>(instance.numberOfVertices(), std::vector<VarArray>(instance.numberOfProcessors()));
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             compute[node][processor] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "node_processor_time");
         }
     }
 
     compute_exists.resize(instance.numberOfVertices(),
-                            std::vector<std::vector<bool>>(instance.numberOfProcessors(), std::vector<bool>(max_time, true)));
+                          std::vector<std::vector<bool>>(instance.numberOfProcessors(), std::vector<bool>(max_time, true)));
 
-    send_up = std::vector<std::vector<VarArray>>(instance.numberOfVertices(),
-                                                 std::vector<VarArray>(instance.numberOfProcessors()));
+    send_up = std::vector<std::vector<VarArray>>(instance.numberOfVertices(), std::vector<VarArray>(instance.numberOfProcessors()));
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             send_up[node][processor] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "send_up");
         }
     }
 
     send_up_exists.resize(instance.numberOfVertices(),
-                            std::vector<std::vector<bool>>(instance.numberOfProcessors(), std::vector<bool>(max_time, true)));
+                          std::vector<std::vector<bool>>(instance.numberOfProcessors(), std::vector<bool>(max_time, true)));
 
-    send_down = std::vector<std::vector<VarArray>>(instance.numberOfVertices(),
-                                                   std::vector<VarArray>(instance.numberOfProcessors()));
+    send_down
+        = std::vector<std::vector<VarArray>>(instance.numberOfVertices(), std::vector<VarArray>(instance.numberOfProcessors()));
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             send_down[node][processor] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "send_down");
         }
     }
@@ -462,18 +463,15 @@ void MultiProcessorPebbling<Graph_t>::setupBaseVariablesConstraints(const BspIns
     has_blue = std::vector<VarArray>(instance.numberOfVertices());
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         has_blue[node] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "blue_pebble");
     }
 
     has_blue_exists.resize(instance.numberOfVertices(), std::vector<bool>(max_time, true));
 
-    has_red = std::vector<std::vector<VarArray>>(instance.numberOfVertices(),
-                                                 std::vector<VarArray>(instance.numberOfProcessors()));
+    has_red = std::vector<std::vector<VarArray>>(instance.numberOfVertices(), std::vector<VarArray>(instance.numberOfProcessors()));
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             has_red[node][processor] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "red_pebble");
         }
     }
@@ -482,199 +480,202 @@ void MultiProcessorPebbling<Graph_t>::setupBaseVariablesConstraints(const BspIns
         Invalidate variables based on various factors (node types, input loading, step type restriction)
     */
 
-   for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-        for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-            if(!instance.isCompatible(node, processor))
-                for (unsigned t = 0; t < max_time; t++)
-                {
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+        for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
+            if (!instance.isCompatible(node, processor)) {
+                for (unsigned t = 0; t < max_time; t++) {
                     compute_exists[node][processor][t] = false;
                     send_up_exists[node][processor][t] = false;
                 }
-    
+            }
+        }
+    }
+
     // restrict source nodes if they need to be loaded
-    if(need_to_load_inputs)
-        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-            if (instance.getComputationalDag().in_degree(node) == 0)
-                for (unsigned t = 0; t < max_time; t++)
-                {
-                    for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-                    {
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+            if (instance.getComputationalDag().in_degree(node) == 0) {
+                for (unsigned t = 0; t < max_time; t++) {
+                    for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
                         compute_exists[node][processor][t] = false;
                         send_up_exists[node][processor][t] = false;
                     }
                     has_blue_exists[node][t] = false;
                 }
+            }
+        }
+    }
 
     // restrict step types for simpler ILP
-    if(restrict_step_types)
-        for (unsigned t = 0; t < max_time; t++)
-        {
+    if (restrict_step_types) {
+        for (unsigned t = 0; t < max_time; t++) {
             bool this_is_a_comm_step = (t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1);
-            if(!need_to_load_inputs && t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle)
+            if (!need_to_load_inputs && t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle) {
                 this_is_a_comm_step = true;
-            if(need_to_load_inputs && t % (compute_steps_per_cycle + 2) == 0)
+            }
+            if (need_to_load_inputs && t % (compute_steps_per_cycle + 2) == 0) {
                 this_is_a_comm_step = true;
-            if(this_is_a_comm_step)
-                for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-                    for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
+            }
+            if (this_is_a_comm_step) {
+                for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                    for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
                         compute_exists[node][processor][t] = false;
-            else
-                for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-                    for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-                    {
+                    }
+                }
+            } else {
+                for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                    for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
                         send_up_exists[node][processor][t] = false;
                         send_down_exists[node][processor][t] = false;
                     }
+                }
+            }
         }
+    }
 
     /*
         Constraints
     */
 
-    if(!mergeSteps)
-    {
+    if (!mergeSteps) {
         for (unsigned t = 0; t < max_time; t++) {
-
             for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
                 Expr expr;
                 for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
-                    if(compute_exists[node][processor][t])
+                    if (compute_exists[node][processor][t]) {
                         expr += compute[node][processor][static_cast<int>(t)];
-                    if(send_up_exists[node][processor][t])
+                    }
+                    if (send_up_exists[node][processor][t]) {
                         expr += send_up[node][processor][static_cast<int>(t)];
-                    if(send_down_exists[node][processor][t])
+                    }
+                    if (send_down_exists[node][processor][t]) {
                         expr += send_down[node][processor][static_cast<int>(t)];
+                    }
                 }
                 model.AddConstr(expr <= 1);
             }
         }
-    }
-    else
-    {
-        //extra variables to indicate step types in step merging
+    } else {
+        // extra variables to indicate step types in step merging
         std::vector<VarArray> comp_step_on_proc = std::vector<VarArray>(instance.numberOfProcessors());
         std::vector<VarArray> comm_step_on_proc = std::vector<VarArray>(instance.numberOfProcessors());
 
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             comp_step_on_proc[processor] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "comp_step_on_proc");
             comm_step_on_proc[processor] = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "comm_step_on_proc");
         }
 
         const unsigned M = static_cast<unsigned>(instance.numberOfVertices());
 
-        for (unsigned t = 0; t < max_time; t++)
-            for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-            {
+        for (unsigned t = 0; t < max_time; t++) {
+            for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
                 Expr expr_comp, expr_comm;
                 for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
-                    if(compute_exists[node][processor][t])
+                    if (compute_exists[node][processor][t]) {
                         expr_comp += compute[node][processor][static_cast<int>(t)];
-                    if(send_up_exists[node][processor][t])
+                    }
+                    if (send_up_exists[node][processor][t]) {
                         expr_comm += send_up[node][processor][static_cast<int>(t)];
-                    if(send_down_exists[node][processor][t])
+                    }
+                    if (send_down_exists[node][processor][t]) {
                         expr_comm += send_down[node][processor][static_cast<int>(t)];
+                    }
                 }
 
                 model.AddConstr(M * comp_step_on_proc[processor][static_cast<int>(t)] >= expr_comp);
                 model.AddConstr(2 * M * comm_step_on_proc[processor][static_cast<int>(t)] >= expr_comm);
 
-                model.AddConstr(comp_step_on_proc[processor][static_cast<int>(t)] + comm_step_on_proc[processor][static_cast<int>(t)] <= 1);
+                model.AddConstr(
+                    comp_step_on_proc[processor][static_cast<int>(t)] + comm_step_on_proc[processor][static_cast<int>(t)] <= 1);
             }
+        }
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned t = 1; t < max_time; t++) {
-
-            if(!has_blue_exists[node][t])
+            if (!has_blue_exists[node][t]) {
                 continue;
+            }
 
             Expr expr;
 
             for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
-                if(send_up_exists[node][processor][t-1])
+                if (send_up_exists[node][processor][t - 1]) {
                     expr += send_up[node][processor][static_cast<int>(t) - 1];
+                }
             }
             model.AddConstr(has_blue[node][static_cast<int>(t)] <= has_blue[node][static_cast<int>(t) - 1] + expr);
         }
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             for (unsigned t = 1; t < max_time; t++) {
-
                 Expr expr;
 
-                if(compute_exists[node][processor][t-1])
+                if (compute_exists[node][processor][t - 1]) {
                     expr += compute[node][processor][static_cast<int>(t) - 1];
+                }
 
-                if(send_down_exists[node][processor][t-1])
+                if (send_down_exists[node][processor][t - 1]) {
                     expr += send_down[node][processor][static_cast<int>(t) - 1];
+                }
 
-                model.AddConstr(has_red[node][processor][static_cast<int>(t)] <= has_red[node][processor][static_cast<int>(t) - 1] + expr);
+                model.AddConstr(has_red[node][processor][static_cast<int>(t)]
+                                <= has_red[node][processor][static_cast<int>(t) - 1] + expr);
             }
         }
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             for (unsigned t = 0; t < max_time; t++) {
-
-                if(!compute_exists[node][processor][t])
+                if (!compute_exists[node][processor][t]) {
                     continue;
+                }
 
                 for (const auto &source : instance.getComputationalDag().parents(node)) {
-
-                    if(!mergeSteps || !compute_exists[source][processor][t])
-                        model.AddConstr(compute[node][processor][static_cast<int>(t)] <= has_red[source][processor][static_cast<int>(t)]);
-                    else
-                        model.AddConstr(compute[node][processor][static_cast<int>(t)] <= has_red[source][processor][static_cast<int>(t)] + compute[source][processor][static_cast<int>(t)]);
+                    if (!mergeSteps || !compute_exists[source][processor][t]) {
+                        model.AddConstr(compute[node][processor][static_cast<int>(t)]
+                                        <= has_red[source][processor][static_cast<int>(t)]);
+                    } else {
+                        model.AddConstr(compute[node][processor][static_cast<int>(t)]
+                                        <= has_red[source][processor][static_cast<int>(t)]
+                                               + compute[source][processor][static_cast<int>(t)]);
+                    }
                 }
             }
         }
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             for (unsigned t = 0; t < max_time; t++) {
-
-                if(send_up_exists[node][processor][t])
+                if (send_up_exists[node][processor][t]) {
                     model.AddConstr(send_up[node][processor][static_cast<int>(t)] <= has_red[node][processor][static_cast<int>(t)]);
+                }
             }
         }
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             for (unsigned t = 0; t < max_time; t++) {
-
-                if(send_down_exists[node][processor][t] && has_blue_exists[node][t])
+                if (send_down_exists[node][processor][t] && has_blue_exists[node][t]) {
                     model.AddConstr(send_down[node][processor][static_cast<int>(t)] <= has_blue[node][static_cast<int>(t)]);
+                }
             }
         }
     }
 
     for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
         for (unsigned t = 0; t < max_time; t++) {
             Expr expr;
             for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
                 expr += has_red[node][processor][static_cast<int>(t)] * instance.getComputationalDag().vertex_mem_weight(node);
-                if(!slidingPebbles && compute_exists[node][processor][t])
+                if (!slidingPebbles && compute_exists[node][processor][t]) {
                     expr += compute[node][processor][static_cast<int>(t)] * instance.getComputationalDag().vertex_mem_weight(node);
+                }
             }
 
             model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor));
@@ -682,58 +683,62 @@ void MultiProcessorPebbling<Graph_t>::setupBaseVariablesConstraints(const BspIns
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
-            if(has_red_in_beginning.empty() || has_red_in_beginning[processor].find(node) == has_red_in_beginning[processor].end())
+            if (has_red_in_beginning.empty()
+                || has_red_in_beginning[processor].find(node) == has_red_in_beginning[processor].end()) {
                 model.AddConstr(has_red[node][processor][0] == 0);
-            else
+            } else {
                 model.AddConstr(has_red[node][processor][0] == 1);
+            }
         }
     }
 
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-        if(!need_to_load_inputs || instance.getComputationalDag().in_degree(node) > 0)
+        if (!need_to_load_inputs || instance.getComputationalDag().in_degree(node) > 0) {
             model.AddConstr(has_blue[node][0] == 0);
+        }
     }
 
-    if(needs_blue_at_end.empty()) // default case: blue pebbles required on sinks at the end
+    if (needs_blue_at_end.empty())    // default case: blue pebbles required on sinks at the end
     {
-        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-            if (instance.getComputationalDag().out_degree(node) == 0 && has_blue_exists[node][max_time-1])
-                model.AddConstr(has_blue[node][static_cast<int>(max_time)-1] == 1);
-    }
-    else // otherwise: specified set of nodes that need blue at the end
+        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+            if (instance.getComputationalDag().out_degree(node) == 0 && has_blue_exists[node][max_time - 1]) {
+                model.AddConstr(has_blue[node][static_cast<int>(max_time) - 1] == 1);
+            }
+        }
+    } else    // otherwise: specified set of nodes that need blue at the end
     {
-        for (vertex_idx node : needs_blue_at_end)
-            if(has_blue_exists[node][max_time-1])
-                model.AddConstr(has_blue[node][static_cast<int>(max_time)-1] == 1);
+        for (vertex_idx node : needs_blue_at_end) {
+            if (has_blue_exists[node][max_time - 1]) {
+                model.AddConstr(has_blue[node][static_cast<int>(max_time) - 1] == 1);
+            }
+        }
     }
-    
+
     // disable recomputation if needed
-    if(!allows_recomputation)
-        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-        {
+    if (!allows_recomputation) {
+        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
             Expr expr;
-            for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-                for (unsigned t = 0; t < max_time; t++)
-                    if(compute_exists[node][processor][t])
+            for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
+                for (unsigned t = 0; t < max_time; t++) {
+                    if (compute_exists[node][processor][t]) {
                         expr += compute[node][processor][static_cast<int>(t)];
+                    }
+                }
+            }
 
             model.AddConstr(expr <= 1);
         }
-       
+    }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::setupSyncPhaseVariablesConstraints(const BspInstance<Graph_t> &instance) {
-
     comp_phase = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "comp_phase");
 
-    if(mergeSteps)
+    if (mergeSteps) {
         comm_phase = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "comm_phase");
-    else
-    {
+    } else {
         send_up_phase = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "send_up_phase");
         send_down_phase = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "send_down_phase");
     }
@@ -741,50 +746,47 @@ void MultiProcessorPebbling<Graph_t>::setupSyncPhaseVariablesConstraints(const B
     const unsigned M = static_cast<unsigned>(instance.numberOfProcessors() * instance.numberOfVertices());
 
     for (unsigned t = 0; t < max_time; t++) {
-
         Expr expr_comp, expr_comm, expr_send_up, expr_send_down;
         for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
             for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-                if(compute_exists[node][processor][t])
+                if (compute_exists[node][processor][t]) {
                     expr_comp += compute[node][processor][static_cast<int>(t)];
-                if(mergeSteps)
-                {
-                    if(send_up_exists[node][processor][t])
+                }
+                if (mergeSteps) {
+                    if (send_up_exists[node][processor][t]) {
                         expr_comm += send_up[node][processor][static_cast<int>(t)];
-                    
-                    if(send_down_exists[node][processor][t])
+                    }
+
+                    if (send_down_exists[node][processor][t]) {
                         expr_comm += send_down[node][processor][static_cast<int>(t)];
-                }
-                else
-                {
-                    if(send_up_exists[node][processor][t])
+                    }
+                } else {
+                    if (send_up_exists[node][processor][t]) {
                         expr_send_up += send_up[node][processor][static_cast<int>(t)];
+                    }
 
-                    if(send_down_exists[node][processor][t])
+                    if (send_down_exists[node][processor][t]) {
                         expr_send_down += send_down[node][processor][static_cast<int>(t)];
+                    }
                 }
             }
         }
 
         model.AddConstr(M * comp_phase[static_cast<int>(t)] >= expr_comp);
-        if(mergeSteps)
-        {
+        if (mergeSteps) {
             model.AddConstr(2 * M * comm_phase[static_cast<int>(t)] >= expr_comm);
             model.AddConstr(comp_phase[static_cast<int>(t)] + comm_phase[static_cast<int>(t)] <= 1);
-        }
-        else
-        {
+        } else {
             model.AddConstr(M * send_up_phase[static_cast<int>(t)] >= expr_send_up);
             model.AddConstr(M * send_down_phase[static_cast<int>(t)] >= expr_send_down);
-            model.AddConstr(comp_phase[static_cast<int>(t)] + send_up_phase[static_cast<int>(t)] + send_down_phase[static_cast<int>(t)] <= 1);
+            model.AddConstr(
+                comp_phase[static_cast<int>(t)] + send_up_phase[static_cast<int>(t)] + send_down_phase[static_cast<int>(t)] <= 1);
         }
     }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::setupBspVariablesConstraintsObjective(const BspInstance<Graph_t> &instance) {
-
     comp_phase_ends = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "comp_phase_ends");
 
     comm_phase_ends = model.AddVars(static_cast<int>(max_time), COPT_BINARY, "comm_phase_ends");
@@ -799,8 +801,7 @@ void MultiProcessorPebbling<Graph_t>::setupBspVariablesConstraintsObjective(cons
 
     VarArray send_up_induced;
     VarArray send_down_induced;
-    if(up_and_down_cost_summed)
-    {
+    if (up_and_down_cost_summed) {
         send_up_induced = model.AddVars(static_cast<int>(max_time), COPT_CONTINUOUS, "send_up_induced");
         send_down_induced = model.AddVars(static_cast<int>(max_time), COPT_CONTINUOUS, "send_down_induced");
     }
@@ -812,85 +813,101 @@ void MultiProcessorPebbling<Graph_t>::setupBspVariablesConstraintsObjective(cons
     }
 
     for (unsigned t = 0; t < max_time; t++) {
-
         model.AddConstr(comp_phase[static_cast<int>(t)] >= comp_phase_ends[static_cast<int>(t)]);
-        if(mergeSteps)
+        if (mergeSteps) {
             model.AddConstr(comm_phase[static_cast<int>(t)] >= comm_phase_ends[static_cast<int>(t)]);
-        else
-            model.AddConstr(send_down_phase[static_cast<int>(t)] + send_up_phase[static_cast<int>(t)] >= comm_phase_ends[static_cast<int>(t)]);
+        } else {
+            model.AddConstr(send_down_phase[static_cast<int>(t)] + send_up_phase[static_cast<int>(t)]
+                            >= comm_phase_ends[static_cast<int>(t)]);
+        }
     }
     for (unsigned t = 0; t < max_time - 1; t++) {
-
-        model.AddConstr(comp_phase_ends[static_cast<int>(t)] >= comp_phase[static_cast<int>(t)] - comp_phase[static_cast<int>(t)+1]);
-        if(mergeSteps)
-            model.AddConstr(comm_phase_ends[static_cast<int>(t)] >= comm_phase[static_cast<int>(t)] - comm_phase[static_cast<int>(t)+1]);
-        else
-            model.AddConstr(comm_phase_ends[static_cast<int>(t)] >= send_down_phase[static_cast<int>(t)] + send_up_phase[static_cast<int>(t)] - send_down_phase[static_cast<int>(t)+1] - send_up_phase[static_cast<int>(t)+1]);
+        model.AddConstr(comp_phase_ends[static_cast<int>(t)]
+                        >= comp_phase[static_cast<int>(t)] - comp_phase[static_cast<int>(t) + 1]);
+        if (mergeSteps) {
+            model.AddConstr(comm_phase_ends[static_cast<int>(t)]
+                            >= comm_phase[static_cast<int>(t)] - comm_phase[static_cast<int>(t) + 1]);
+        } else {
+            model.AddConstr(comm_phase_ends[static_cast<int>(t)]
+                            >= send_down_phase[static_cast<int>(t)] + send_up_phase[static_cast<int>(t)]
+                                   - send_down_phase[static_cast<int>(t) + 1] - send_up_phase[static_cast<int>(t) + 1]);
+        }
     }
 
-    model.AddConstr(comp_phase_ends[static_cast<int>(max_time)-1] >= comp_phase[static_cast<int>(max_time)-1]);
-    if(mergeSteps)
-        model.AddConstr(comm_phase_ends[static_cast<int>(max_time)-1] >= comm_phase[static_cast<int>(max_time)-1]);
-    else
-        model.AddConstr(comm_phase_ends[static_cast<int>(max_time)-1] >= send_down_phase[static_cast<int>(max_time)-1] + send_up_phase[static_cast<int>(max_time)-1]);
+    model.AddConstr(comp_phase_ends[static_cast<int>(max_time) - 1] >= comp_phase[static_cast<int>(max_time) - 1]);
+    if (mergeSteps) {
+        model.AddConstr(comm_phase_ends[static_cast<int>(max_time) - 1] >= comm_phase[static_cast<int>(max_time) - 1]);
+    } else {
+        model.AddConstr(comm_phase_ends[static_cast<int>(max_time) - 1]
+                        >= send_down_phase[static_cast<int>(max_time) - 1] + send_up_phase[static_cast<int>(max_time) - 1]);
+    }
 
-    const unsigned M = static_cast<unsigned>(instance.numberOfProcessors() * (
-                                                        sumOfVerticesWorkWeights(instance.getComputationalDag()) +
-                                                        sumOfVerticesCommunicationWeights(instance.getComputationalDag()) ) );
+    const unsigned M = static_cast<unsigned>(instance.numberOfProcessors()
+                                             * (sumOfVerticesWorkWeights(instance.getComputationalDag())
+                                                + sumOfVerticesCommunicationWeights(instance.getComputationalDag())));
 
     for (unsigned t = 1; t < max_time; t++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             Expr expr_work;
             Expr expr_send_up;
             Expr expr_send_down;
             for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-                if(compute_exists[node][processor][t])
-                    expr_work += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][static_cast<int>(t)];
-                if(send_up_exists[node][processor][t])
-                    expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][static_cast<int>(t)];
-                if(send_down_exists[node][processor][t])
-                    expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][static_cast<int>(t)];
-            }
-
-            model.AddConstr(M * comm_phase_ends[static_cast<int>(t)] + work_step_until[processor][static_cast<int>(t)] >=
-                            work_step_until[processor][static_cast<int>(t) - 1] + expr_work);
-
-            model.AddConstr(M * comp_phase_ends[static_cast<int>(t)] + send_up_step_until[processor][static_cast<int>(t)] >=
-                            send_up_step_until[processor][static_cast<int>(t) - 1] + expr_send_up);
-
-            model.AddConstr(M * comp_phase_ends[static_cast<int>(t)] + send_down_step_until[processor][static_cast<int>(t)] >=
-                            send_down_step_until[processor][static_cast<int>(t) - 1] + expr_send_down);
-
-            model.AddConstr(work_induced[static_cast<int>(t)] >= work_step_until[processor][static_cast<int>(t)] - M * (1 - comp_phase_ends[static_cast<int>(t)]));
-            if(up_and_down_cost_summed)
-            {
-                model.AddConstr(send_up_induced[static_cast<int>(t)] >= send_up_step_until[processor][static_cast<int>(t)] - M * (1 - comm_phase_ends[static_cast<int>(t)]));
-                model.AddConstr(send_down_induced[static_cast<int>(t)] >= send_down_step_until[processor][static_cast<int>(t)] - M * (1 - comm_phase_ends[static_cast<int>(t)]));
-                model.AddConstr(comm_induced[static_cast<int>(t)] >= send_up_induced[static_cast<int>(t)] + send_down_induced[static_cast<int>(t)]);
-             }
-            else
-            {
-                model.AddConstr(comm_induced[static_cast<int>(t)] >= send_down_step_until[processor][static_cast<int>(t)] - M * (1 - comm_phase_ends[static_cast<int>(t)]));
-                model.AddConstr(comm_induced[static_cast<int>(t)] >= send_up_step_until[processor][static_cast<int>(t)] - M * (1 - comm_phase_ends[static_cast<int>(t)]));
+                if (compute_exists[node][processor][t]) {
+                    expr_work += instance.getComputationalDag().vertex_work_weight(node)
+                                 * compute[node][processor][static_cast<int>(t)];
+                }
+                if (send_up_exists[node][processor][t]) {
+                    expr_send_up += instance.getComputationalDag().vertex_comm_weight(node)
+                                    * send_up[node][processor][static_cast<int>(t)];
+                }
+                if (send_down_exists[node][processor][t]) {
+                    expr_send_down += instance.getComputationalDag().vertex_comm_weight(node)
+                                      * send_down[node][processor][static_cast<int>(t)];
+                }
+            }
+
+            model.AddConstr(M * comm_phase_ends[static_cast<int>(t)] + work_step_until[processor][static_cast<int>(t)]
+                            >= work_step_until[processor][static_cast<int>(t) - 1] + expr_work);
+
+            model.AddConstr(M * comp_phase_ends[static_cast<int>(t)] + send_up_step_until[processor][static_cast<int>(t)]
+                            >= send_up_step_until[processor][static_cast<int>(t) - 1] + expr_send_up);
+
+            model.AddConstr(M * comp_phase_ends[static_cast<int>(t)] + send_down_step_until[processor][static_cast<int>(t)]
+                            >= send_down_step_until[processor][static_cast<int>(t) - 1] + expr_send_down);
+
+            model.AddConstr(work_induced[static_cast<int>(t)]
+                            >= work_step_until[processor][static_cast<int>(t)] - M * (1 - comp_phase_ends[static_cast<int>(t)]));
+            if (up_and_down_cost_summed) {
+                model.AddConstr(send_up_induced[static_cast<int>(t)] >= send_up_step_until[processor][static_cast<int>(t)]
+                                                                            - M * (1 - comm_phase_ends[static_cast<int>(t)]));
+                model.AddConstr(send_down_induced[static_cast<int>(t)] >= send_down_step_until[processor][static_cast<int>(t)]
+                                                                              - M * (1 - comm_phase_ends[static_cast<int>(t)]));
+                model.AddConstr(comm_induced[static_cast<int>(t)]
+                                >= send_up_induced[static_cast<int>(t)] + send_down_induced[static_cast<int>(t)]);
+            } else {
+                model.AddConstr(comm_induced[static_cast<int>(t)] >= send_down_step_until[processor][static_cast<int>(t)]
+                                                                         - M * (1 - comm_phase_ends[static_cast<int>(t)]));
+                model.AddConstr(comm_induced[static_cast<int>(t)] >= send_up_step_until[processor][static_cast<int>(t)]
+                                                                         - M * (1 - comm_phase_ends[static_cast<int>(t)]));
             }
         }
     }
 
     // t = 0
     for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
         Expr expr_work;
         Expr expr_send_up;
         Expr expr_send_down;
         for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-            if(compute_exists[node][processor][0])
+            if (compute_exists[node][processor][0]) {
                 expr_work += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][0];
-            if(send_up_exists[node][processor][0])
+            }
+            if (send_up_exists[node][processor][0]) {
                 expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][0];
-            if(send_down_exists[node][processor][0])
+            }
+            if (send_down_exists[node][processor][0]) {
                 expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][0];
+            }
         }
 
         model.AddConstr(M * comm_phase_ends[0] + work_step_until[processor][0] >= expr_work);
@@ -900,14 +917,11 @@ void MultiProcessorPebbling<Graph_t>::setupBspVariablesConstraintsObjective(cons
         model.AddConstr(M * comp_phase_ends[0] + send_down_step_until[processor][0] >= expr_send_down);
 
         model.AddConstr(work_induced[0] >= work_step_until[processor][0] - M * (1 - comp_phase_ends[0]));
-        if(up_and_down_cost_summed)
-        {
+        if (up_and_down_cost_summed) {
             model.AddConstr(send_up_induced[0] >= send_up_step_until[processor][0] - M * (1 - comm_phase_ends[0]));
             model.AddConstr(send_down_induced[0] >= send_down_step_until[processor][0] - M * (1 - comm_phase_ends[0]));
             model.AddConstr(comm_induced[0] >= send_up_induced[0] + send_down_induced[0]);
-        }
-        else
-        {
+        } else {
             model.AddConstr(comm_induced[0] >= send_down_step_until[processor][0] - M * (1 - comm_phase_ends[0]));
             model.AddConstr(comm_induced[0] >= send_up_step_until[processor][0] - M * (1 - comm_phase_ends[0]));
         }
@@ -919,25 +933,21 @@ void MultiProcessorPebbling<Graph_t>::setupBspVariablesConstraintsObjective(cons
 
     Expr expr;
     for (unsigned t = 0; t < max_time; t++) {
-        expr += work_induced[static_cast<int>(t)] + instance.synchronisationCosts() * comm_phase_ends[static_cast<int>(t)] +
-                instance.communicationCosts() * comm_induced[static_cast<int>(t)];
+        expr += work_induced[static_cast<int>(t)] + instance.synchronisationCosts() * comm_phase_ends[static_cast<int>(t)]
+                + instance.communicationCosts() * comm_induced[static_cast<int>(t)];
     }
 
     model.SetObjective(expr, COPT_MINIMIZE);
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::setupSyncObjective(const BspInstance<Graph_t> &instance) {
-
     Expr expr;
     for (unsigned t = 0; t < max_time; t++) {
-        if(!mergeSteps)
-        {
-            expr += comp_phase[static_cast<int>(t)] + instance.communicationCosts() * send_up_phase[static_cast<int>(t)] +
-                instance.communicationCosts() * send_down_phase[static_cast<int>(t)];
-        }
-        else
-        {
+        if (!mergeSteps) {
+            expr += comp_phase[static_cast<int>(t)] + instance.communicationCosts() * send_up_phase[static_cast<int>(t)]
+                    + instance.communicationCosts() * send_down_phase[static_cast<int>(t)];
+        } else {
             // this objective+parameter combination is not very meaningful, but still defined here to avoid a segfault otherwise
             expr += comp_phase[static_cast<int>(t)] + instance.communicationCosts() * comm_phase[static_cast<int>(t)];
         }
@@ -946,9 +956,8 @@ void MultiProcessorPebbling<Graph_t>::setupSyncObjective(const BspInstance<Graph
     model.SetObjective(expr, COPT_MINIMIZE);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::setupAsyncVariablesConstraintsObjective(const BspInstance<Graph_t> &instance) {
-
     std::vector<VarArray> finish_times(instance.numberOfProcessors());
 
     for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
@@ -959,77 +968,86 @@ void MultiProcessorPebbling<Graph_t>::setupAsyncVariablesConstraintsObjective(co
 
     VarArray gets_blue = model.AddVars(static_cast<int>(instance.numberOfVertices()), COPT_CONTINUOUS, "gets_blue");
 
-    const unsigned M = static_cast<unsigned>(instance.numberOfProcessors() * (
-                                                        sumOfVerticesWorkWeights(instance.getComputationalDag()) +
-                                                        sumOfVerticesCommunicationWeights(instance.getComputationalDag()) ) );
+    const unsigned M = static_cast<unsigned>(instance.numberOfProcessors()
+                                             * (sumOfVerticesWorkWeights(instance.getComputationalDag())
+                                                + sumOfVerticesCommunicationWeights(instance.getComputationalDag())));
 
     for (unsigned t = 0; t < max_time; t++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             Expr send_down_step_length;
-            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-                if(send_down_exists[node][processor][t])
-                    send_down_step_length += instance.communicationCosts() *
-                        instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][static_cast<int>(t)];
-            
             for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                if (send_down_exists[node][processor][t]) {
+                    send_down_step_length += instance.communicationCosts()
+                                             * instance.getComputationalDag().vertex_comm_weight(node)
+                                             * send_down[node][processor][static_cast<int>(t)];
+                }
+            }
 
-                if(send_up_exists[node][processor][t])
-                    model.AddConstr(gets_blue[static_cast<int>(node)] >= finish_times[processor][static_cast<int>(t)] - (1 - send_up[node][processor][static_cast<int>(t)]) * M);
-                if(send_down_exists[node][processor][t])
-                    model.AddConstr(gets_blue[static_cast<int>(node)] <=
-                                finish_times[processor][static_cast<int>(t)] + (1 - send_down[node][processor][static_cast<int>(t)]) * M - send_down_step_length);
+            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                if (send_up_exists[node][processor][t]) {
+                    model.AddConstr(gets_blue[static_cast<int>(node)]
+                                    >= finish_times[processor][static_cast<int>(t)]
+                                           - (1 - send_up[node][processor][static_cast<int>(t)]) * M);
+                }
+                if (send_down_exists[node][processor][t]) {
+                    model.AddConstr(gets_blue[static_cast<int>(node)]
+                                    <= finish_times[processor][static_cast<int>(t)]
+                                           + (1 - send_down[node][processor][static_cast<int>(t)]) * M - send_down_step_length);
+                }
             }
         }
     }
 
     // makespan constraint
     for (unsigned t = 0; t < max_time; t++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             model.AddConstr(makespan >= finish_times[processor][static_cast<int>(t)]);
         }
     }
 
     // t = 0
     for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
         Expr expr;
         for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
-            if(compute_exists[node][processor][0])
+            if (compute_exists[node][processor][0]) {
                 expr += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][0];
+            }
 
-            if(send_up_exists[node][processor][0])
-                expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][0];
+            if (send_up_exists[node][processor][0]) {
+                expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node)
+                        * send_up[node][processor][0];
+            }
 
-            if(send_down_exists[node][processor][0])
-                expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][0];
+            if (send_down_exists[node][processor][0]) {
+                expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node)
+                        * send_down[node][processor][0];
+            }
         }
 
         model.AddConstr(finish_times[processor][0] >= expr);
     }
 
     for (unsigned t = 1; t < max_time; t++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             Expr expr;
             for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
-                if(compute_exists[node][processor][t])
+                if (compute_exists[node][processor][t]) {
                     expr += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][static_cast<int>(t)];
+                }
 
-                if(send_up_exists[node][processor][t])
-                    expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][static_cast<int>(t)];
+                if (send_up_exists[node][processor][t]) {
+                    expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node)
+                            * send_up[node][processor][static_cast<int>(t)];
+                }
 
-                if(send_down_exists[node][processor][t])
-                    expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][static_cast<int>(t)];
+                if (send_down_exists[node][processor][t]) {
+                    expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node)
+                            * send_down[node][processor][static_cast<int>(t)];
+                }
             }
 
-            model.AddConstr(finish_times[processor][static_cast<int>(t)] >= finish_times[processor][static_cast<int>(t) - 1] + expr);
+            model.AddConstr(finish_times[processor][static_cast<int>(t)]
+                            >= finish_times[processor][static_cast<int>(t) - 1] + expr);
         }
     }
 
@@ -1040,15 +1058,11 @@ void MultiProcessorPebbling<Graph_t>::setupAsyncVariablesConstraintsObjective(co
     model.SetObjective(makespan, COPT_MINIMIZE);
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void MultiProcessorPebbling<Graph_t>::WriteSolutionCallback::callback() {
-
     if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
-
         try {
-
             if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) {
-
                 best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ);
 
                 //    auto sched = constructBspScheduleFromCallback();
@@ -1059,94 +1073,105 @@ void MultiProcessorPebbling<Graph_t>::WriteSolutionCallback::callback() {
                 counter++;
             }
 
-        } catch (const std::exception &e) {
-        }
+        } catch (const std::exception &e) {}
     }
 };
 
-template<typename Graph_t>
-void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(PebblingSchedule<Graph_t> &schedule)
-{
-    const BspInstance<Graph_t>& instance = schedule.getInstance();
+template <typename Graph_t>
+void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(PebblingSchedule<Graph_t> &schedule) {
+    const BspInstance<Graph_t> &instance = schedule.getInstance();
 
-    std::vector<std::vector<std::set< std::pair<unsigned, vertex_idx> > > > nodes_computed(instance.numberOfProcessors(), std::vector<std::set<std::pair<unsigned, vertex_idx> > >(max_time));
-    std::vector<std::vector<std::deque<vertex_idx> > > nodes_sent_up(instance.numberOfProcessors(), std::vector<std::deque<vertex_idx> >(max_time));
-    std::vector<std::vector<std::deque<vertex_idx> > > nodes_sent_down(instance.numberOfProcessors(), std::vector<std::deque<vertex_idx> >(max_time));
-    std::vector<std::vector<std::set<vertex_idx> > > evicted_after(instance.numberOfProcessors(), std::vector<std::set<vertex_idx> >(max_time));
+    std::vector<std::vector<std::set<std::pair<unsigned, vertex_idx>>>> nodes_computed(
+        instance.numberOfProcessors(), std::vector<std::set<std::pair<unsigned, vertex_idx>>>(max_time));
+    std::vector<std::vector<std::deque<vertex_idx>>> nodes_sent_up(instance.numberOfProcessors(),
+                                                                   std::vector<std::deque<vertex_idx>>(max_time));
+    std::vector<std::vector<std::deque<vertex_idx>>> nodes_sent_down(instance.numberOfProcessors(),
+                                                                     std::vector<std::deque<vertex_idx>>(max_time));
+    std::vector<std::vector<std::set<vertex_idx>>> evicted_after(instance.numberOfProcessors(),
+                                                                 std::vector<std::set<vertex_idx>>(max_time));
 
     // used to remove unneeded steps when a node is sent down and then up (which becomes invalid after reordering the comm phases)
-    std::vector<std::vector<bool > > sent_down_already(instance.numberOfVertices(), std::vector<bool>(instance.numberOfProcessors(), false));
-    std::vector<std::vector<bool > > ignore_red(instance.numberOfVertices(), std::vector<bool>(instance.numberOfProcessors(), false));
+    std::vector<std::vector<bool>> sent_down_already(instance.numberOfVertices(),
+                                                     std::vector<bool>(instance.numberOfProcessors(), false));
+    std::vector<std::vector<bool>> ignore_red(instance.numberOfVertices(), std::vector<bool>(instance.numberOfProcessors(), false));
 
     std::vector<vertex_idx> topOrder = GetTopOrder(instance.getComputationalDag());
     std::vector<unsigned> topOrderPosition(instance.numberOfVertices());
-    for(unsigned index = 0; index < instance.numberOfVertices(); ++index)
+    for (unsigned index = 0; index < instance.numberOfVertices(); ++index) {
         topOrderPosition[topOrder[index]] = index;
+    }
 
     std::vector<bool> empty_step(max_time, true);
-    std::vector<std::vector<unsigned> > step_type_on_proc(instance.numberOfProcessors(), std::vector<unsigned>(max_time, 0));
-    
-    for (unsigned step = 0; step < max_time; step++) 
-        for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-                if (compute_exists[node][processor][step] && compute[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99)
-                    step_type_on_proc[processor][step] = 1;
-
+    std::vector<std::vector<unsigned>> step_type_on_proc(instance.numberOfProcessors(), std::vector<unsigned>(max_time, 0));
 
     for (unsigned step = 0; step < max_time; step++) {
-
         for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
-
             for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                if (compute_exists[node][processor][step]
+                    && compute[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
+                    step_type_on_proc[processor][step] = 1;
+                }
+            }
+        }
+    }
 
-                if (step>0 && has_red[node][processor][static_cast<int>(step)-1].Get(COPT_DBLINFO_VALUE) >= .99 && has_red[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) <= .01 && !ignore_red[node][processor])
-                {
-                    for(size_t previous_step = step - 1; previous_step < step; --previous_step)
-                        if(!nodes_computed[processor][previous_step].empty() || !nodes_sent_up[processor][previous_step].empty() || !nodes_sent_down[processor][previous_step].empty() || previous_step == 0)
-                        {
+    for (unsigned step = 0; step < max_time; step++) {
+        for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
+            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                if (step > 0 && has_red[node][processor][static_cast<int>(step) - 1].Get(COPT_DBLINFO_VALUE) >= .99
+                    && has_red[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) <= .01
+                    && !ignore_red[node][processor]) {
+                    for (size_t previous_step = step - 1; previous_step < step; --previous_step) {
+                        if (!nodes_computed[processor][previous_step].empty() || !nodes_sent_up[processor][previous_step].empty()
+                            || !nodes_sent_down[processor][previous_step].empty() || previous_step == 0) {
                             evicted_after[processor][previous_step].insert(node);
                             empty_step[previous_step] = false;
                             break;
                         }
+                    }
                 }
-                
-                if (compute_exists[node][processor][step] && compute[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99)
-                {
+
+                if (compute_exists[node][processor][step]
+                    && compute[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                     nodes_computed[processor][step].emplace(topOrderPosition[node], node);
                     empty_step[step] = false;
                     ignore_red[node][processor] = false;
 
-                    //implicit eviction in case of mergesteps - never having "has_red=1"
-                    if(step + 1 < max_time && has_red[node][processor][static_cast<int>(step)+1].Get(COPT_DBLINFO_VALUE) <= .01)
+                    // implicit eviction in case of mergesteps - never having "has_red=1"
+                    if (step + 1 < max_time && has_red[node][processor][static_cast<int>(step) + 1].Get(COPT_DBLINFO_VALUE) <= .01) {
                         evicted_after[processor][step].insert(node);
+                    }
                 }
 
-                if (send_down_exists[node][processor][step] && send_down[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99)
-                {
+                if (send_down_exists[node][processor][step]
+                    && send_down[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                     bool keep_step = false;
 
-                    for(unsigned next_step = step+1; next_step < max_time && has_red[node][processor][static_cast<int>(next_step)].Get(COPT_DBLINFO_VALUE) >= .99 ; ++next_step)
-                        if(step_type_on_proc[processor][next_step] == 1)
-                        {
+                    for (unsigned next_step = step + 1;
+                         next_step < max_time
+                         && has_red[node][processor][static_cast<int>(next_step)].Get(COPT_DBLINFO_VALUE) >= .99;
+                         ++next_step) {
+                        if (step_type_on_proc[processor][next_step] == 1) {
                             keep_step = true;
                             break;
                         }
+                    }
 
-                    if(keep_step)
-                    {
+                    if (keep_step) {
                         nodes_sent_down[processor][step].push_back(node);
                         empty_step[step] = false;
                         step_type_on_proc[processor][step] = 3;
                         ignore_red[node][processor] = false;
-                    }
-                    else
+                    } else {
                         ignore_red[node][processor] = true;
+                    }
 
                     sent_down_already[node][processor] = true;
                 }
 
-                if (send_up_exists[node][processor][step] && send_up[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99 && !sent_down_already[node][processor])
-                {
+                if (send_up_exists[node][processor][step]
+                    && send_up[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99
+                    && !sent_down_already[node][processor]) {
                     nodes_sent_up[processor][step].push_back(node);
                     empty_step[step] = false;
                     step_type_on_proc[processor][step] = 2;
@@ -1156,60 +1181,57 @@ void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(Pebb
     }
 
     // components of the final PebblingSchedule - the first two dimensions are always processor and superstep
-    std::vector<std::vector<std::vector<vertex_idx> > > compute_steps_per_supstep(instance.numberOfProcessors());
-    std::vector<std::vector<std::vector<std::vector<vertex_idx> > > > nodes_evicted_after_compute(instance.numberOfProcessors());
-    std::vector<std::vector<std::vector<vertex_idx> > > nodes_sent_up_in_supstep(instance.numberOfProcessors());
-    std::vector<std::vector<std::vector<vertex_idx> > > nodes_sent_down_in_supstep(instance.numberOfProcessors());
-    std::vector<std::vector<std::vector<vertex_idx> > > nodes_evicted_in_comm_phase(instance.numberOfProcessors());
+    std::vector<std::vector<std::vector<vertex_idx>>> compute_steps_per_supstep(instance.numberOfProcessors());
+    std::vector<std::vector<std::vector<std::vector<vertex_idx>>>> nodes_evicted_after_compute(instance.numberOfProcessors());
+    std::vector<std::vector<std::vector<vertex_idx>>> nodes_sent_up_in_supstep(instance.numberOfProcessors());
+    std::vector<std::vector<std::vector<vertex_idx>>> nodes_sent_down_in_supstep(instance.numberOfProcessors());
+    std::vector<std::vector<std::vector<vertex_idx>>> nodes_evicted_in_comm_phase(instance.numberOfProcessors());
 
     // edge case: check if an extra superstep must be added in the beginning to evict values that are initially in cache
     bool needs_evict_step_in_beginning = false;
-    for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-        for (unsigned step = 0; step < max_time; step++)
-        {
-            if(step_type_on_proc[proc][step] == 0 && !evicted_after[proc][step].empty())
-            {
+    for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
+        for (unsigned step = 0; step < max_time; step++) {
+            if (step_type_on_proc[proc][step] == 0 && !evicted_after[proc][step].empty()) {
                 needs_evict_step_in_beginning = true;
                 break;
-            }
-            else if(step_type_on_proc[proc][step]>0)
+            } else if (step_type_on_proc[proc][step] > 0) {
                 break;
+            }
         }
+    }
 
     // create the actual PebblingSchedule - iterating over the steps
     unsigned superstepIndex = 0;
-    if(synchronous)
-    {
+    if (synchronous) {
         bool in_comm = true;
         superstepIndex = UINT_MAX;
 
-        if(needs_evict_step_in_beginning)
-        {
+        if (needs_evict_step_in_beginning) {
             // artificially insert comm step in beginning, if it would start with compute otherwise
             bool begins_with_compute = false;
-            for (unsigned step = 0; step < max_time; step++)
-            {
+            for (unsigned step = 0; step < max_time; step++) {
                 bool is_comp = false, is_comm = false;
-                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                {
-                    if(step_type_on_proc[proc][step] == 1)
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
+                    if (step_type_on_proc[proc][step] == 1) {
                         is_comp = true;
-                    if(step_type_on_proc[proc][step] > 1)
+                    }
+                    if (step_type_on_proc[proc][step] > 1) {
                         is_comm = true;
+                    }
                 }
-                if(is_comp)
+                if (is_comp) {
                     begins_with_compute = true;
-                if(is_comp || is_comm)
+                }
+                if (is_comp || is_comm) {
                     break;
+                }
             }
-            
-            if(begins_with_compute)
-            {
+
+            if (begins_with_compute) {
                 superstepIndex = 0;
-                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                {
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
                     compute_steps_per_supstep[proc].push_back(std::vector<vertex_idx>());
-                    nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx> >());
+                    nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx>>());
                     nodes_sent_up_in_supstep[proc].push_back(std::vector<vertex_idx>());
                     nodes_sent_down_in_supstep[proc].push_back(std::vector<vertex_idx>());
                     nodes_evicted_in_comm_phase[proc].push_back(std::vector<vertex_idx>());
@@ -1218,23 +1240,21 @@ void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(Pebb
         }
 
         // process steps
-        for (unsigned step = 0; step < max_time; step++)
-        {
-            if(empty_step[step])
+        for (unsigned step = 0; step < max_time; step++) {
+            if (empty_step[step]) {
                 continue;
+            }
 
             unsigned step_type = 0;
-            for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
+            for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
                 step_type = std::max(step_type, step_type_on_proc[proc][step]);
+            }
 
-            if (step_type == 1)
-            {
-                if(in_comm)
-                {
-                    for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                    {
+            if (step_type == 1) {
+                if (in_comm) {
+                    for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
                         compute_steps_per_supstep[proc].push_back(std::vector<vertex_idx>());
-                        nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx> >());
+                        nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx>>());
                         nodes_sent_up_in_supstep[proc].push_back(std::vector<vertex_idx>());
                         nodes_sent_down_in_supstep[proc].push_back(std::vector<vertex_idx>());
                         nodes_evicted_in_comm_phase[proc].push_back(std::vector<vertex_idx>());
@@ -1242,34 +1262,27 @@ void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(Pebb
                     ++superstepIndex;
                     in_comm = false;
                 }
-                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                {
-                    for(auto index_and_node : nodes_computed[proc][step])
-                    {
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
+                    for (auto index_and_node : nodes_computed[proc][step]) {
                         compute_steps_per_supstep[proc][superstepIndex].push_back(index_and_node.second);
                         nodes_evicted_after_compute[proc][superstepIndex].push_back(std::vector<vertex_idx>());
                     }
-                    for(vertex_idx node : evicted_after[proc][step])
-                    {
-                        if(!nodes_evicted_after_compute[proc][superstepIndex].empty())
+                    for (vertex_idx node : evicted_after[proc][step]) {
+                        if (!nodes_evicted_after_compute[proc][superstepIndex].empty()) {
                             nodes_evicted_after_compute[proc][superstepIndex].back().push_back(node);
-                        else
-                        {
+                        } else {
                             // can only happen in special case: eviction in the very beginning
                             nodes_evicted_in_comm_phase[proc][0].push_back(node);
                         }
                     }
                 }
             }
-            
-            if (step_type == 2 || step_type == 3)
-            {
-                if(superstepIndex == UINT_MAX)
-                {
-                    for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                    {
+
+            if (step_type == 2 || step_type == 3) {
+                if (superstepIndex == UINT_MAX) {
+                    for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
                         compute_steps_per_supstep[proc].push_back(std::vector<vertex_idx>());
-                        nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx> >());
+                        nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx>>());
                         nodes_sent_up_in_supstep[proc].push_back(std::vector<vertex_idx>());
                         nodes_sent_down_in_supstep[proc].push_back(std::vector<vertex_idx>());
                         nodes_evicted_in_comm_phase[proc].push_back(std::vector<vertex_idx>());
@@ -1278,91 +1291,93 @@ void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(Pebb
                 }
 
                 in_comm = true;
-                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                {
-                    for(vertex_idx node : nodes_sent_up[proc][step])
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
+                    for (vertex_idx node : nodes_sent_up[proc][step]) {
                         nodes_sent_up_in_supstep[proc][superstepIndex].push_back(node);
-                    for(vertex_idx node : evicted_after[proc][step])
+                    }
+                    for (vertex_idx node : evicted_after[proc][step]) {
                         nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node);
-                    for(vertex_idx node : nodes_sent_down[proc][step])
+                    }
+                    for (vertex_idx node : nodes_sent_down[proc][step]) {
                         nodes_sent_down_in_supstep[proc][superstepIndex].push_back(node);
+                    }
                 }
             }
         }
-    }
-    else
-    {
+    } else {
         std::vector<unsigned> step_idx_on_proc(instance.numberOfProcessors(), 0);
 
         std::vector<bool> already_has_blue(instance.numberOfVertices(), false);
-        if(need_to_load_inputs)
-            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-                if(instance.getComputationalDag().in_degree(node) == 0)
+        if (need_to_load_inputs) {
+            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                if (instance.getComputationalDag().in_degree(node) == 0) {
                     already_has_blue[node] = true;
+                }
+            }
+        }
 
         std::vector<bool> proc_finished(instance.numberOfProcessors(), false);
         unsigned nr_proc_finished = 0;
-        while(nr_proc_finished < instance.numberOfProcessors())
-        {
+        while (nr_proc_finished < instance.numberOfProcessors()) {
             // preliminary sweep of superstep, to see if we need to wait for other processors
             std::vector<unsigned> idx_limit_on_proc = step_idx_on_proc;
 
             // first add compute steps
-            if(!needs_evict_step_in_beginning || superstepIndex > 0)
-            {
-                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                    while(idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] <= 1)
+            if (!needs_evict_step_in_beginning || superstepIndex > 0) {
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
+                    while (idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] <= 1) {
                         ++idx_limit_on_proc[proc];
+                    }
+                }
             }
 
             // then add communications step until possible (note - they might not be valid if all put into a single superstep!)
             std::set<vertex_idx> new_blues;
             bool still_making_progress = true;
-            while(still_making_progress)
-            {
+            while (still_making_progress) {
                 still_making_progress = false;
-                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-                    while(idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] != 1)
-                    {
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
+                    while (idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] != 1) {
                         bool accept_step = true;
-                        for(vertex_idx node : nodes_sent_down[proc][idx_limit_on_proc[proc]])
-                            if(!already_has_blue[node] && new_blues.find(node) == new_blues.end())
+                        for (vertex_idx node : nodes_sent_down[proc][idx_limit_on_proc[proc]]) {
+                            if (!already_has_blue[node] && new_blues.find(node) == new_blues.end()) {
                                 accept_step = false;
-                        
-                        if(!accept_step)
+                            }
+                        }
+
+                        if (!accept_step) {
                             break;
+                        }
 
-                        for(vertex_idx node : nodes_sent_up[proc][idx_limit_on_proc[proc]])
-                            if(!already_has_blue[node])
+                        for (vertex_idx node : nodes_sent_up[proc][idx_limit_on_proc[proc]]) {
+                            if (!already_has_blue[node]) {
                                 new_blues.insert(node);
-                        
+                            }
+                        }
+
                         still_making_progress = true;
                         ++idx_limit_on_proc[proc];
-                    }   
+                    }
+                }
             }
 
             // actually process the superstep
-            for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++)
-            {
+            for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) {
                 compute_steps_per_supstep[proc].push_back(std::vector<vertex_idx>());
-                nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx> >());
+                nodes_evicted_after_compute[proc].push_back(std::vector<std::vector<vertex_idx>>());
                 nodes_sent_up_in_supstep[proc].push_back(std::vector<vertex_idx>());
                 nodes_sent_down_in_supstep[proc].push_back(std::vector<vertex_idx>());
                 nodes_evicted_in_comm_phase[proc].push_back(std::vector<vertex_idx>());
 
-                while(step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] <= 1)
-                {
-                    for(auto index_and_node : nodes_computed[proc][step_idx_on_proc[proc]])
-                    {
+                while (step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] <= 1) {
+                    for (auto index_and_node : nodes_computed[proc][step_idx_on_proc[proc]]) {
                         compute_steps_per_supstep[proc][superstepIndex].push_back(index_and_node.second);
                         nodes_evicted_after_compute[proc][superstepIndex].push_back(std::vector<vertex_idx>());
                     }
-                    for(vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]])
-                    {
-                        if(!nodes_evicted_after_compute[proc][superstepIndex].empty())
+                    for (vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) {
+                        if (!nodes_evicted_after_compute[proc][superstepIndex].empty()) {
                             nodes_evicted_after_compute[proc][superstepIndex].back().push_back(node);
-                        else
-                        {
+                        } else {
                             // can only happen in special case: eviction in the very beginning
                             nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node);
                         }
@@ -1370,22 +1385,21 @@ void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(Pebb
 
                     ++step_idx_on_proc[proc];
                 }
-                while(step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] != 1)
-                {
-                    for(vertex_idx node : nodes_sent_up[proc][step_idx_on_proc[proc]])
-                    {
+                while (step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] != 1) {
+                    for (vertex_idx node : nodes_sent_up[proc][step_idx_on_proc[proc]]) {
                         nodes_sent_up_in_supstep[proc][superstepIndex].push_back(node);
                         already_has_blue[node] = true;
                     }
-                    for(vertex_idx node : nodes_sent_down[proc][step_idx_on_proc[proc]])
+                    for (vertex_idx node : nodes_sent_down[proc][step_idx_on_proc[proc]]) {
                         nodes_sent_down_in_supstep[proc][superstepIndex].push_back(node);
-                    for(vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]])
+                    }
+                    for (vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) {
                         nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node);
+                    }
 
                     ++step_idx_on_proc[proc];
                 }
-                if(step_idx_on_proc[proc] == max_time && !proc_finished[proc])
-                {
+                if (step_idx_on_proc[proc] == max_time && !proc_finished[proc]) {
                     proc_finished[proc] = true;
                     ++nr_proc_finished;
                 }
@@ -1394,217 +1408,240 @@ void MultiProcessorPebbling<Graph_t>::constructPebblingScheduleFromSolution(Pebb
         }
     }
 
-    std::cout<<"MPP ILP best solution value: "<<model.GetDblAttr(COPT_DBLATTR_BESTOBJ)<<", best lower bound: "<<model.GetDblAttr(COPT_DBLATTR_BESTBND)<<std::endl;
-
-    schedule = PebblingSchedule<Graph_t>(instance, compute_steps_per_supstep, nodes_evicted_after_compute,
-                            nodes_sent_up_in_supstep, nodes_sent_down_in_supstep, nodes_evicted_in_comm_phase, needs_blue_at_end, has_red_in_beginning, need_to_load_inputs); 
+    std::cout << "MPP ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ)
+              << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl;
+
+    schedule = PebblingSchedule<Graph_t>(instance,
+                                         compute_steps_per_supstep,
+                                         nodes_evicted_after_compute,
+                                         nodes_sent_up_in_supstep,
+                                         nodes_sent_down_in_supstep,
+                                         nodes_evicted_in_comm_phase,
+                                         needs_blue_at_end,
+                                         has_red_in_beginning,
+                                         need_to_load_inputs);
 }
 
-template<typename Graph_t>
-void MultiProcessorPebbling<Graph_t>::setInitialSolution(const BspInstance<Graph_t> &instance,
-                                                const std::vector<std::vector<std::vector<vertex_idx> > >& computeSteps,
-                                                const std::vector<std::vector<std::vector<vertex_idx> > >& sendUpSteps,
-                                                const std::vector<std::vector<std::vector<vertex_idx> > >& sendDownSteps,
-                                                const std::vector<std::vector<std::vector<vertex_idx> > >& nodesEvictedAfterStep)
-{
+template <typename Graph_t>
+void MultiProcessorPebbling<Graph_t>::setInitialSolution(
+    const BspInstance<Graph_t> &instance,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &computeSteps,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &sendUpSteps,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &sendDownSteps,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &nodesEvictedAfterStep) {
     const unsigned N = static_cast<unsigned>(instance.numberOfVertices());
 
     std::vector<bool> in_slow_mem(N, false);
-    if(need_to_load_inputs)
-        for(vertex_idx node=0; node < N; ++node)
-            if(instance.getComputationalDag().in_degree(node) == 0)
+    if (need_to_load_inputs) {
+        for (vertex_idx node = 0; node < N; ++node) {
+            if (instance.getComputationalDag().in_degree(node) == 0) {
                 in_slow_mem[node] = true;
+            }
+        }
+    }
 
-    std::vector<std::vector<unsigned> > in_fast_mem(N, std::vector<unsigned>(instance.numberOfProcessors(), false));
-    if(!has_red_in_beginning.empty())
-        for(unsigned proc=0; proc<instance.numberOfProcessors(); ++proc)
-            for(vertex_idx node : has_red_in_beginning[proc])
-                in_fast_mem[node][proc] = true;            
+    std::vector<std::vector<unsigned>> in_fast_mem(N, std::vector<unsigned>(instance.numberOfProcessors(), false));
+    if (!has_red_in_beginning.empty()) {
+        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+            for (vertex_idx node : has_red_in_beginning[proc]) {
+                in_fast_mem[node][proc] = true;
+            }
+        }
+    }
 
     unsigned step = 0, new_step_idx = 0;
-    for(; step < computeSteps[0].size(); ++step)
-    {
-        for(vertex_idx node=0; node < N; ++node)
-        {
-            if(has_blue_exists[node][new_step_idx])
+    for (; step < computeSteps[0].size(); ++step) {
+        for (vertex_idx node = 0; node < N; ++node) {
+            if (has_blue_exists[node][new_step_idx]) {
                 model.SetMipStart(has_blue[node][static_cast<int>(new_step_idx)], static_cast<int>(in_slow_mem[node]));
-            for(unsigned proc=0; proc<instance.numberOfProcessors(); ++proc)
+            }
+            for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
                 model.SetMipStart(has_red[node][proc][static_cast<int>(new_step_idx)], static_cast<int>(in_fast_mem[node][proc]));
+            }
         }
 
-        if(restrict_step_types)
-        {
+        if (restrict_step_types) {
             // align step number with step type cycle's phase, if needed
             bool skip_step = true;
-            while(skip_step)
-            {
+            while (skip_step) {
                 skip_step = false;
                 bool is_compute = false, is_send_up = false, is_send_down = false;
-                for(unsigned proc=0; proc<instance.numberOfProcessors(); ++proc)
-                {
-                    if(!computeSteps[proc][step].empty())
+                for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+                    if (!computeSteps[proc][step].empty()) {
                         is_compute = true;
-                    if(!sendUpSteps[proc][step].empty())
+                    }
+                    if (!sendUpSteps[proc][step].empty()) {
                         is_send_up = true;
-                    if(!sendDownSteps[proc][step].empty())
+                    }
+                    if (!sendDownSteps[proc][step].empty()) {
                         is_send_down = true;
+                    }
                 }
-                
-                bool send_up_step_idx = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1))
-                                        || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle));
-                bool send_down_step_idx = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == 0))
-                                        || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1));
 
-                if(is_compute && (send_up_step_idx || send_down_step_idx))
+                bool send_up_step_idx
+                    = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1))
+                      || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle));
+                bool send_down_step_idx
+                    = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == 0))
+                      || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1));
+
+                if (is_compute && (send_up_step_idx || send_down_step_idx)) {
                     skip_step = true;
-                if(is_send_up && !send_up_step_idx)
+                }
+                if (is_send_up && !send_up_step_idx) {
                     skip_step = true;
-                if(is_send_down && !send_down_step_idx)
+                }
+                if (is_send_down && !send_down_step_idx) {
                     skip_step = true;
-                
-                if(skip_step)
-                {
+                }
+
+                if (skip_step) {
                     ++new_step_idx;
-                    for(vertex_idx node=0; node < N; ++node)
-                    {
-                        if(has_blue_exists[node][new_step_idx])
+                    for (vertex_idx node = 0; node < N; ++node) {
+                        if (has_blue_exists[node][new_step_idx]) {
                             model.SetMipStart(has_blue[node][static_cast<int>(new_step_idx)], static_cast<int>(in_slow_mem[node]));
-                        for(unsigned proc=0; proc<instance.numberOfProcessors(); ++proc)
-                            model.SetMipStart(has_red[node][proc][static_cast<int>(new_step_idx)], static_cast<int>(in_fast_mem[node][proc]));
+                        }
+                        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+                            model.SetMipStart(has_red[node][proc][static_cast<int>(new_step_idx)],
+                                              static_cast<int>(in_fast_mem[node][proc]));
+                        }
                     }
                 }
             }
         }
 
-        for(unsigned proc=0; proc<instance.numberOfProcessors(); ++proc)
-        {
+        for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
             std::vector<bool> value_of_node(N, false);
-            for(vertex_idx node : computeSteps[proc][step])
-            {
+            for (vertex_idx node : computeSteps[proc][step]) {
                 value_of_node[node] = true;
-                if(compute_exists[node][proc][new_step_idx])
+                if (compute_exists[node][proc][new_step_idx]) {
                     model.SetMipStart(compute[node][proc][static_cast<int>(new_step_idx)], 1);
+                }
                 in_fast_mem[node][proc] = true;
             }
-            for(vertex_idx node : computeSteps[proc][step])
-            {
-                if(!value_of_node[node])
-                {
-                    if(compute_exists[node][proc][new_step_idx])
+            for (vertex_idx node : computeSteps[proc][step]) {
+                if (!value_of_node[node]) {
+                    if (compute_exists[node][proc][new_step_idx]) {
                         model.SetMipStart(compute[node][proc][static_cast<int>(new_step_idx)], 0);
-                }
-                else
+                    }
+                } else {
                     value_of_node[node] = false;
+                }
             }
 
-            for(vertex_idx node : sendUpSteps[proc][step])
-            {
+            for (vertex_idx node : sendUpSteps[proc][step]) {
                 value_of_node[node] = true;
-                if(send_up_exists[node][proc][new_step_idx])
+                if (send_up_exists[node][proc][new_step_idx]) {
                     model.SetMipStart(send_up[node][proc][static_cast<int>(new_step_idx)], 1);
+                }
                 in_slow_mem[node] = true;
             }
-            for(vertex_idx node : sendUpSteps[proc][step])
-            {
-                if(!value_of_node[node])
-                {
-                    if(send_up_exists[node][proc][new_step_idx])
+            for (vertex_idx node : sendUpSteps[proc][step]) {
+                if (!value_of_node[node]) {
+                    if (send_up_exists[node][proc][new_step_idx]) {
                         model.SetMipStart(send_up[node][proc][static_cast<int>(new_step_idx)], 0);
-                }
-                else
+                    }
+                } else {
                     value_of_node[node] = false;
+                }
             }
 
-            for(vertex_idx node : sendDownSteps[proc][step])
-            {
+            for (vertex_idx node : sendDownSteps[proc][step]) {
                 value_of_node[node] = true;
-                if(send_down_exists[node][proc][new_step_idx])
+                if (send_down_exists[node][proc][new_step_idx]) {
                     model.SetMipStart(send_down[node][proc][static_cast<int>(new_step_idx)], 1);
+                }
                 in_fast_mem[node][proc] = true;
             }
-            for(vertex_idx node : sendDownSteps[proc][step])
-            {
-                if(!value_of_node[node])
-                {
-                    if(send_down_exists[node][proc][new_step_idx])
+            for (vertex_idx node : sendDownSteps[proc][step]) {
+                if (!value_of_node[node]) {
+                    if (send_down_exists[node][proc][new_step_idx]) {
                         model.SetMipStart(send_down[node][proc][static_cast<int>(new_step_idx)], 0);
-                }
-                else
+                    }
+                } else {
                     value_of_node[node] = false;
+                }
             }
 
-            for(vertex_idx node : nodesEvictedAfterStep[proc][step])
+            for (vertex_idx node : nodesEvictedAfterStep[proc][step]) {
                 in_fast_mem[node][proc] = false;
-            
+            }
         }
         ++new_step_idx;
     }
-    for(; new_step_idx < max_time; ++new_step_idx)
-    {
-        for(vertex_idx node=0; node < N; ++node)
-        {
-            if(has_blue_exists[node][new_step_idx])
+    for (; new_step_idx < max_time; ++new_step_idx) {
+        for (vertex_idx node = 0; node < N; ++node) {
+            if (has_blue_exists[node][new_step_idx]) {
                 model.SetMipStart(has_blue[node][static_cast<int>(new_step_idx)], static_cast<int>(in_slow_mem[node]));
-            for(unsigned proc=0; proc < instance.numberOfProcessors(); ++proc)
-            {
+            }
+            for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
                 model.SetMipStart(has_red[node][proc][static_cast<int>(new_step_idx)], 0);
-                if(compute_exists[node][proc][new_step_idx])
+                if (compute_exists[node][proc][new_step_idx]) {
                     model.SetMipStart(compute[node][proc][static_cast<int>(new_step_idx)], 0);
-                if(send_up_exists[node][proc][new_step_idx])
+                }
+                if (send_up_exists[node][proc][new_step_idx]) {
                     model.SetMipStart(send_up[node][proc][static_cast<int>(new_step_idx)], 0);
-                if(send_down_exists[node][proc][new_step_idx])
+                }
+                if (send_down_exists[node][proc][new_step_idx]) {
                     model.SetMipStart(send_down[node][proc][static_cast<int>(new_step_idx)], 0);
+                }
             }
         }
     }
     model.LoadMipStart();
 }
 
-template<typename Graph_t>
-unsigned MultiProcessorPebbling<Graph_t>::computeMaxTimeForInitialSolution(const BspInstance<Graph_t> &instance,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& computeSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& sendUpSteps,
-                            const std::vector<std::vector<std::vector<vertex_idx> > >& sendDownSteps) const
-{
-    if(!restrict_step_types)
+template <typename Graph_t>
+unsigned MultiProcessorPebbling<Graph_t>::computeMaxTimeForInitialSolution(
+    const BspInstance<Graph_t> &instance,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &computeSteps,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &sendUpSteps,
+    const std::vector<std::vector<std::vector<vertex_idx>>> &sendDownSteps) const {
+    if (!restrict_step_types) {
         return static_cast<unsigned>(computeSteps[0].size()) + 3;
-    
+    }
+
     unsigned step = 0, new_step_idx = 0;
-    for(; step < computeSteps[0].size(); ++step)
-    {
+    for (; step < computeSteps[0].size(); ++step) {
         // align step number with step type cycle's phase, if needed
         bool skip_step = true;
-        while(skip_step)
-        {
+        while (skip_step) {
             skip_step = false;
             bool is_compute = false, is_send_up = false, is_send_down = false;
-            for(unsigned proc=0; proc<instance.numberOfProcessors(); ++proc)
-            {
-                if(!computeSteps[proc][step].empty())
+            for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) {
+                if (!computeSteps[proc][step].empty()) {
                     is_compute = true;
-                if(!sendUpSteps[proc][step].empty())
+                }
+                if (!sendUpSteps[proc][step].empty()) {
                     is_send_up = true;
-                if(!sendDownSteps[proc][step].empty())
+                }
+                if (!sendDownSteps[proc][step].empty()) {
                     is_send_down = true;
+                }
             }
 
-            bool send_up_step_idx = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1))
-                                        || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle));
-            bool send_down_step_idx = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == 0))
-                                    || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1));
-
+            bool send_up_step_idx
+                = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1))
+                  || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle));
+            bool send_down_step_idx
+                = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == 0))
+                  || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1));
 
-            if(is_compute && (send_up_step_idx || send_down_step_idx))
+            if (is_compute && (send_up_step_idx || send_down_step_idx)) {
                 skip_step = true;
-            if(is_send_up && !send_up_step_idx)
+            }
+            if (is_send_up && !send_up_step_idx) {
                 skip_step = true;
-            if(is_send_down && !send_down_step_idx)
+            }
+            if (is_send_down && !send_down_step_idx) {
                 skip_step = true;
-            
-            if(skip_step)
+            }
+
+            if (skip_step) {
                 ++new_step_idx;
+            }
         }
-            
+
         ++new_step_idx;
     }
 
@@ -1612,24 +1649,25 @@ unsigned MultiProcessorPebbling<Graph_t>::computeMaxTimeForInitialSolution(const
     return new_step_idx;
 }
 
-template<typename Graph_t>
-bool MultiProcessorPebbling<Graph_t>::hasEmptyStep(const BspInstance<Graph_t> &instance)
-{
-    for (unsigned step = 0; step < max_time; ++step)
-    {
+template <typename Graph_t>
+bool MultiProcessorPebbling<Graph_t>::hasEmptyStep(const BspInstance<Graph_t> &instance) {
+    for (unsigned step = 0; step < max_time; ++step) {
         bool empty = true;
-        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-            for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++)
-            {
-                if((compute_exists[node][processor][step] && compute[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) || 
-                   (send_up_exists[node][processor][step] && send_up[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) ||
-                    (send_down_exists[node][processor][step] && send_down[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99 ))
+        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+            for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
+                if ((compute_exists[node][processor][step] && compute[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99)
+                    || (send_up_exists[node][processor][step] && send_up[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99)
+                    || (send_down_exists[node][processor][step]
+                        && send_down[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99)) {
                     empty = false;
+                }
             }
-        if(empty)
+        }
+        if (empty) {
             return true;
+        }
     }
     return false;
 }
 
-}
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp b/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp
index 1ac8561b..0544eaab 100644
--- a/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp
+++ b/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp
@@ -13,27 +13,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-#include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/graph_algorithms/subgraph_algorithms.hpp"
 #include "osp/pebbling/PebblingSchedule.hpp"
+#include "osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp"
 #include "osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp"
 #include "osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp"
-#include "osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-#include "osp/graph_algorithms/subgraph_algorithms.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class PebblingPartialILP : public Scheduler<Graph_t> {
-
     static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
-    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>, "PebblingSchedule requires work and comm. weights to have the same type.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "PebblingSchedule requires work and comm. weights to have the same type.");
 
     using vertex_idx = vertex_idx_t<Graph_t>;
     using cost_type = v_workw_t<Graph_t>;
@@ -67,21 +67,32 @@ class PebblingPartialILP : public Scheduler<Graph_t> {
 
     // getters and setters for problem parameters
     inline std::pair<unsigned, unsigned> getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); }
-    inline void setMinSize(const unsigned min_size) {minPartitionSize = min_size; maxPartitionSize = 2*min_size; }
-    inline void setMinAndMaxSize(const std::pair<unsigned, unsigned> min_and_max) {minPartitionSize = min_and_max.first; maxPartitionSize = min_and_max.second; }
-    inline void setAsync(const bool async_) {asynchronous = async_; }
-    inline void setSecondsForSubILP(const unsigned seconds_) {time_seconds_for_subILPs = seconds_; }
-    inline void setVerbose(const bool verbose_) {verbose = verbose_; }
-};
 
-template<typename Graph_t>
-RETURN_STATUS PebblingPartialILP<Graph_t>::computePebbling(PebblingSchedule<Graph_t> &schedule){
+    inline void setMinSize(const unsigned min_size) {
+        minPartitionSize = min_size;
+        maxPartitionSize = 2 * min_size;
+    }
+
+    inline void setMinAndMaxSize(const std::pair<unsigned, unsigned> min_and_max) {
+        minPartitionSize = min_and_max.first;
+        maxPartitionSize = min_and_max.second;
+    }
+
+    inline void setAsync(const bool async_) { asynchronous = async_; }
 
-    const BspInstance<Graph_t>& instance = schedule.getInstance();
+    inline void setSecondsForSubILP(const unsigned seconds_) { time_seconds_for_subILPs = seconds_; }
 
-    if(!PebblingSchedule<Graph_t>::hasValidSolution(instance))
+    inline void setVerbose(const bool verbose_) { verbose = verbose_; }
+};
+
+template <typename Graph_t>
+RETURN_STATUS PebblingPartialILP<Graph_t>::computePebbling(PebblingSchedule<Graph_t> &schedule) {
+    const BspInstance<Graph_t> &instance = schedule.getInstance();
+
+    if (!PebblingSchedule<Graph_t>::hasValidSolution(instance)) {
         return RETURN_STATUS::ERROR;
-    
+    }
+
     // STEP 1: divide DAG acyclicly with partitioning ILP
 
     AcyclicDagDivider<Graph_t> dag_divider;
@@ -94,130 +105,141 @@ RETURN_STATUS PebblingPartialILP<Graph_t>::computePebbling(PebblingSchedule<Grap
 
     // STEP 2: develop high-level multischedule on parts
 
-    BspInstance<Graph_t> contracted_instance(contracted_dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix());
+    BspInstance<Graph_t> contracted_instance(
+        contracted_dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix());
 
     SubproblemMultiScheduling<Graph_t> multi_scheduler;
-    std::vector<std::set<unsigned> > processors_to_parts_and_types;
+    std::vector<std::set<unsigned>> processors_to_parts_and_types;
     multi_scheduler.computeMultiSchedule(contracted_instance, processors_to_parts_and_types);
 
-    std::vector<std::set<unsigned> > processors_to_parts(nr_parts);
-    for(unsigned part = 0; part < nr_parts; ++part)
-        for(unsigned type = 0; type < instance.getComputationalDag().num_vertex_types(); ++type)
-            if(part_and_nodetype_to_new_index.find({part, type}) != part_and_nodetype_to_new_index.end())
-            {
+    std::vector<std::set<unsigned>> processors_to_parts(nr_parts);
+    for (unsigned part = 0; part < nr_parts; ++part) {
+        for (unsigned type = 0; type < instance.getComputationalDag().num_vertex_types(); ++type) {
+            if (part_and_nodetype_to_new_index.find({part, type}) != part_and_nodetype_to_new_index.end()) {
                 unsigned new_index = part_and_nodetype_to_new_index[{part, type}];
-                for(unsigned proc : processors_to_parts_and_types[new_index])
+                for (unsigned proc : processors_to_parts_and_types[new_index]) {
                     processors_to_parts[part].insert(proc);
+                }
             }
+        }
+    }
 
     // AUX: check for isomorphism
 
     // create set of nodes & external sources for all parts, and the nodes that need to have blue pebble at the end
-    std::vector<std::set<vertex_idx> > nodes_in_part(nr_parts), extra_sources(nr_parts);
-    std::vector<std::map<vertex_idx, vertex_idx> > original_node_id(nr_parts);
-    std::vector<std::map<unsigned, unsigned> > original_proc_id(nr_parts);
-    for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node)
-    {
-        if(instance.getComputationalDag().in_degree(node) > 0)
+    std::vector<std::set<vertex_idx>> nodes_in_part(nr_parts), extra_sources(nr_parts);
+    std::vector<std::map<vertex_idx, vertex_idx>> original_node_id(nr_parts);
+    std::vector<std::map<unsigned, unsigned>> original_proc_id(nr_parts);
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) {
+        if (instance.getComputationalDag().in_degree(node) > 0) {
             nodes_in_part[assignment_to_parts[node]].insert(node);
-        else
+        } else {
             extra_sources[assignment_to_parts[node]].insert(node);
-        for (const vertex_idx &pred : instance.getComputationalDag().parents(node))
-            if(assignment_to_parts[node] != assignment_to_parts[pred])
+        }
+        for (const vertex_idx &pred : instance.getComputationalDag().parents(node)) {
+            if (assignment_to_parts[node] != assignment_to_parts[pred]) {
                 extra_sources[assignment_to_parts[node]].insert(pred);
+            }
+        }
     }
 
     std::vector<Graph_t> subDags;
-    for(unsigned part = 0; part < nr_parts; ++part)
-    {
+    for (unsigned part = 0; part < nr_parts; ++part) {
         Graph_t dag;
         create_induced_subgraph(instance.getComputationalDag(), dag, nodes_in_part[part], extra_sources[part]);
         subDags.push_back(dag);
-        
+
         // set source nodes to a new type, so that they are compatible with any processor
         unsigned artificial_type_for_sources = subDags.back().num_vertex_types();
-        for(vertex_idx node_idx = 0; node_idx < extra_sources[part].size(); ++node_idx)
+        for (vertex_idx node_idx = 0; node_idx < extra_sources[part].size(); ++node_idx) {
             subDags.back().set_vertex_type(node_idx, artificial_type_for_sources);
+        }
     }
 
     std::vector<unsigned> isomorphicTo(nr_parts, UINT_MAX);
 
-    std::cout<<"Number of parts: "<<nr_parts<<std::endl;
+    std::cout << "Number of parts: " << nr_parts << std::endl;
 
-    for(unsigned part = 0; part < nr_parts; ++part)
-        for(unsigned other_part = part + 1; other_part < nr_parts; ++other_part)
-        {
-            if(isomorphicTo[other_part] < UINT_MAX)
+    for (unsigned part = 0; part < nr_parts; ++part) {
+        for (unsigned other_part = part + 1; other_part < nr_parts; ++other_part) {
+            if (isomorphicTo[other_part] < UINT_MAX) {
                 continue;
+            }
 
             bool isomorphic = true;
-            if(!checkOrderedIsomorphism(subDags[part], subDags[other_part]))
+            if (!checkOrderedIsomorphism(subDags[part], subDags[other_part])) {
                 continue;
-            
+            }
+
             std::vector<unsigned> proc_assigned_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
             std::vector<unsigned> other_proc_assigned_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0);
-            for(unsigned proc : processors_to_parts[part])
+            for (unsigned proc : processors_to_parts[part]) {
                 ++proc_assigned_per_type[instance.getArchitecture().processorType(proc)];
-            for(unsigned proc : processors_to_parts[other_part])
+            }
+            for (unsigned proc : processors_to_parts[other_part]) {
                 ++other_proc_assigned_per_type[instance.getArchitecture().processorType(proc)];
+            }
 
-            for(unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type)
-                if(proc_assigned_per_type[proc_type] != other_proc_assigned_per_type[proc_type])
+            for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) {
+                if (proc_assigned_per_type[proc_type] != other_proc_assigned_per_type[proc_type]) {
                     isomorphic = false;
-            
-            if(!isomorphic)
+                }
+            }
+
+            if (!isomorphic) {
                 continue;
+            }
 
             isomorphicTo[other_part] = part;
-            std::cout<<"Part "<<other_part<<" is isomorphic to "<<part<<std::endl;
+            std::cout << "Part " << other_part << " is isomorphic to " << part << std::endl;
         }
+    }
 
     // PART 3: solve a small ILP for each part
-    std::vector<std::set<vertex_idx> > in_fast_mem(instance.numberOfProcessors());
-    std::vector<PebblingSchedule<Graph_t> > pebbling(nr_parts);
-    std::vector<BspArchitecture<Graph_t> > subArch(nr_parts);
-    std::vector<BspInstance<Graph_t> > subInstance(nr_parts);
+    std::vector<std::set<vertex_idx>> in_fast_mem(instance.numberOfProcessors());
+    std::vector<PebblingSchedule<Graph_t>> pebbling(nr_parts);
+    std::vector<BspArchitecture<Graph_t>> subArch(nr_parts);
+    std::vector<BspInstance<Graph_t>> subInstance(nr_parts);
 
     // to handle the initial memory content for isomorphic parts
-    std::vector<std::vector<std::set<vertex_idx> > > has_reds_in_beginning(nr_parts, std::vector<std::set<vertex_idx> >(instance.numberOfProcessors()));
+    std::vector<std::vector<std::set<vertex_idx>>> has_reds_in_beginning(
+        nr_parts, std::vector<std::set<vertex_idx>>(instance.numberOfProcessors()));
 
-    for(unsigned part = 0; part < nr_parts; ++part)
-    {
-        std::cout<<"part "<<part<<std::endl;
+    for (unsigned part = 0; part < nr_parts; ++part) {
+        std::cout << "part " << part << std::endl;
 
         // set up sub-DAG
-        Graph_t& subDag = subDags[part];
+        Graph_t &subDag = subDags[part];
         std::map<vertex_idx, vertex_idx> local_id;
         vertex_idx node_idx = 0;
-        for(vertex_idx node : extra_sources[part])
-        {
+        for (vertex_idx node : extra_sources[part]) {
             local_id[node] = node_idx;
             original_node_id[part][node_idx] = node;
             ++node_idx;
         }
-        for(vertex_idx node : nodes_in_part[part])
-        {
+        for (vertex_idx node : nodes_in_part[part]) {
             local_id[node] = node_idx;
             original_node_id[part][node_idx] = node;
             ++node_idx;
         }
-        
+
         std::set<vertex_idx> needs_blue_at_end;
-        for(vertex_idx node : nodes_in_part[part])
-        {
-            for (const vertex_idx &succ : instance.getComputationalDag().children(node))
-                if(assignment_to_parts[node] != assignment_to_parts[succ])
+        for (vertex_idx node : nodes_in_part[part]) {
+            for (const vertex_idx &succ : instance.getComputationalDag().children(node)) {
+                if (assignment_to_parts[node] != assignment_to_parts[succ]) {
                     needs_blue_at_end.insert(local_id[node]);
-            
-            if(instance.getComputationalDag().out_degree(node) == 0)
+                }
+            }
+
+            if (instance.getComputationalDag().out_degree(node) == 0) {
                 needs_blue_at_end.insert(local_id[node]);
+            }
         }
 
         // set up sub-architecture
         subArch[part].setNumberOfProcessors(static_cast<unsigned>(processors_to_parts[part].size()));
         unsigned proc_index = 0;
-        for(unsigned proc : processors_to_parts[part])
-        {
+        for (unsigned proc : processors_to_parts[part]) {
             subArch[part].setProcessorType(proc_index, instance.getArchitecture().processorType(proc));
             subArch[part].setMemoryBound(instance.getArchitecture().memoryBound(proc), proc_index);
             original_proc_id[part][proc_index] = proc;
@@ -228,32 +250,30 @@ RETURN_STATUS PebblingPartialILP<Graph_t>::computePebbling(PebblingSchedule<Grap
         // no NUMA parameters for now
 
         // skip if isomorphic to previous part
-        if(isomorphicTo[part] < UINT_MAX)
-        {
+        if (isomorphicTo[part] < UINT_MAX) {
             pebbling[part] = pebbling[isomorphicTo[part]];
             has_reds_in_beginning[part] = has_reds_in_beginning[isomorphicTo[part]];
             continue;
         }
 
         // set node-processor compatibility matrix
-        std::vector<std::vector<bool> > comp_matrix = instance.getNodeProcessorCompatibilityMatrix();
+        std::vector<std::vector<bool>> comp_matrix = instance.getNodeProcessorCompatibilityMatrix();
         comp_matrix.emplace_back(instance.getArchitecture().getNumberOfProcessorTypes(), true);
         subInstance[part] = BspInstance(subDag, subArch[part], comp_matrix);
-        
+
         // currently we only allow the input laoding scenario - the case where this is false is unmaintained/untested
         bool need_to_load_inputs = true;
 
         // keep in fast memory what's relevant, remove the rest
-        for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc)
-        {
+        for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) {
             has_reds_in_beginning[part][proc].clear();
             std::set<vertex_idx> new_content_fast_mem;
-            for(vertex_idx node : in_fast_mem[original_proc_id[part][proc]])
-                if(local_id.find(node) != local_id.end())
-                {
+            for (vertex_idx node : in_fast_mem[original_proc_id[part][proc]]) {
+                if (local_id.find(node) != local_id.end()) {
                     has_reds_in_beginning[part][proc].insert(local_id[node]);
                     new_content_fast_mem.insert(node);
                 }
+            }
 
             in_fast_mem[original_proc_id[part][proc]] = new_content_fast_mem;
         }
@@ -263,122 +283,124 @@ RETURN_STATUS PebblingPartialILP<Graph_t>::computePebbling(PebblingSchedule<Grap
         GreedyBspScheduler<Graph_t> greedy_scheduler;
         BspSchedule<Graph_t> bsp_heuristic(subInstance[part]);
         greedy_scheduler.computeSchedule(bsp_heuristic);
-        
+
         std::set<vertex_idx> extra_source_ids;
-        for(vertex_idx idx = 0; idx < extra_sources[part].size(); ++idx)
+        for (vertex_idx idx = 0; idx < extra_sources[part].size(); ++idx) {
             extra_source_ids.insert(idx);
+        }
 
         heuristic_pebbling.setNeedToLoadInputs(true);
         heuristic_pebbling.SetExternalSources(extra_source_ids);
         heuristic_pebbling.SetNeedsBlueAtEnd(needs_blue_at_end);
         heuristic_pebbling.SetHasRedInBeginning(has_reds_in_beginning[part]);
-        heuristic_pebbling.ConvertFromBsp(bsp_heuristic, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::FORESIGHT);      
+        heuristic_pebbling.ConvertFromBsp(bsp_heuristic, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::FORESIGHT);
 
         heuristic_pebbling.removeEvictStepsFromEnd();
         pebbling[part] = heuristic_pebbling;
         cost_type heuristicCost = asynchronous ? heuristic_pebbling.computeAsynchronousCost() : heuristic_pebbling.computeCost();
 
-        if(!heuristic_pebbling.isValid())
-            std::cout<<"ERROR: Pebbling heuristic INVALID!"<<std::endl;
+        if (!heuristic_pebbling.isValid()) {
+            std::cout << "ERROR: Pebbling heuristic INVALID!" << std::endl;
+        }
 
         // solution with subILP
         MultiProcessorPebbling<Graph_t> mpp;
         mpp.setVerbose(verbose);
         mpp.setTimeLimitSeconds(time_seconds_for_subILPs);
-        mpp.setMaxTime(2*maxPartitionSize); // just a heuristic choice, does not guarantee feasibility!
+        mpp.setMaxTime(2 * maxPartitionSize);    // just a heuristic choice, does not guarantee feasibility!
         mpp.setNeedsBlueAtEnd(needs_blue_at_end);
         mpp.setNeedToLoadInputs(need_to_load_inputs);
         mpp.setHasRedInBeginning(has_reds_in_beginning[part]);
 
         PebblingSchedule<Graph_t> pebblingILP(subInstance[part]);
         RETURN_STATUS status = mpp.computePebblingWithInitialSolution(heuristic_pebbling, pebblingILP, asynchronous);
-        if(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND)
-        {
-            if(!pebblingILP.isValid())
-                std::cout<<"ERROR: Pebbling ILP INVALID!"<<std::endl;
+        if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) {
+            if (!pebblingILP.isValid()) {
+                std::cout << "ERROR: Pebbling ILP INVALID!" << std::endl;
+            }
 
             pebblingILP.removeEvictStepsFromEnd();
             cost_type ILP_cost = asynchronous ? pebblingILP.computeAsynchronousCost() : pebblingILP.computeCost();
-            if(ILP_cost < heuristicCost)
-            {
+            if (ILP_cost < heuristicCost) {
                 pebbling[part] = pebblingILP;
-                std::cout<<"ILP chosen instead of greedy. ("<<ILP_cost<<" < "<<heuristicCost<<")"<<std::endl;
+                std::cout << "ILP chosen instead of greedy. (" << ILP_cost << " < " << heuristicCost << ")" << std::endl;
+            } else {
+                std::cout << "Greedy chosen instead of ILP. (" << heuristicCost << " < " << ILP_cost << ")" << std::endl;
             }
-            else
-                std::cout<<"Greedy chosen instead of ILP. ("<<heuristicCost<<" < "<<ILP_cost<<")"<<std::endl;
-            
+
             // save fast memory content for next subproblem
-            std::vector<std::set<vertex_idx> > fast_mem_content_at_end = pebbling[part].getMemContentAtEnd();
-            for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc)
-            {
+            std::vector<std::set<vertex_idx>> fast_mem_content_at_end = pebbling[part].getMemContentAtEnd();
+            for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) {
                 in_fast_mem[original_proc_id[part][proc]].clear();
-                for(vertex_idx node : fast_mem_content_at_end[proc])
+                for (vertex_idx node : fast_mem_content_at_end[proc]) {
                     in_fast_mem[original_proc_id[part][proc]].insert(original_node_id[part][node]);
+                }
             }
+        } else {
+            std::cout << "ILP found no solution; using greedy instead (cost = " << heuristicCost << ")." << std::endl;
         }
-        else
-            std::cout<<"ILP found no solution; using greedy instead (cost = "<<heuristicCost<<")."<<std::endl;
     }
 
     // AUX: assemble final schedule from subschedules
-    schedule.CreateFromPartialPebblings(instance, pebbling, processors_to_parts, original_node_id, original_proc_id, has_reds_in_beginning);
+    schedule.CreateFromPartialPebblings(
+        instance, pebbling, processors_to_parts, original_node_id, original_proc_id, has_reds_in_beginning);
     schedule.cleanSchedule();
     return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR;
-
 }
 
-template<typename Graph_t>
-Graph_t PebblingPartialILP<Graph_t>::contractByPartition(const BspInstance<Graph_t> &instance, const std::vector<unsigned> &node_to_part_assignment)
-{
+template <typename Graph_t>
+Graph_t PebblingPartialILP<Graph_t>::contractByPartition(const BspInstance<Graph_t> &instance,
+                                                         const std::vector<unsigned> &node_to_part_assignment) {
     const auto &G = instance.getComputationalDag();
 
     part_and_nodetype_to_new_index.clear();
 
     unsigned nr_new_nodes = 0;
-    for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node)
-    {
-        if(part_and_nodetype_to_new_index.find({node_to_part_assignment[node], G.vertex_type(node)}) == part_and_nodetype_to_new_index.end())
-        {
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) {
+        if (part_and_nodetype_to_new_index.find({node_to_part_assignment[node], G.vertex_type(node)})
+            == part_and_nodetype_to_new_index.end()) {
             part_and_nodetype_to_new_index[{node_to_part_assignment[node], G.vertex_type(node)}] = nr_new_nodes;
             ++nr_new_nodes;
         }
     }
 
     Graph_t contracted;
-    for(vertex_idx node = 0; node < nr_new_nodes; ++node)
-        contracted.add_vertex(0,0,0);
+    for (vertex_idx node = 0; node < nr_new_nodes; ++node) {
+        contracted.add_vertex(0, 0, 0);
+    }
 
-    std::set<std::pair<vertex_idx, vertex_idx> > edges;
+    std::set<std::pair<vertex_idx, vertex_idx>> edges;
 
-    for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node)
-    {
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) {
         vertex_idx node_new_index = part_and_nodetype_to_new_index[{node_to_part_assignment[node], G.vertex_type(node)}];
-        for (const vertex_idx &succ : instance.getComputationalDag().children(node))
-            if(node_to_part_assignment[node] != node_to_part_assignment[succ])
+        for (const vertex_idx &succ : instance.getComputationalDag().children(node)) {
+            if (node_to_part_assignment[node] != node_to_part_assignment[succ]) {
                 edges.emplace(node_new_index, part_and_nodetype_to_new_index[{node_to_part_assignment[succ], G.vertex_type(succ)}]);
+            }
+        }
 
-        contracted.set_vertex_work_weight(node_new_index, contracted.vertex_work_weight(node_new_index) + G.vertex_work_weight(node));
-        contracted.set_vertex_comm_weight(node_new_index, contracted.vertex_comm_weight(node_new_index) + G.vertex_comm_weight(node));
+        contracted.set_vertex_work_weight(node_new_index,
+                                          contracted.vertex_work_weight(node_new_index) + G.vertex_work_weight(node));
+        contracted.set_vertex_comm_weight(node_new_index,
+                                          contracted.vertex_comm_weight(node_new_index) + G.vertex_comm_weight(node));
         contracted.set_vertex_mem_weight(node_new_index, contracted.vertex_mem_weight(node_new_index) + G.vertex_mem_weight(node));
         contracted.set_vertex_type(node_new_index, G.vertex_type(node));
     }
 
-    for(auto edge : edges) {
-     
+    for (auto edge : edges) {
         if constexpr (has_edge_weights_v<Graph_t>) {
             contracted.add_edge(edge.first, edge.second, 1);
         } else {
             contracted.add_edge(edge.first, edge.second);
         }
-    
     }
 
     return contracted;
 }
 
-template<typename Graph_t>
-RETURN_STATUS PebblingPartialILP<Graph_t>::computeSchedule(BspSchedule<Graph_t>&) {
+template <typename Graph_t>
+RETURN_STATUS PebblingPartialILP<Graph_t>::computeSchedule(BspSchedule<Graph_t> &) {
     return RETURN_STATUS::ERROR;
 }
 
-}
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp
index 0fb97201..f969be72 100644
--- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp
+++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp
@@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
@@ -21,15 +21,14 @@ limitations under the License.
 #include "osp/bsp/model/BspInstance.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include "osp/dag_divider/ConnectedComponentDivider.hpp"
-#include "osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp"
 #include "osp/graph_algorithms/subgraph_algorithms.hpp"
+#include "osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class AcyclicDagDivider {
-
-    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs."); 
+    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
 
   protected:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -37,9 +36,11 @@ class AcyclicDagDivider {
     unsigned minPartitionSize = 40, maxPartitionSize = 80;
     bool ignore_sources_in_size = true;
 
-    std::vector<unsigned> getTopologicalSplit(const Graph_t &G, std::pair<unsigned, unsigned> min_and_max, const std::vector<bool>& is_original_source) const;
+    std::vector<unsigned> getTopologicalSplit(const Graph_t &G,
+                                              std::pair<unsigned, unsigned> min_and_max,
+                                              const std::vector<bool> &is_original_source) const;
 
-    v_commw_t<Graph_t> static getSplitCost(const Graph_t &G, const std::vector<unsigned>& node_to_part);
+    v_commw_t<Graph_t> static getSplitCost(const Graph_t &G, const std::vector<unsigned> &node_to_part);
 
   public:
     AcyclicDagDivider() {}
@@ -50,13 +51,17 @@ class AcyclicDagDivider {
 
     // getters and setters for problem parameters
     inline std::pair<unsigned, unsigned> getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); }
-    inline void setMinAndMaxSize(const std::pair<unsigned, unsigned> min_and_max) {minPartitionSize = min_and_max.first; maxPartitionSize = min_and_max.second; }
-    inline void setIgnoreSources(const bool ignore_) {ignore_sources_in_size = ignore_; }
+
+    inline void setMinAndMaxSize(const std::pair<unsigned, unsigned> min_and_max) {
+        minPartitionSize = min_and_max.first;
+        maxPartitionSize = min_and_max.second;
+    }
+
+    inline void setIgnoreSources(const bool ignore_) { ignore_sources_in_size = ignore_; }
 };
 
-template<typename Graph_t>
-std::vector<unsigned> AcyclicDagDivider<Graph_t>::computePartitioning(const BspInstance<Graph_t> &instance)
-{
+template <typename Graph_t>
+std::vector<unsigned> AcyclicDagDivider<Graph_t>::computePartitioning(const BspInstance<Graph_t> &instance) {
     const unsigned N = static_cast<unsigned>(instance.numberOfVertices());
 
     // split to connected components first
@@ -64,77 +69,69 @@ std::vector<unsigned> AcyclicDagDivider<Graph_t>::computePartitioning(const BspI
     connected_comp.divide(instance.getComputationalDag());
 
     std::vector<Graph_t> subDags = connected_comp.get_sub_dags();
-    std::vector<std::pair<unsigned, vertex_idx> > node_to_subdag_and_index(N);
-    std::vector<std::vector<vertex_idx> > original_id(subDags.size());
-    for(vertex_idx node = 0; node < N; ++node)
-    {
+    std::vector<std::pair<unsigned, vertex_idx>> node_to_subdag_and_index(N);
+    std::vector<std::vector<vertex_idx>> original_id(subDags.size());
+    for (vertex_idx node = 0; node < N; ++node) {
         node_to_subdag_and_index[node] = {connected_comp.get_component()[node], connected_comp.get_vertex_map()[node]};
         original_id[connected_comp.get_component()[node]].push_back(node);
     }
-    
+
     // TODO extend with splits at directed articulation points in future?
 
     // split components further with ILPs or heuristics
-    while(true)
-    {
+    while (true) {
         bool exists_too_large = false;
         std::vector<bool> dag_is_too_large(subDags.size(), false);
         std::vector<unsigned> dag_real_size(subDags.size(), 0);
 
-        for(unsigned idx = 0; idx < subDags.size(); ++idx)
-        {
-            const Graph_t& dag = subDags[idx];
-            if(!ignore_sources_in_size)
-            {
+        for (unsigned idx = 0; idx < subDags.size(); ++idx) {
+            const Graph_t &dag = subDags[idx];
+            if (!ignore_sources_in_size) {
                 dag_real_size[idx] = static_cast<unsigned>(dag.num_vertices());
-                if(dag.num_vertices() > maxPartitionSize)
-                {
+                if (dag.num_vertices() > maxPartitionSize) {
                     dag_is_too_large[idx] = true;
                     exists_too_large = true;
                 }
+            } else {
+                for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) {
+                    if (instance.getComputationalDag().in_degree(original_id[idx][local_ID]) > 0) {
+                        ++dag_real_size[idx];
+                    }
+                }
             }
-            else
-            {
-                for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID)
-                    if(instance.getComputationalDag().in_degree(original_id[idx][local_ID]) > 0)
-                        ++dag_real_size[idx];        
-            }
-            if(dag_real_size[idx] > maxPartitionSize)
-            {
+            if (dag_real_size[idx] > maxPartitionSize) {
                 dag_is_too_large[idx] = true;
                 exists_too_large = true;
             }
-
         }
-        
-        if(!exists_too_large)
+
+        if (!exists_too_large) {
             break;
-        
-        std::vector<Graph_t > newDagList;
-        std::vector<std::vector<vertex_idx> > original_id_updated;
-
-        for(unsigned idx = 0; idx < subDags.size(); ++idx)
-        {
-            const Graph_t& dag = subDags[idx];
-            if(!dag_is_too_large[idx])
-            {
-                for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID)
+        }
+
+        std::vector<Graph_t> newDagList;
+        std::vector<std::vector<vertex_idx>> original_id_updated;
+
+        for (unsigned idx = 0; idx < subDags.size(); ++idx) {
+            const Graph_t &dag = subDags[idx];
+            if (!dag_is_too_large[idx]) {
+                for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) {
                     node_to_subdag_and_index[original_id[idx][local_ID]].first = static_cast<unsigned>(newDagList.size());
+                }
 
                 original_id_updated.push_back(original_id[idx]);
                 newDagList.push_back(dag);
-            }
-            else
-            {
+            } else {
                 std::vector<unsigned> ILP_assignment;
-                //unsigned newMin = dag_real_size[idx]/3, minPartitionSize); minimum condition removed - it can cause very strict bisections
-                unsigned newMin = dag_real_size[idx]/3;
-                unsigned newMax =  dag_real_size[idx] - newMin;
+                // unsigned newMin = dag_real_size[idx]/3, minPartitionSize); minimum condition removed - it can cause very strict bisections
+                unsigned newMin = dag_real_size[idx] / 3;
+                unsigned newMax = dag_real_size[idx] - newMin;
 
                 // mark the source nodes of the original DAG
                 std::vector<bool> is_original_source(dag.num_vertices());
-                for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID)
+                for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) {
                     is_original_source[local_ID] = (instance.getComputationalDag().in_degree(original_id[idx][local_ID]) == 0);
+                }
 
                 // heuristic splitting
                 std::vector<unsigned> heuristic_assignment = getTopologicalSplit(dag, {newMin, newMax}, is_original_source);
@@ -146,11 +143,12 @@ std::vector<unsigned> AcyclicDagDivider<Graph_t>::computePartitioning(const BspI
                 partitioner.setTimeLimitSeconds(120);
                 partitioner.setMinAndMaxSize({newMin, newMax});
                 partitioner.setIsOriginalSource(is_original_source);
-                partitioner.setNumberOfParts(2); // note - if set to more than 2, ILP is MUCH more inefficient
+                partitioner.setNumberOfParts(2);    // note - if set to more than 2, ILP is MUCH more inefficient
                 BspInstance partial_instance(dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix());
                 RETURN_STATUS status = partitioner.computePartitioning(partial_instance, ILP_assignment);
-                if(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND)
+                if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) {
                     ILPCost = getSplitCost(dag, ILP_assignment);
+                }
 
                 std::vector<unsigned> assignment = ILPCost < heuristicCost ? ILP_assignment : heuristic_assignment;
 
@@ -161,26 +159,27 @@ std::vector<unsigned> AcyclicDagDivider<Graph_t>::computePartitioning(const BspI
                     std::cout<<sdag.numberOfVertices()<<" + ";
                 std::cout<<std::endl;*/
 
-
                 // update labels
                 std::vector<vertex_idx> node_idx_in_new_subDag(dag.num_vertices());
                 std::vector<unsigned> nr_nodes_in_new_subDag(splitDags.size(), 0);
-                for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID)
-                {
+                for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) {
                     node_idx_in_new_subDag[local_ID] = nr_nodes_in_new_subDag[assignment[local_ID]];
                     ++nr_nodes_in_new_subDag[assignment[local_ID]];
                 }
-                
-                for(auto next_dag : splitDags)
+
+                for (auto next_dag : splitDags) {
                     original_id_updated.emplace_back(next_dag.num_vertices());
+                }
 
-                for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID)
-                {
-                    node_to_subdag_and_index[original_id[idx][local_ID]] = {newDagList.size() + assignment[local_ID], node_idx_in_new_subDag[local_ID]};
-                    original_id_updated[newDagList.size() + assignment[local_ID]][node_idx_in_new_subDag[local_ID]] = original_id[idx][local_ID];
+                for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) {
+                    node_to_subdag_and_index[original_id[idx][local_ID]]
+                        = {newDagList.size() + assignment[local_ID], node_idx_in_new_subDag[local_ID]};
+                    original_id_updated[newDagList.size() + assignment[local_ID]][node_idx_in_new_subDag[local_ID]]
+                        = original_id[idx][local_ID];
                 }
-                for(auto next_dag : splitDags)
+                for (auto next_dag : splitDags) {
                     newDagList.push_back(next_dag);
+                }
             }
         }
 
@@ -190,114 +189,121 @@ std::vector<unsigned> AcyclicDagDivider<Graph_t>::computePartitioning(const BspI
 
     // output final cost
     std::vector<unsigned> final_assignment(N);
-    for(vertex_idx node = 0; node < N; ++node)
+    for (vertex_idx node = 0; node < N; ++node) {
         final_assignment[node] = node_to_subdag_and_index[node].first;
-    std::cout<<"Final cut cost of acyclic DAG divider is "<<getSplitCost(instance.getComputationalDag(), final_assignment)<<std::endl;
+    }
+    std::cout << "Final cut cost of acyclic DAG divider is " << getSplitCost(instance.getComputationalDag(), final_assignment)
+              << std::endl;
 
     return final_assignment;
 }
 
-template<typename Graph_t>
-std::vector<unsigned> AcyclicDagDivider<Graph_t>::getTopologicalSplit(const Graph_t &G, std::pair<unsigned, unsigned> min_and_max, const std::vector<bool>& is_original_source) const
-{
+template <typename Graph_t>
+std::vector<unsigned> AcyclicDagDivider<Graph_t>::getTopologicalSplit(const Graph_t &G,
+                                                                      std::pair<unsigned, unsigned> min_and_max,
+                                                                      const std::vector<bool> &is_original_source) const {
     std::vector<unsigned> node_to_part(G.num_vertices());
 
     std::vector<vertex_idx> top_order = GetTopOrder(G);
     std::vector<unsigned> top_order_idx(G.num_vertices());
-    for(unsigned idx = 0; idx < G.num_vertices(); ++idx)
+    for (unsigned idx = 0; idx < G.num_vertices(); ++idx) {
         top_order_idx[top_order[idx]] = idx;
+    }
 
     std::vector<unsigned> last_node_idx_in_hyperedge(G.num_vertices());
-    for(unsigned node = 0; node < G.num_vertices(); ++node)
-    {
+    for (unsigned node = 0; node < G.num_vertices(); ++node) {
         last_node_idx_in_hyperedge[node] = top_order_idx[node];
-        for (const auto &succ : G.children(node))
+        for (const auto &succ : G.children(node)) {
             last_node_idx_in_hyperedge[node] = std::max(last_node_idx_in_hyperedge[node], top_order_idx[succ]);
+        }
     }
 
     unsigned index = 0;
     unsigned current_part_id = 0;
 
     unsigned nodes_remaining = static_cast<unsigned>(G.num_vertices());
-    if(ignore_sources_in_size)
-    {
+    if (ignore_sources_in_size) {
         nodes_remaining = 0;
-        for(unsigned node = 0; node < G.num_vertices(); ++node)
-            if(!is_original_source[node])
+        for (unsigned node = 0; node < G.num_vertices(); ++node) {
+            if (!is_original_source[node]) {
                 ++nodes_remaining;
+            }
+        }
     }
 
-    while(nodes_remaining > min_and_max.second)
-    {
+    while (nodes_remaining > min_and_max.second) {
         unsigned best_cost = UINT_MAX;
         unsigned best_end = index;
 
         unsigned end;
         unsigned newly_added_nodes = 0;
-        for(end = index + 1; index < G.num_vertices() && newly_added_nodes < min_and_max.first; ++end)
-            if(!ignore_sources_in_size || !is_original_source[end])
+        for (end = index + 1; index < G.num_vertices() && newly_added_nodes < min_and_max.first; ++end) {
+            if (!ignore_sources_in_size || !is_original_source[end]) {
                 ++newly_added_nodes;
+            }
+        }
 
-        while(end < G.num_vertices() && newly_added_nodes < min_and_max.second)
-        {
+        while (end < G.num_vertices() && newly_added_nodes < min_and_max.second) {
             unsigned extra_cost = 0;
 
             // check the extra cut cost of the potential endpoint
-            for(unsigned top_order_pos = index; top_order_pos <= end; ++top_order_pos)
-            {
+            for (unsigned top_order_pos = index; top_order_pos <= end; ++top_order_pos) {
                 vertex_idx node = top_order[top_order_pos];
-                if(last_node_idx_in_hyperedge[node] > end)
+                if (last_node_idx_in_hyperedge[node] > end) {
                     extra_cost += G.vertex_comm_weight(node);
-                
-                for (const auto &pred : G.parents(node))
-                    if(last_node_idx_in_hyperedge[pred] > end)
-                        extra_cost += G.vertex_comm_weight(pred); 
+                }
+
+                for (const auto &pred : G.parents(node)) {
+                    if (last_node_idx_in_hyperedge[pred] > end) {
+                        extra_cost += G.vertex_comm_weight(pred);
+                    }
+                }
             }
 
-            if(extra_cost < best_cost)
-            {
+            if (extra_cost < best_cost) {
                 best_cost = extra_cost;
                 best_end = end;
             }
 
             ++end;
-            if(!ignore_sources_in_size || !is_original_source[end])
+            if (!ignore_sources_in_size || !is_original_source[end]) {
                 ++newly_added_nodes;
+            }
         }
 
-        for(vertex_idx idx = index; idx <= best_end; ++idx)
-        {
+        for (vertex_idx idx = index; idx <= best_end; ++idx) {
             node_to_part[top_order[idx]] = current_part_id;
-            if(!ignore_sources_in_size || !is_original_source[idx])
+            if (!ignore_sources_in_size || !is_original_source[idx]) {
                 --nodes_remaining;
+            }
         }
         index = best_end + 1;
         ++current_part_id;
     }
 
     // remaining nodes go into last part
-    for(vertex_idx idx = index; idx < G.num_vertices(); ++idx)
+    for (vertex_idx idx = index; idx < G.num_vertices(); ++idx) {
         node_to_part[top_order[idx]] = current_part_id;
+    }
 
     return node_to_part;
 }
 
-template<typename Graph_t>
-v_commw_t<Graph_t> AcyclicDagDivider<Graph_t>::getSplitCost(const Graph_t &G, const std::vector<unsigned>& node_to_part)
-{
+template <typename Graph_t>
+v_commw_t<Graph_t> AcyclicDagDivider<Graph_t>::getSplitCost(const Graph_t &G, const std::vector<unsigned> &node_to_part) {
     v_commw_t<Graph_t> cost = 0;
 
-    for(vertex_idx node = 0; node < G.num_vertices(); ++node)
-    {
+    for (vertex_idx node = 0; node < G.num_vertices(); ++node) {
         std::set<unsigned> parts_included;
         parts_included.insert(node_to_part[node]);
-        for (const auto &succ : G.children(node))
+        for (const auto &succ : G.children(node)) {
             parts_included.insert(node_to_part[succ]);
-        
-        cost += static_cast<v_commw_t<Graph_t>>(parts_included.size() -1) * G.vertex_comm_weight(node);
+        }
+
+        cost += static_cast<v_commw_t<Graph_t>>(parts_included.size() - 1) * G.vertex_comm_weight(node);
     }
 
     return cost;
 }
 
-}
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp
index de849a0b..2faaeb81 100644
--- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp
+++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp
@@ -13,21 +13,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp"
-#include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class AcyclicPartitioningILP {
-
-    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs."); 
+    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
 
   private:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -39,7 +38,6 @@ class AcyclicPartitioningILP {
     bool ignore_sources_for_constraint = true;
 
     class WriteSolutionCallback : public CallbackBase {
-
       private:
         unsigned counter;
         unsigned max_number_solution;
@@ -48,14 +46,16 @@ class AcyclicPartitioningILP {
 
       public:
         WriteSolutionCallback()
-            : counter(0), max_number_solution(500), best_obj(COPT_INFINITY), write_solutions_path_cb(""),
+            : counter(0),
+              max_number_solution(500),
+              best_obj(COPT_INFINITY),
+              write_solutions_path_cb(""),
               solution_file_prefix_cb("") {}
 
         std::string write_solutions_path_cb;
         std::string solution_file_prefix_cb;
 
         void callback() override;
-
     };
 
     WriteSolutionCallback solution_callback;
@@ -79,12 +79,11 @@ class AcyclicPartitioningILP {
     void solveILP();
 
   public:
-    AcyclicPartitioningILP()
-        : model(COPTEnv::getInstance().CreateModel("AsyncPart")), write_solutions_found(false) {}
+    AcyclicPartitioningILP() : model(COPTEnv::getInstance().CreateModel("AsyncPart")), write_solutions_found(false) {}
 
     virtual ~AcyclicPartitioningILP() = default;
 
-    RETURN_STATUS computePartitioning(const BspInstance<Graph_t> &instance, std::vector<unsigned>& partitioning);
+    RETURN_STATUS computePartitioning(const BspInstance<Graph_t> &instance, std::vector<unsigned> &partitioning);
 
     /**
      * @brief Enables writing intermediate solutions.
@@ -132,7 +131,7 @@ class AcyclicPartitioningILP {
      * @return The best bound found by the solver.
      */
     inline double bestBound() { return model.GetDblAttr(COPT_DBLATTR_BESTBND); }
-  
+
     /**
      * @brief Get the name of the schedule.
      *
@@ -142,18 +141,25 @@ class AcyclicPartitioningILP {
 
     // getters and setters for problem parameters
     inline std::pair<unsigned, unsigned> getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); }
-    inline void setMinAndMaxSize(const std::pair<unsigned, unsigned> min_and_max) {minPartitionSize = min_and_max.first; maxPartitionSize = min_and_max.second; }
+
+    inline void setMinAndMaxSize(const std::pair<unsigned, unsigned> min_and_max) {
+        minPartitionSize = min_and_max.first;
+        maxPartitionSize = min_and_max.second;
+    }
 
     inline unsigned getNumberOfParts() const { return numberOfParts; }
-    inline void setNumberOfParts(const unsigned number_of_parts) {numberOfParts = number_of_parts; }
-    inline void setIgnoreSourceForConstraint(const bool ignore_) {ignore_sources_for_constraint = ignore_; }
-    inline void setIsOriginalSource(const std::vector<bool>& is_original_source_) {is_original_source = is_original_source_; }
+
+    inline void setNumberOfParts(const unsigned number_of_parts) { numberOfParts = number_of_parts; }
+
+    inline void setIgnoreSourceForConstraint(const bool ignore_) { ignore_sources_for_constraint = ignore_; }
+
+    inline void setIsOriginalSource(const std::vector<bool> &is_original_source_) { is_original_source = is_original_source_; }
+
     void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; }
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 void AcyclicPartitioningILP<Graph_t>::solveILP() {
-
     model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0);
 
     model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds);
@@ -172,15 +178,15 @@ void AcyclicPartitioningILP<Graph_t>::solveILP() {
     model.Solve();
 }
 
-template<typename Graph_t>
-RETURN_STATUS AcyclicPartitioningILP<Graph_t>::computePartitioning(const BspInstance<Graph_t> &instance, std::vector<unsigned>& partitioning)
-{
+template <typename Graph_t>
+RETURN_STATUS AcyclicPartitioningILP<Graph_t>::computePartitioning(const BspInstance<Graph_t> &instance,
+                                                                   std::vector<unsigned> &partitioning) {
     partitioning.clear();
 
-    if(numberOfParts == 0)
-    {
-        numberOfParts = static_cast<unsigned>(std::floor(static_cast<double>(instance.numberOfVertices())  / static_cast<double>(minPartitionSize)));
-        std::cout<<"ILP nr parts: "<<numberOfParts<<std::endl;
+    if (numberOfParts == 0) {
+        numberOfParts = static_cast<unsigned>(
+            std::floor(static_cast<double>(instance.numberOfVertices()) / static_cast<double>(minPartitionSize)));
+        std::cout << "ILP nr parts: " << numberOfParts << std::endl;
     }
 
     setupVariablesConstraintsObjective(instance);
@@ -188,19 +194,15 @@ RETURN_STATUS AcyclicPartitioningILP<Graph_t>::computePartitioning(const BspInst
     solveILP();
 
     if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
-
         partitioning = returnAssignment(instance);
         return RETURN_STATUS::OSP_SUCCESS;
 
     } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
-
         partitioning.resize(instance.numberOfVertices(), UINT_MAX);
         return RETURN_STATUS::ERROR;
 
     } else {
-
         if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
-
             partitioning = returnAssignment(instance);
             return RETURN_STATUS::OSP_SUCCESS;
 
@@ -211,140 +213,148 @@ RETURN_STATUS AcyclicPartitioningILP<Graph_t>::computePartitioning(const BspInst
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void AcyclicPartitioningILP<Graph_t>::setupVariablesConstraintsObjective(const BspInstance<Graph_t> &instance) {
-
     // Variables
 
     node_in_partition = std::vector<VarArray>(instance.numberOfVertices());
 
-    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
         node_in_partition[node] = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "node_in_partition");
+    }
 
-    
     std::map<vertex_idx, unsigned> node_to_hyperedge_index;
     unsigned numberOfHyperedges = 0;
-    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-        if(instance.getComputationalDag().out_degree(node) > 0)
-        {
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+        if (instance.getComputationalDag().out_degree(node) > 0) {
             node_to_hyperedge_index[node] = numberOfHyperedges;
             ++numberOfHyperedges;
         }
+    }
 
     hyperedge_intersects_partition = std::vector<VarArray>(numberOfHyperedges);
 
-    for (unsigned hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++)
-        hyperedge_intersects_partition[hyperedge] = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "hyperedge_intersects_partition");
+    for (unsigned hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) {
+        hyperedge_intersects_partition[hyperedge]
+            = model.AddVars(static_cast<int>(numberOfParts), COPT_BINARY, "hyperedge_intersects_partition");
+    }
 
     // Constraints
 
     // each node assigned to exactly one partition
     for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
-
         Expr expr;
         for (unsigned part = 0; part < numberOfParts; part++) {
-
             expr += node_in_partition[node][static_cast<int>(part)];
         }
         model.AddConstr(expr == 1);
     }
 
     // hyperedge indicators match node variables
-    for (unsigned part = 0; part < numberOfParts; part++)
-        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-        {
-            if(instance.getComputationalDag().out_degree(node) == 0)
+    for (unsigned part = 0; part < numberOfParts; part++) {
+        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+            if (instance.getComputationalDag().out_degree(node) == 0) {
                 continue;
+            }
 
-            model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast<int>(part)] >= node_in_partition[node][static_cast<int>(part)]);
-            for (const auto &succ : instance.getComputationalDag().children(node))
-                model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast<int>(part)] >= node_in_partition[succ][static_cast<int>(part)]);
+            model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast<int>(part)]
+                            >= node_in_partition[node][static_cast<int>(part)]);
+            for (const auto &succ : instance.getComputationalDag().children(node)) {
+                model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast<int>(part)]
+                                >= node_in_partition[succ][static_cast<int>(part)]);
+            }
         }
-    
+    }
+
     // partition size constraints
-    for (unsigned part = 0; part < numberOfParts; part++)
-    {
+    for (unsigned part = 0; part < numberOfParts; part++) {
         Expr expr;
-        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-            if(!ignore_sources_for_constraint || is_original_source.empty() || !is_original_source[node])
+        for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+            if (!ignore_sources_for_constraint || is_original_source.empty() || !is_original_source[node]) {
                 expr += node_in_partition[node][static_cast<int>(part)];
+            }
+        }
 
         model.AddConstr(expr <= maxPartitionSize);
         model.AddConstr(expr >= minPartitionSize);
     }
 
     // acyclicity constraints
-    for (unsigned from_part = 0; from_part < numberOfParts; from_part++)
-        for (unsigned to_part = 0; to_part < from_part; to_part++)
-            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-                for (const auto &succ : instance.getComputationalDag().children(node))
-                    model.AddConstr(node_in_partition[node][static_cast<int>(from_part)] + node_in_partition[succ][static_cast<int>(to_part)] <= 1);
-    
+    for (unsigned from_part = 0; from_part < numberOfParts; from_part++) {
+        for (unsigned to_part = 0; to_part < from_part; to_part++) {
+            for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+                for (const auto &succ : instance.getComputationalDag().children(node)) {
+                    model.AddConstr(node_in_partition[node][static_cast<int>(from_part)]
+                                        + node_in_partition[succ][static_cast<int>(to_part)]
+                                    <= 1);
+                }
+            }
+        }
+    }
 
     // set objective
     Expr expr;
-    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++)
-        if(instance.getComputationalDag().out_degree(node) > 0)
-        {
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
+        if (instance.getComputationalDag().out_degree(node) > 0) {
             expr -= instance.getComputationalDag().vertex_comm_weight(node);
-            for (unsigned part = 0; part < numberOfParts; part++)
-                expr += instance.getComputationalDag().vertex_comm_weight(node) * hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast<int>(part)];
+            for (unsigned part = 0; part < numberOfParts; part++) {
+                expr += instance.getComputationalDag().vertex_comm_weight(node)
+                        * hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast<int>(part)];
+            }
         }
+    }
 
     model.SetObjective(expr, COPT_MINIMIZE);
-             
 };
 
-template<typename Graph_t>
+template <typename Graph_t>
 void AcyclicPartitioningILP<Graph_t>::WriteSolutionCallback::callback() {
-
     if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) {
-
         try {
-
             if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) {
-
                 best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ);
                 counter++;
             }
 
-        } catch (const std::exception &e) {
-        }
+        } catch (const std::exception &e) {}
     }
 };
 
-template<typename Graph_t>
-std::vector<unsigned> AcyclicPartitioningILP<Graph_t>::returnAssignment(const BspInstance<Graph_t> &instance)
-{
+template <typename Graph_t>
+std::vector<unsigned> AcyclicPartitioningILP<Graph_t>::returnAssignment(const BspInstance<Graph_t> &instance) {
     std::vector<unsigned> node_to_partition(instance.numberOfVertices(), UINT_MAX);
 
     std::set<unsigned> nonempty_partition_ids;
-    for (unsigned node = 0; node < instance.numberOfVertices(); node++)
-        for(unsigned part = 0; part < numberOfParts; part++)
-            if(node_in_partition[node][static_cast<int>(part)].Get(COPT_DBLINFO_VALUE) >= .99)
-            {
+    for (unsigned node = 0; node < instance.numberOfVertices(); node++) {
+        for (unsigned part = 0; part < numberOfParts; part++) {
+            if (node_in_partition[node][static_cast<int>(part)].Get(COPT_DBLINFO_VALUE) >= .99) {
                 node_to_partition[node] = part;
                 nonempty_partition_ids.insert(part);
             }
+        }
+    }
+
+    for (unsigned chosen_partition : node_to_partition) {
+        if (chosen_partition == UINT_MAX) {
+            std::cout << "Error: partitioning returned by ILP seems incomplete!" << std::endl;
+        }
+    }
 
-    for(unsigned chosen_partition : node_to_partition)
-        if(chosen_partition == UINT_MAX)
-            std::cout<<"Error: partitioning returned by ILP seems incomplete!"<<std::endl;
-    
     unsigned current_index = 0;
     std::map<unsigned, unsigned> new_index;
-    for(unsigned part_index : nonempty_partition_ids)
-    {
+    for (unsigned part_index : nonempty_partition_ids) {
         new_index[part_index] = current_index;
         ++current_index;
     }
 
-    for(vertex_idx node = 0; node < instance.numberOfVertices(); node++)
+    for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) {
         node_to_partition[node] = new_index[node_to_partition[node]];
+    }
 
-    std::cout<<"Acyclic partitioning ILP best solution value: "<<model.GetDblAttr(COPT_DBLATTR_BESTOBJ)<<", best lower bound: "<<model.GetDblAttr(COPT_DBLATTR_BESTBND)<<std::endl;
+    std::cout << "Acyclic partitioning ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ)
+              << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl;
 
     return node_to_partition;
 }
 
-}
\ No newline at end of file
+}    // namespace osp
diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp
index 6c277470..e5678cee 100644
--- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp
+++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp
@@ -13,21 +13,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
 
-#include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
 
-namespace osp{
+namespace osp {
 
-template<typename Graph_t>
+template <typename Graph_t>
 class SubproblemMultiScheduling : public Scheduler<Graph_t> {
-
-    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs."); 
+    static_assert(is_computational_dag_v<Graph_t>, "PebblingSchedule can only be used with computational DAGs.");
 
   private:
     using vertex_idx = vertex_idx_t<Graph_t>;
@@ -35,7 +34,7 @@ class SubproblemMultiScheduling : public Scheduler<Graph_t> {
     using workweight_type = v_workw_t<Graph_t>;
 
     std::vector<vertex_idx> last_node_on_proc;
-    std::vector<std::vector<vertex_idx> > proc_task_lists;
+    std::vector<std::vector<vertex_idx>> proc_task_lists;
     std::vector<workweight_type> longest_outgoing_path;
 
   public:
@@ -43,17 +42,17 @@ class SubproblemMultiScheduling : public Scheduler<Graph_t> {
 
     virtual ~SubproblemMultiScheduling() = default;
 
-    RETURN_STATUS computeMultiSchedule(const BspInstance<Graph_t> &instance, std::vector<std::set<unsigned> >& processors_to_node);
+    RETURN_STATUS computeMultiSchedule(const BspInstance<Graph_t> &instance, std::vector<std::set<unsigned>> &processors_to_node);
 
-    std::vector<std::pair<vertex_idx, unsigned> > makeAssignment(const BspInstance<Graph_t> &instance,
-                                                    const std::set<std::pair<unsigned, vertex_idx> > &nodes_available,
-                                                    const std::set<unsigned> &procs_available) const;
+    std::vector<std::pair<vertex_idx, unsigned>> makeAssignment(const BspInstance<Graph_t> &instance,
+                                                                const std::set<std::pair<unsigned, vertex_idx>> &nodes_available,
+                                                                const std::set<unsigned> &procs_available) const;
 
     std::vector<workweight_type> static get_longest_path(const Graph_t &graph);
 
     // not used, only here for using scheduler class base functionality (status enums, timelimits, etc)
     RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override;
-  
+
     /**
      * @brief Get the name of the schedule.
      *
@@ -61,13 +60,12 @@ class SubproblemMultiScheduling : public Scheduler<Graph_t> {
      */
     virtual std::string getScheduleName() const override { return "SubproblemMultiScheduling"; }
 
-    inline const std::vector<std::vector<unsigned> >& getProcTaskLists() const { return proc_task_lists; }
-
+    inline const std::vector<std::vector<unsigned>> &getProcTaskLists() const { return proc_task_lists; }
 };
 
 // currently duplicated from BSP locking scheduler's code
-template<typename Graph_t>
-std::vector<v_workw_t<Graph_t> > SubproblemMultiScheduling<Graph_t>::get_longest_path(const Graph_t &graph) {
+template <typename Graph_t>
+std::vector<v_workw_t<Graph_t>> SubproblemMultiScheduling<Graph_t>::get_longest_path(const Graph_t &graph) {
     std::vector<workweight_type> longest_path(graph.num_vertices(), 0);
 
     std::vector<vertex_idx> top_order = GetTopOrder(graph);
@@ -77,8 +75,9 @@ std::vector<v_workw_t<Graph_t> > SubproblemMultiScheduling<Graph_t>::get_longest
         if (graph.out_degree(*r_iter) > 0) {
             workweight_type max = 0;
             for (const auto &child : graph.children(*r_iter)) {
-                if (max <= longest_path[child])
+                if (max <= longest_path[child]) {
                     max = longest_path[child];
+                }
             }
             longest_path[*r_iter] += max;
         }
@@ -87,9 +86,9 @@ std::vector<v_workw_t<Graph_t> > SubproblemMultiScheduling<Graph_t>::get_longest
     return longest_path;
 }
 
-template<typename Graph_t>
-RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeMultiSchedule(const BspInstance<Graph_t> &instance, std::vector<std::set<unsigned> >& processors_to_node)
-{
+template <typename Graph_t>
+RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeMultiSchedule(const BspInstance<Graph_t> &instance,
+                                                                       std::vector<std::set<unsigned>> &processors_to_node) {
     const unsigned &N = static_cast<unsigned>(instance.numberOfVertices());
     const unsigned &P = instance.numberOfProcessors();
     const auto &G = instance.getComputationalDag();
@@ -105,7 +104,7 @@ RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeMultiSchedule(const Bsp
 
     longest_outgoing_path = get_longest_path(G);
 
-    std::set<std::pair<unsigned, vertex_idx> > readySet;
+    std::set<std::pair<unsigned, vertex_idx>> readySet;
 
     std::vector<unsigned> nrPredecRemain(N);
     for (vertex_idx node = 0; node < N; node++) {
@@ -116,8 +115,9 @@ RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeMultiSchedule(const Bsp
     }
 
     std::set<unsigned> free_procs;
-    for(unsigned proc = 0; proc < P; ++proc)
+    for (unsigned proc = 0; proc < P; ++proc) {
         free_procs.insert(proc);
+    }
 
     std::vector<double> node_finish_time(N, 0);
 
@@ -125,35 +125,32 @@ RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeMultiSchedule(const Bsp
     finishTimes.emplace(0, std::numeric_limits<unsigned>::max());
 
     while (!readySet.empty() || !finishTimes.empty()) {
-
         const double time = finishTimes.begin()->first;
 
         // Find new ready jobs
-        while (!finishTimes.empty() && fabs(finishTimes.begin()->first - time) < 0.0001 ) {
-
+        while (!finishTimes.empty() && fabs(finishTimes.begin()->first - time) < 0.0001) {
             const vertex_idx node = finishTimes.begin()->second;
             finishTimes.erase(finishTimes.begin());
 
-            if (node != std::numeric_limits<unsigned>::max())
-            {
-                for (const vertex_idx &succ : G.children(node))
-                {
+            if (node != std::numeric_limits<unsigned>::max()) {
+                for (const vertex_idx &succ : G.children(node)) {
                     nrPredecRemain[succ]--;
-                    if (nrPredecRemain[succ] == 0)
+                    if (nrPredecRemain[succ] == 0) {
                         readySet.emplace(-longest_outgoing_path[succ], succ);
+                    }
                 }
-                for(unsigned proc : processors_to_node[node])
+                for (unsigned proc : processors_to_node[node]) {
                     free_procs.insert(proc);
+                }
             }
         }
 
         // Assign new jobs to idle processors
 
         // first assign free processors to ready nodes
-        std::vector<std::pair<vertex_idx, unsigned> > new_assingments = makeAssignment(instance, readySet, free_procs);
+        std::vector<std::pair<vertex_idx, unsigned>> new_assingments = makeAssignment(instance, readySet, free_procs);
 
-        for(auto entry : new_assingments)
-        {
+        for (auto entry : new_assingments) {
             vertex_idx node = entry.first;
             unsigned proc = entry.second;
 
@@ -168,92 +165,95 @@ RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeMultiSchedule(const Bsp
 
         // assign remaining free processors to already started nodes, if it helps
         decltype(finishTimes.rbegin()) itr = finishTimes.rbegin();
-        while(!free_procs.empty() && itr != finishTimes.rend())
-        {
+        while (!free_procs.empty() && itr != finishTimes.rend()) {
             double last_finish_time = itr->first;
 
             decltype(finishTimes.rbegin()) itr_latest = itr;
-            std::set<std::pair<workweight_type, vertex_idx> > possible_nodes;
-            while(itr_latest !=finishTimes.rend() && itr_latest->first + 0.0001 > last_finish_time)
-            {
+            std::set<std::pair<workweight_type, vertex_idx>> possible_nodes;
+            while (itr_latest != finishTimes.rend() && itr_latest->first + 0.0001 > last_finish_time) {
                 vertex_idx node = itr_latest->second;
-                double new_finish_time = time + static_cast<double>(G.vertex_work_weight(node)) / (static_cast<double>(processors_to_node[node].size()) + 1);
-                if(new_finish_time + 0.0001 < itr_latest->first)
+                double new_finish_time = time
+                                         + static_cast<double>(G.vertex_work_weight(node))
+                                               / (static_cast<double>(processors_to_node[node].size()) + 1);
+                if (new_finish_time + 0.0001 < itr_latest->first) {
                     possible_nodes.emplace(-longest_outgoing_path[node], node);
-                
+                }
+
                 ++itr_latest;
             }
             new_assingments = makeAssignment(instance, possible_nodes, free_procs);
-            for(auto entry : new_assingments)
-            {
+            for (auto entry : new_assingments) {
                 vertex_idx node = entry.first;
                 unsigned proc = entry.second;
 
                 processors_to_node[node].insert(proc);
                 proc_task_lists[proc].push_back(node);
                 finishTimes.erase({node_finish_time[node], node});
-                double new_finish_time = time + static_cast<double>(G.vertex_work_weight(node)) / (static_cast<double>(processors_to_node[node].size()));
+                double new_finish_time
+                    = time
+                      + static_cast<double>(G.vertex_work_weight(node)) / (static_cast<double>(processors_to_node[node].size()));
                 finishTimes.emplace(new_finish_time, node);
                 node_finish_time[node] = new_finish_time;
                 last_node_on_proc[proc] = node;
                 free_procs.erase(proc);
             }
-            if(new_assingments.empty())
+            if (new_assingments.empty()) {
                 itr = itr_latest;
+            }
         }
-
     }
 
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-template<typename Graph_t>
-std::vector<std::pair<vertex_idx_t<Graph_t>, unsigned> > SubproblemMultiScheduling<Graph_t>::makeAssignment(const BspInstance<Graph_t> &instance,
-                                                    const std::set<std::pair<unsigned, vertex_idx> > &nodes_available,
-                                                    const std::set<unsigned> &procs_available) const
-{
-    std::vector<std::pair<vertex_idx, unsigned> > assignments;
-    if(nodes_available.empty() || procs_available.empty())
+template <typename Graph_t>
+std::vector<std::pair<vertex_idx_t<Graph_t>, unsigned>> SubproblemMultiScheduling<Graph_t>::makeAssignment(
+    const BspInstance<Graph_t> &instance,
+    const std::set<std::pair<unsigned, vertex_idx>> &nodes_available,
+    const std::set<unsigned> &procs_available) const {
+    std::vector<std::pair<vertex_idx, unsigned>> assignments;
+    if (nodes_available.empty() || procs_available.empty()) {
         return assignments;
+    }
 
     std::set<vertex_idx> assigned_nodes;
     std::vector<bool> assigned_procs(instance.numberOfProcessors(), false);
 
-    for(unsigned proc : procs_available)
-    {
-        if(last_node_on_proc[proc] == UINT_MAX)
+    for (unsigned proc : procs_available) {
+        if (last_node_on_proc[proc] == UINT_MAX) {
             continue;
+        }
 
-        for (const auto &succ : instance.getComputationalDag().children(last_node_on_proc[proc]))
-            if(nodes_available.find({-longest_outgoing_path[succ], succ}) != nodes_available.end() && instance.isCompatible(succ, proc)
-                && assigned_nodes.find(succ) == assigned_nodes.end())
-            {
+        for (const auto &succ : instance.getComputationalDag().children(last_node_on_proc[proc])) {
+            if (nodes_available.find({-longest_outgoing_path[succ], succ}) != nodes_available.end()
+                && instance.isCompatible(succ, proc) && assigned_nodes.find(succ) == assigned_nodes.end()) {
                 assignments.emplace_back(succ, proc);
                 assigned_nodes.insert(succ);
                 assigned_procs[proc] = true;
                 break;
             }
+        }
     }
-        
-    for(unsigned proc : procs_available)
-        if(!assigned_procs[proc])
-            for(auto itr = nodes_available.begin(); itr != nodes_available.end(); ++itr)
-            {
+
+    for (unsigned proc : procs_available) {
+        if (!assigned_procs[proc]) {
+            for (auto itr = nodes_available.begin(); itr != nodes_available.end(); ++itr) {
                 vertex_idx node = itr->second;
-                if(instance.isCompatible(node, proc) && assigned_nodes.find(node) == assigned_nodes.end())
-                {
+                if (instance.isCompatible(node, proc) && assigned_nodes.find(node) == assigned_nodes.end()) {
                     assignments.emplace_back(node, proc);
                     assigned_nodes.insert(node);
                     break;
                 }
             }
+        }
+    }
 
     return assignments;
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 RETURN_STATUS SubproblemMultiScheduling<Graph_t>::computeSchedule(BspSchedule<Graph_t> &) {
     return RETURN_STATUS::ERROR;
 }
 
-}
\ No newline at end of file
+}    // namespace osp
diff --git a/tests/balanced_coin_flips.cpp b/tests/balanced_coin_flips.cpp
index dd4a2e47..f30b64f0 100644
--- a/tests/balanced_coin_flips.cpp
+++ b/tests/balanced_coin_flips.cpp
@@ -17,12 +17,11 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE Balanced_Coin_Flips
-#include <boost/test/unit_test.hpp>
+#include "osp/auxiliary/Balanced_Coin_Flips.hpp"
 
 #include <bitset>
+#include <boost/test/unit_test.hpp>
 #include <iostream>
-#include "osp/auxiliary/Balanced_Coin_Flips.hpp"
-
 
 using namespace osp;
 
@@ -30,17 +29,17 @@ bool thue_morse_gen(long unsigned int n) {
     // std::bitset<sizeof(n)*CHAR_BIT> bits(n);
     unsigned long int bin_sum = 0;
     while (n != 0) {
-        bin_sum += n%2;
+        bin_sum += n % 2;
         n /= 2;
     }
-    return bool(bin_sum%2);  // (bits.count()%2);
+    return bool(bin_sum % 2);    // (bits.count()%2);
 }
 
 BOOST_AUTO_TEST_CASE(Random_Biased_Coin) {
-    std::cout << "True: " << true << " False: " << false << std::endl; 
+    std::cout << "True: " << true << " False: " << false << std::endl;
     Biased_Random Coin;
     std::cout << "Biased Coin: ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         std::cout << Coin.get_flip();
     }
     std::cout << std::endl << std::endl;
@@ -49,25 +48,24 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin) {
 BOOST_AUTO_TEST_CASE(Thue__Morse) {
     Thue_Morse_Sequence Coin(0);
 
-    std::vector<bool> beginning({0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1});
+    std::vector<bool> beginning({0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1});
     std::vector<bool> generated;
-    for (long unsigned i = 0 ; i<beginning.size(); i++) {
+    for (long unsigned i = 0; i < beginning.size(); i++) {
         bool next = Coin.get_flip();
         generated.emplace_back(next);
         // std::cout << next;
     }
     // std::cout << std::endl;
 
-    BOOST_CHECK( beginning == generated );
+    BOOST_CHECK(beginning == generated);
 
     Thue_Morse_Sequence Test_Coin_in_seq(0);
-    for (unsigned i = 0 ; i < 200; i++) {
+    for (unsigned i = 0; i < 200; i++) {
         BOOST_CHECK_EQUAL(Test_Coin_in_seq.get_flip(), thue_morse_gen(i));
         // std::cout << "hi " << i << std::endl;
     }
 
-
-    for (int i = 0 ; i < 100; i++) {
+    for (int i = 0; i < 100; i++) {
         unsigned ind = static_cast<unsigned>(randInt(1048575));
         Thue_Morse_Sequence Test_Coin_random(ind);
         BOOST_CHECK_EQUAL(Test_Coin_random.get_flip(), thue_morse_gen(ind));
@@ -75,27 +73,25 @@ BOOST_AUTO_TEST_CASE(Thue__Morse) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(Repeater_Coin) {
     Repeat_Chance Coin;
     std::cout << "Repeater Coin: ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         std::cout << Coin.get_flip();
     }
     std::cout << std::endl << std::endl;
 }
 
 BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_1) {
-    Biased_Random_with_side_bias Coin({1,1});
+    Biased_Random_with_side_bias Coin({1, 1});
     int true_count = 0;
     int false_count = 0;
     std::cout << "Biased Coin with side bias 1:1 : ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         bool flip = Coin.get_flip();
         if (flip) {
             true_count++;
-        }
-        else {
+        } else {
             false_count++;
         }
         std::cout << flip;
@@ -106,16 +102,15 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_1) {
 }
 
 BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_0) {
-    Biased_Random_with_side_bias Coin({1,0});
+    Biased_Random_with_side_bias Coin({1, 0});
     int true_count = 0;
     int false_count = 0;
     std::cout << "Biased Coin with side bias 1:0 : ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         bool flip = Coin.get_flip();
         if (flip) {
             true_count++;
-        }
-        else {
+        } else {
             false_count++;
         }
         std::cout << flip;
@@ -125,18 +120,16 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_0) {
     std::cout << std::endl;
 }
 
-
 BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_0_1) {
-    Biased_Random_with_side_bias Coin({0,1});
+    Biased_Random_with_side_bias Coin({0, 1});
     int true_count = 0;
     int false_count = 0;
     std::cout << "Biased Coin with side bias 0:1 : ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         bool flip = Coin.get_flip();
         if (flip) {
             true_count++;
-        }
-        else {
+        } else {
             false_count++;
         }
         std::cout << flip;
@@ -146,18 +139,16 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_0_1) {
     std::cout << std::endl;
 }
 
-
 BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_2) {
-    Biased_Random_with_side_bias Coin({3,2});
+    Biased_Random_with_side_bias Coin({3, 2});
     int true_count = 0;
     int false_count = 0;
     std::cout << "Biased Coin with side bias 3:2 : ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         bool flip = Coin.get_flip();
         if (flip) {
             true_count++;
-        }
-        else {
+        } else {
             false_count++;
         }
         std::cout << flip;
@@ -168,16 +159,15 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_2) {
 }
 
 BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_1) {
-    Biased_Random_with_side_bias Coin({3,1});
+    Biased_Random_with_side_bias Coin({3, 1});
     int true_count = 0;
     int false_count = 0;
     std::cout << "Biased Coin with side bias 3:1 : ";
-    for (int i = 0 ; i < 200 ; i++) {
+    for (int i = 0; i < 200; i++) {
         bool flip = Coin.get_flip();
         if (flip) {
             true_count++;
-        }
-        else {
+        } else {
             false_count++;
         }
         std::cout << flip;
@@ -185,4 +175,4 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_1) {
     std::cout << std::endl;
     std::cout << "True count: " << true_count << " False count: " << false_count << std::endl;
     std::cout << std::endl;
-}
\ No newline at end of file
+}
diff --git a/tests/bit_mask.cpp b/tests/bit_mask.cpp
index a80e45b4..5ba648b5 100644
--- a/tests/bit_mask.cpp
+++ b/tests/bit_mask.cpp
@@ -17,10 +17,10 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE BitMasks
-#include <boost/test/unit_test.hpp>
-
 #include "osp/auxiliary/datastructures/bit_mask.hpp"
 
+#include <boost/test/unit_test.hpp>
+
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(BitMaskTest_1) {
@@ -29,13 +29,12 @@ BOOST_AUTO_TEST_CASE(BitMaskTest_1) {
 
     for (unsigned i = 0; i < 25U; ++i) {
         for (std::size_t j = 0; j < num_flags; ++j) {
-            BOOST_CHECK_EQUAL( mask.mask[j], bool(i & (1U << j)) );
+            BOOST_CHECK_EQUAL(mask.mask[j], bool(i & (1U << j)));
         }
         ++mask;
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(BitMaskTest_2) {
     const std::size_t num_flags = 6U;
     BitMask mask(num_flags);
@@ -44,7 +43,7 @@ BOOST_AUTO_TEST_CASE(BitMaskTest_2) {
         BitMask tmp = mask;
         BitMask post = mask++;
         for (std::size_t j = 0; j < num_flags; ++j) {
-            BOOST_CHECK_EQUAL( tmp.mask[j], post.mask[j] );
+            BOOST_CHECK_EQUAL(tmp.mask[j], post.mask[j]);
         }
     }
 }
@@ -57,7 +56,7 @@ BOOST_AUTO_TEST_CASE(BitMaskTest_3) {
         BitMask tmp = mask++;
         ++tmp;
         for (std::size_t j = 0; j < num_flags; ++j) {
-            BOOST_CHECK_EQUAL( tmp.mask[j], mask.mask[j] );
+            BOOST_CHECK_EQUAL(tmp.mask[j], mask.mask[j]);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/boost_graph_adaptor.cpp b/tests/boost_graph_adaptor.cpp
index 5a1ea402..401cf216 100644
--- a/tests/boost_graph_adaptor.cpp
+++ b/tests/boost_graph_adaptor.cpp
@@ -30,7 +30,6 @@ limitations under the License.
 using namespace osp;
 
 boost_graph_int_t constr_graph_1() {
-
     boost_graph_int_t graph;
 
     using vertex_idx = boost_graph_int_t::vertex_idx;
@@ -78,14 +77,12 @@ boost_graph_int_t constr_graph_1() {
 }
 
 BOOST_AUTO_TEST_CASE(test_empty_dag_boost_graph_adapter) {
-
     boost_graph_int_t graph;
     BOOST_CHECK_EQUAL(graph.num_edges(), 0);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 0);
 }
 
 BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) {
-
     boost_graph_int_t graph = constr_graph_1();
 
     using vertex_idx = boost_graph_int_t::vertex_idx;
@@ -95,7 +92,6 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) {
 
     size_t edge_idx = 0;
     for (const auto &edge : graph.edges()) {
-
         BOOST_CHECK_EQUAL(source(edge, graph), edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(target(edge, graph), edge_targets[edge_idx]);
         edge_idx++;
@@ -103,23 +99,38 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) {
 
     edge_idx = 0;
     for (const auto &edge : edges(graph)) {
-
         BOOST_CHECK_EQUAL(source(edge, graph), edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(target(edge, graph), edge_targets[edge_idx]);
         edge_idx++;
     }
 
-
     std::vector<vertex_idx> vertices{0, 1, 2, 3, 4, 5, 6, 7};
 
-    std::vector<std::vector<vertex_idx>> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}};
-
-    std::vector<std::vector<vertex_idx>> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {3, 4}};
+    std::vector<std::vector<vertex_idx>> out_neighbors{
+        {1, 2, 3},
+        {4, 6},
+        {4, 5},
+        {7},
+        {7},
+        {},
+        {},
+        {}
+    };
+
+    std::vector<std::vector<vertex_idx>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0},
+        {1, 2},
+        {2},
+        {1},
+        {3, 4}
+    };
 
     size_t idx = 0;
 
     for (const auto &v : graph.vertices()) {
-
         BOOST_CHECK_EQUAL(v, vertices[idx++]);
 
         size_t i = 0;
@@ -168,7 +179,6 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) {
 }
 
 BOOST_AUTO_TEST_CASE(test_util_1) {
-
     const boost_graph_int_t graph = constr_graph_1();
 
     BOOST_CHECK_EQUAL(graph.num_edges(), 9);
@@ -242,7 +252,6 @@ BOOST_AUTO_TEST_CASE(test_util_1) {
 }
 
 BOOST_AUTO_TEST_CASE(test_constr_dag) {
-
     boost_graph_int_t graph;
 
     graph.add_vertex(1, 2, 3);
@@ -289,14 +298,12 @@ BOOST_AUTO_TEST_CASE(test_constr_dag) {
 }
 
 BOOST_AUTO_TEST_CASE(test_boost_graph_const_1) {
-
     boost_graph_int_t graph(10u);
     BOOST_CHECK_EQUAL(graph.num_edges(), 0);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 10);
 }
 
 BOOST_AUTO_TEST_CASE(test_boost_graph_const_2) {
-
     boost_graph_int_t graph_1 = constr_graph_1();
 
     boost_graph_int_t graph_copy(graph_1);
@@ -360,4 +367,4 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_const_2) {
     BOOST_CHECK_EQUAL(has_path(1, 2, graph_move_2), false);
     BOOST_CHECK_EQUAL(has_path(1, 3, graph_move_2), false);
     BOOST_CHECK_EQUAL(has_path(2, 1, graph_move_2), false);
-}
\ No newline at end of file
+}
diff --git a/tests/bsp_architecture.cpp b/tests/bsp_architecture.cpp
index d803bb56..16e221bc 100644
--- a/tests/bsp_architecture.cpp
+++ b/tests/bsp_architecture.cpp
@@ -25,8 +25,12 @@ limitations under the License.
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) {
-
-    std::vector<std::vector<int>> uniform_sent_costs = {{0, 1, 1, 1}, {1, 0, 1, 1}, {1, 1, 0, 1}, {1, 1, 1, 0}};
+    std::vector<std::vector<int>> uniform_sent_costs = {
+        {0, 1, 1, 1},
+        {1, 0, 1, 1},
+        {1, 1, 0, 1},
+        {1, 1, 1, 0}
+    };
 
     BspArchitecture<computational_dag_vector_impl_def_int_t> architecture(4, 2, 3);
     BOOST_TEST(architecture.numberOfProcessors() == 4);
@@ -63,7 +67,12 @@ BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) {
 
     BOOST_TEST(architecture.sendCost() == uniform_sent_costs);
 
-    std::vector<std::vector<int>> expectedSendCosts = {{0, 2, 2, 2}, {2, 0, 2, 2}, {2, 2, 0, 2}, {2, 2, 2, 0}};
+    std::vector<std::vector<int>> expectedSendCosts = {
+        {0, 2, 2, 2},
+        {2, 0, 2, 2},
+        {2, 2, 0, 2},
+        {2, 2, 2, 0}
+    };
 
     architecture.SetSendCosts(expectedSendCosts);
     BOOST_TEST(architecture.sendCost() == expectedSendCosts);
@@ -79,7 +88,6 @@ BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) {
 }
 
 BOOST_AUTO_TEST_CASE(Architecture) {
-
     // default constructor
     BspArchitecture<computational_dag_vector_impl_def_t> test;
     BOOST_CHECK_EQUAL(test.numberOfProcessors(), 2);
@@ -141,7 +149,14 @@ BOOST_AUTO_TEST_CASE(Architecture) {
     }
 
     // constructor
-    std::vector<std::vector<int>> send_costs = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}};
+    std::vector<std::vector<int>> send_costs = {
+        {0, 1, 1, 1, 1, 1},
+        {1, 0, 1, 1, 1, 1},
+        {1, 1, 0, 1, 1, 1},
+        {1, 1, 1, 0, 1, 1},
+        {1, 1, 1, 1, 0, 1},
+        {1, 1, 1, 1, 1, 0}
+    };
 
     BOOST_CHECK_THROW(BspArchitecture<computational_dag_vector_impl_def_int_t> test31(7, 42942, 0, send_costs),
                       std::invalid_argument);
@@ -168,8 +183,22 @@ BOOST_AUTO_TEST_CASE(Architecture) {
     }
 
     // constructor
-    std::vector<std::vector<int>> send_costs2 = {{0, 1, 2, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}};
-    std::vector<std::vector<int>> send_costs3 = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {3, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}};
+    std::vector<std::vector<int>> send_costs2 = {
+        {0, 1, 2, 1, 1, 1},
+        {1, 0, 1, 1, 1, 1},
+        {1, 1, 0, 1, 1, 1},
+        {1, 1, 1, 0, 1, 1},
+        {1, 1, 1, 1, 0, 1},
+        {1, 1, 1, 1, 1, 0}
+    };
+    std::vector<std::vector<int>> send_costs3 = {
+        {0, 1, 1, 1, 1, 1},
+        {1, 0, 1, 1, 1, 1},
+        {1, 1, 0, 1, 1, 1},
+        {3, 1, 1, 0, 1, 1},
+        {1, 1, 1, 1, 0, 1},
+        {1, 1, 1, 1, 1, 0}
+    };
 
     BspArchitecture<computational_dag_vector_impl_def_int_t> test4(6, 0, 4294965, send_costs2);
     BOOST_CHECK_EQUAL(test4.numberOfProcessors(), 6);
diff --git a/tests/bsp_greedy_recomputer.cpp b/tests/bsp_greedy_recomputer.cpp
index d411a2b5..cd3ab446 100644
--- a/tests/bsp_greedy_recomputer.cpp
+++ b/tests/bsp_greedy_recomputer.cpp
@@ -18,18 +18,17 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_GREEDY_RECOMPUTER
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
+#include <iostream>
 
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include <filesystem>
-#include <iostream>
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_recomputer) {
-
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance1;
@@ -59,8 +58,8 @@ BOOST_AUTO_TEST_CASE(test_recomputer) {
     scheduler.computeRecompSchedule(schedule_init_cs1, schedule);
     BOOST_CHECK(schedule.satisfiesConstraints());
     BOOST_CHECK(schedule.computeCosts() < schedule_init_cs1.computeCosts());
-    std::cout<<"Cost decrease by greedy recomp: "<<schedule_init_cs1.computeCosts()<<" -> "<<schedule.computeCosts()<<std::endl;
-
+    std::cout << "Cost decrease by greedy recomp: " << schedule_init_cs1.computeCosts() << " -> " << schedule.computeCosts()
+              << std::endl;
 
     // non-toy instance
 
@@ -77,8 +76,8 @@ BOOST_AUTO_TEST_CASE(test_recomputer) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance2.getComputationalDag());
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(),
+                                                                    instance2.getComputationalDag());
 
     BOOST_CHECK(status);
 
@@ -92,5 +91,6 @@ BOOST_AUTO_TEST_CASE(test_recomputer) {
     scheduler.computeRecompSchedule(schedule_init_cs2, schedule);
     BOOST_CHECK(schedule.satisfiesConstraints());
     BOOST_CHECK(schedule.computeCosts() < schedule_init_cs2.computeCosts());
-    std::cout<<"Cost decrease by greedy recomp: "<<schedule_init_cs2.computeCosts()<<" -> "<<schedule.computeCosts()<<std::endl;
-}
\ No newline at end of file
+    std::cout << "Cost decrease by greedy recomp: " << schedule_init_cs2.computeCosts() << " -> " << schedule.computeCosts()
+              << std::endl;
+}
diff --git a/tests/bsp_improvementschedulers.cpp b/tests/bsp_improvementschedulers.cpp
index e117bf2d..f1695297 100644
--- a/tests/bsp_improvementschedulers.cpp
+++ b/tests/bsp_improvementschedulers.cpp
@@ -18,14 +18,12 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_IMPROVEMENTSCHEDULERS
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
 #include "osp/bsp/scheduler/ImprovementScheduler.hpp"
 
-
 // std::vector<std::string> test_graphs() {
 //     return {"data/spaa/small/instance_exp_N20_K4_nzP0d2.txt", "data/spaa/small/instance_kNN_N20_K5_nzP0d2.txt",
 //             "data/spaa/small/instance_exp_N10_K8_nzP0d2.txt"};  //, "data/spaa/medium/instance_kNN_N50_K4_nzP0d18.txt"}; //,
@@ -94,7 +92,6 @@ limitations under the License.
 
 //             BspInstance instance(graph, architecture);
 
-
 //             RandomBadGreedy test0;
 
 //             std::pair<RETURN_STATUS, BspSchedule> result0 = test0.computeSchedule(instance);
@@ -107,7 +104,6 @@ limitations under the License.
 //             BOOST_CHECK(result0.second.satisfiesPrecedenceConstraints());
 //             BOOST_CHECK(result0.second.hasValidCommSchedule());
 
-
 //             BalDMixR test1;
 
 //             std::pair<RETURN_STATUS, BspSchedule> result1 = test1.computeSchedule(instance);
@@ -120,7 +116,6 @@ limitations under the License.
 //             BOOST_CHECK(result1.second.satisfiesPrecedenceConstraints());
 //             BOOST_CHECK(result1.second.hasValidCommSchedule());
 
-
 //             HDagg_simple test2;
 
 //             std::pair<RETURN_STATUS, BspSchedule> result2 = test2.computeSchedule(instance);
@@ -137,8 +132,6 @@ limitations under the License.
 //     }
 // };
 
-
-
 BOOST_AUTO_TEST_CASE(Hungarian_alg_process_permuter_test) {
     // Hungarian_alg_process_permuter test;
     // run_test(&test);
@@ -151,12 +144,8 @@ BOOST_AUTO_TEST_CASE(Hungarian_alg_process_permuter_test) {
 
 // BOOST_AUTO_TEST_CASE(LKTotalCommScheduler_test) {
 //     kl_total_comm test;
-    
+
 //     test.setTimeLimitSeconds(10);
 //     test.set_compute_with_time_limit(true);
 //     run_test(&test);
 // }
-
-
-
-
diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp
index 101e4b2f..d87d8259 100644
--- a/tests/bsp_instance.cpp
+++ b/tests/bsp_instance.cpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE Bsp_Architecture
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
+#include <iostream>
 
 #include "osp/auxiliary/io/arch_file_reader.hpp"
 #include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
@@ -26,8 +28,6 @@ limitations under the License.
 #include "osp/bsp/model/util/CompatibleProcessorRange.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include <filesystem>
-#include <iostream>
 
 using namespace osp;
 
@@ -53,7 +53,6 @@ BOOST_AUTO_TEST_CASE(test_1) {
 }
 
 BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
-
     BspInstance<computational_dag_edge_idx_vector_impl_def_t> instance;
     instance.setNumberOfProcessors(4);
     instance.setCommunicationCosts(2);
@@ -67,8 +66,8 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(),
+                                                                    instance.getComputationalDag());
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54);
@@ -117,63 +116,47 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
     BspInstance<computational_dag_vector_impl_def_t> instance_t2(instance);
 
     BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices());
-    BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(),
-                      instance.getComputationalDag().num_vertex_types());
+    BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types());
     BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges());
-    BOOST_CHECK_EQUAL(instance_t2.getArchitecture().numberOfProcessors(),
-                      instance.getArchitecture().numberOfProcessors());
+    BOOST_CHECK_EQUAL(instance_t2.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors());
     BOOST_CHECK_EQUAL(instance_t2.getArchitecture().getNumberOfProcessorTypes(),
                       instance.getArchitecture().getNumberOfProcessorTypes());
-    BOOST_CHECK_EQUAL(instance_t2.getArchitecture().communicationCosts(),
-                      instance.getArchitecture().communicationCosts());
-    BOOST_CHECK_EQUAL(instance_t2.getArchitecture().synchronisationCosts(),
-                      instance.getArchitecture().synchronisationCosts());
+    BOOST_CHECK_EQUAL(instance_t2.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts());
+    BOOST_CHECK_EQUAL(instance_t2.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts());
 
     BspInstance<computational_dag_edge_idx_vector_impl_def_t> instance_t3;
 
     instance_t3 = instance;
 
     BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices());
-    BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertex_types(),
-                      instance.getComputationalDag().num_vertex_types());
+    BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types());
     BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges());
-    BOOST_CHECK_EQUAL(instance_t3.getArchitecture().numberOfProcessors(),
-                      instance.getArchitecture().numberOfProcessors());
+    BOOST_CHECK_EQUAL(instance_t3.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors());
     BOOST_CHECK_EQUAL(instance_t3.getArchitecture().getNumberOfProcessorTypes(),
                       instance.getArchitecture().getNumberOfProcessorTypes());
-    BOOST_CHECK_EQUAL(instance_t3.getArchitecture().communicationCosts(),
-                      instance.getArchitecture().communicationCosts());
-    BOOST_CHECK_EQUAL(instance_t3.getArchitecture().synchronisationCosts(),
-                      instance.getArchitecture().synchronisationCosts());
+    BOOST_CHECK_EQUAL(instance_t3.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts());
+    BOOST_CHECK_EQUAL(instance_t3.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts());
 
     BspInstance<computational_dag_edge_idx_vector_impl_def_t> instance_t4(std::move(instance_t3));
 
     BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices());
-    BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertex_types(),
-                      instance.getComputationalDag().num_vertex_types());
+    BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types());
     BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges());
-    BOOST_CHECK_EQUAL(instance_t4.getArchitecture().numberOfProcessors(),
-                      instance.getArchitecture().numberOfProcessors());
+    BOOST_CHECK_EQUAL(instance_t4.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors());
     BOOST_CHECK_EQUAL(instance_t4.getArchitecture().getNumberOfProcessorTypes(),
                       instance.getArchitecture().getNumberOfProcessorTypes());
-    BOOST_CHECK_EQUAL(instance_t4.getArchitecture().communicationCosts(),
-                      instance.getArchitecture().communicationCosts());
-    BOOST_CHECK_EQUAL(instance_t4.getArchitecture().synchronisationCosts(),
-                      instance.getArchitecture().synchronisationCosts());
+    BOOST_CHECK_EQUAL(instance_t4.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts());
+    BOOST_CHECK_EQUAL(instance_t4.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts());
 
     BspInstance<computational_dag_edge_idx_vector_impl_def_t> instance_t5;
 
     instance_t5 = std::move(instance_t4);
     BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices());
-    BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertex_types(),
-                      instance.getComputationalDag().num_vertex_types());
+    BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types());
     BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges());
-    BOOST_CHECK_EQUAL(instance_t5.getArchitecture().numberOfProcessors(),
-                      instance.getArchitecture().numberOfProcessors());
+    BOOST_CHECK_EQUAL(instance_t5.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors());
     BOOST_CHECK_EQUAL(instance_t5.getArchitecture().getNumberOfProcessorTypes(),
                       instance.getArchitecture().getNumberOfProcessorTypes());
-    BOOST_CHECK_EQUAL(instance_t5.getArchitecture().communicationCosts(),
-                      instance.getArchitecture().communicationCosts());
-    BOOST_CHECK_EQUAL(instance_t5.getArchitecture().synchronisationCosts(),
-                      instance.getArchitecture().synchronisationCosts());
-}
\ No newline at end of file
+    BOOST_CHECK_EQUAL(instance_t5.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts());
+    BOOST_CHECK_EQUAL(instance_t5.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts());
+}
diff --git a/tests/bsp_schedule.cpp b/tests/bsp_schedule.cpp
index 0b587266..60cdf53e 100644
--- a/tests/bsp_schedule.cpp
+++ b/tests/bsp_schedule.cpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE Bsp_Architecture
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
+#include <iostream>
 
 #include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
@@ -29,11 +31,6 @@ limitations under the License.
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
 #include "osp/bsp/model/MaxBspSchedule.hpp"
 #include "osp/bsp/model/MaxBspScheduleCS.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include <filesystem>
-#include <iostream>
-
 #include "osp/bsp/model/cost/BufferedSendingCost.hpp"
 #include "osp/bsp/model/cost/LazyCommunicationCost.hpp"
 #include "osp/bsp/model/cost/TotalCommunicationCost.hpp"
@@ -47,11 +44,12 @@ limitations under the License.
 #include "osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp"
 #include "osp/bsp/scheduler/Serial.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
 
     BspInstance<graph> instance;
@@ -67,16 +65,18 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readGraph(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54);
     BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 1);
 
-    std::vector<Scheduler<graph> *> schedulers = {new BspLocking<graph>(), new EtfScheduler<graph>(),
-                                                  new GreedyBspScheduler<graph>(), new GreedyChildren<graph>(),
-                                                  new GrowLocalAutoCores<graph>(), new VarianceFillup<graph>()};
+    std::vector<Scheduler<graph> *> schedulers = {new BspLocking<graph>(),
+                                                  new EtfScheduler<graph>(),
+                                                  new GreedyBspScheduler<graph>(),
+                                                  new GreedyChildren<graph>(),
+                                                  new GrowLocalAutoCores<graph>(),
+                                                  new VarianceFillup<graph>()};
 
     std::vector<int> expected_bsp_costs = {92, 108, 100, 108, 102, 110};
     std::vector<double> expected_total_costs = {74, 87, 84.25, 80.25, 91.25, 86.75};
@@ -87,7 +87,6 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
 
     size_t i = 0;
     for (auto &scheduler : schedulers) {
-
         BspSchedule<graph> schedule(instance);
 
         const auto result = scheduler->computeSchedule(schedule);
@@ -125,7 +124,6 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) {
 }
 
 BOOST_AUTO_TEST_CASE(test_schedule_writer) {
-
     using graph_t1 = computational_dag_edge_idx_vector_impl_def_int_t;
     using graph_t2 = computational_dag_vector_impl_def_int_t;
 
@@ -142,8 +140,7 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readGraph(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54);
@@ -171,12 +168,10 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) {
     BOOST_CHECK(schedule_t2.satisfiesPrecedenceConstraints());
 
     BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices());
-    BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(),
-                      instance.getComputationalDag().num_vertex_types());
+    BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types());
     BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_work_weight(v),
                           instance.getComputationalDag().vertex_work_weight(v));
         BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_comm_weight(v),
@@ -185,11 +180,9 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) {
         BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_mem_weight(v),
                           instance.getComputationalDag().vertex_mem_weight(v));
 
-        BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_type(v),
-                          instance.getComputationalDag().vertex_type(v));
+        BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_type(v), instance.getComputationalDag().vertex_type(v));
 
-        BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().out_degree(v),
-                          instance.getComputationalDag().out_degree(v));
+        BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().out_degree(v), instance.getComputationalDag().out_degree(v));
 
         BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().in_degree(v), instance.getComputationalDag().in_degree(v));
     }
@@ -216,7 +209,6 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) {
 }
 
 BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
 
     BspInstance<graph> instance;
@@ -232,8 +224,7 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
         std::cout << cwd << std::endl;
     }
 
-    file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(),
-                           instance.getComputationalDag());
+    file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
 
     BspSchedule<graph> schedule(instance);
     BspLocking<graph> scheduler;
@@ -252,7 +243,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK_EQUAL(schedule_t2.numberOfSupersteps(), schedule.numberOfSupersteps());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_t2.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_t2.assignedProcessor(v), schedule.assignedProcessor(v));
     }
@@ -265,7 +255,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK_EQUAL(schedule_t3.numberOfSupersteps(), schedule.numberOfSupersteps());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_t3.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_t3.assignedProcessor(v), schedule.assignedProcessor(v));
     }
@@ -278,7 +267,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK(schedule_t4.satisfiesPrecedenceConstraints());
     BOOST_CHECK_EQUAL(schedule_t4.numberOfSupersteps(), schedule.numberOfSupersteps());
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_t4.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_t4.assignedProcessor(v), schedule.assignedProcessor(v));
     }
@@ -290,7 +278,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK_EQUAL(schedule_t5.numberOfSupersteps(), schedule.numberOfSupersteps());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_t5.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_t5.assignedProcessor(v), schedule.assignedProcessor(v));
     }
@@ -303,7 +290,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK_EQUAL(schedule_cs.numberOfSupersteps(), schedule.numberOfSupersteps());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_cs.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_cs.assignedProcessor(v), schedule.assignedProcessor(v));
     }
@@ -315,7 +301,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK_EQUAL(schedule_t5.numberOfSupersteps(), schedule.numberOfSupersteps());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_t5.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_t5.assignedProcessor(v), schedule.assignedProcessor(v));
     }
@@ -328,26 +313,24 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) {
     BOOST_CHECK_EQUAL(schedule_cs_t2.numberOfSupersteps(), schedule.numberOfSupersteps());
 
     for (const auto &v : instance.getComputationalDag().vertices()) {
-
         BOOST_CHECK_EQUAL(schedule_cs_t2.assignedSuperstep(v), schedule.assignedSuperstep(v));
         BOOST_CHECK_EQUAL(schedule_cs_t2.assignedProcessor(v), schedule.assignedProcessor(v));
     }
 }
 
 BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
 
     BspInstance<graph> instance;
     instance.setNumberOfProcessors(2);
-    instance.setCommunicationCosts(10);    // g=10
-    instance.setSynchronisationCosts(100); // l=100 (not used in MaxBspSchedule cost model)
+    instance.setCommunicationCosts(10);       // g=10
+    instance.setSynchronisationCosts(100);    // l=100 (not used in MaxBspSchedule cost model)
 
     auto &dag = instance.getComputationalDag();
-    dag.add_vertex(10, 1, 0); // Node 0
-    dag.add_vertex(5, 2, 0);  // Node 1
-    dag.add_vertex(5, 3, 0);  // Node 2
-    dag.add_vertex(10, 4, 0); // Node 3
+    dag.add_vertex(10, 1, 0);    // Node 0
+    dag.add_vertex(5, 2, 0);     // Node 1
+    dag.add_vertex(5, 3, 0);     // Node 2
+    dag.add_vertex(10, 4, 0);    // Node 3
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
     dag.add_edge(1, 3);
@@ -361,18 +344,20 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) {
         schedule.setAssignedProcessor(1, 0);
         schedule.setAssignedSuperstep(1, 1);
         schedule.setAssignedProcessor(2, 1);
-        schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2
+        schedule.setAssignedSuperstep(2, 2);    // 0->2 is cross-proc, 2 >= 0+2
         schedule.setAssignedProcessor(3, 0);
-        schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2
+        schedule.setAssignedSuperstep(3, 4);    // 2->3 is cross-proc, 4 >= 2+2
         schedule.updateNumberOfSupersteps();
 
         BOOST_CHECK(schedule.satisfiesPrecedenceConstraints());
 
         // Manual cost calculation:
         // Superstep 0: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10.
-        // Superstep 1: work = {5, 0} -> max_work = 5. comm from SS0: 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. Cost = max(5,l+10) = 110.
-        // Superstep 2: work = {0, 5} -> max_work = 5. comm = 0. Cost = max(5, 0) = 5.
-        // Superstep 3: work = {0, 0} -> max_work = 0. comm from SS2: 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=3*10=30. Cost = max(0,l+30) = 130.
+        // Superstep 1: work = {5, 0} -> max_work = 5.
+        //   comm from SS0: 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. Cost = max(5,l+10) = 110.
+        // Superstep 2: work = {0, 5} -> max_work = 5. comm = 0. Cost = max(5, 0) = 5.
+        // Superstep 3: work = {0, 0} -> max_work = 0.
+        //   comm from SS2: 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=3*10=30. Cost = max(0,l+30) = 130.
         // Superstep 4: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10.
         // Total cost = 10 + 110 + 5 + 130 + 10 = 265
         BOOST_CHECK_EQUAL(schedule.computeCosts(), 265);
@@ -384,22 +367,24 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) {
         schedule.setAssignedProcessor(0, 0);
         schedule.setAssignedSuperstep(0, 0);
         schedule.setAssignedProcessor(1, 1);
-        schedule.setAssignedSuperstep(1, 2); // 0->1 is cross-proc, 2 >= 0+2
+        schedule.setAssignedSuperstep(1, 2);    // 0->1 is cross-proc, 2 >= 0+2
         schedule.setAssignedProcessor(2, 1);
-        schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2
+        schedule.setAssignedSuperstep(2, 2);    // 0->2 is cross-proc, 2 >= 0+2
         schedule.setAssignedProcessor(3, 0);
-        schedule.setAssignedSuperstep(3, 4); // 1->3, 2->3 are cross-proc, 4 >= 2+2
+        schedule.setAssignedSuperstep(3, 4);    // 1->3, 2->3 are cross-proc, 4 >= 2+2
         schedule.updateNumberOfSupersteps();
 
         BOOST_CHECK(schedule.satisfiesPrecedenceConstraints());
 
         // Manual cost calculation:
         // Superstep 0: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10.
-        // Superstep 1: work = {0, 0} -> max_work = 0. comm from SS0: 0->1, 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. Cost = max(0,l+10)=110.
-        // Superstep 2: work = {0, 10} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10.
-        // Superstep 3: work = {0, 0} -> max_work = 0. comm from SS2: 1->3, 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=(2+3)*10=50. Cost = max(0,l+50)=150.
-        // Superstep 4: work = {10, 0} -> max_work = 10. Cost = max(10, 0) = 10.
-        // Total cost = 10 + 110 + 10 + 150 + 10 = 290
+        // Superstep 1: work = {0, 0} -> max_work = 0.
+        //   comm from SS0: 0->1, 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. Cost = max(0,l+10)=110.
+        // Superstep 2: work = {0, 10} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10.
+        // Superstep 3: work = {0, 0} -> max_work = 0.
+        //   comm from SS2: 1->3, 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=(2+3)*10=50. Cost = max(0,l+50)=150.
+        // Superstep 4: work = {10, 0} -> max_work = 10. Cost = max(10, 0) = 10.
+        // Total cost = 10 + 110 + 10 + 150 + 10 = 290
         BOOST_CHECK_EQUAL(schedule.computeCosts(), 290);
     }
 
@@ -408,8 +391,8 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) {
         MaxBspSchedule<graph> schedule(instance);
         schedule.setAssignedProcessor(0, 0);
         schedule.setAssignedSuperstep(0, 0);
-        schedule.setAssignedProcessor(1, 1); // 0->1 on different procs
-        schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1)
+        schedule.setAssignedProcessor(1, 1);    // 0->1 on different procs
+        schedule.setAssignedSuperstep(1, 1);    // step(1) >= step(0)+2 is FALSE (1 < 0+2)
         schedule.updateNumberOfSupersteps();
 
         BOOST_CHECK(!schedule.satisfiesPrecedenceConstraints());
@@ -417,19 +400,18 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) {
 }
 
 BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
 
     BspInstance<graph> instance;
     instance.setNumberOfProcessors(2);
-    instance.setCommunicationCosts(10);    // g=10
-    instance.setSynchronisationCosts(100); // l=100
+    instance.setCommunicationCosts(10);       // g=10
+    instance.setSynchronisationCosts(100);    // l=100
 
     auto &dag = instance.getComputationalDag();
-    dag.add_vertex(10, 1, 0); // Node 0
-    dag.add_vertex(5, 2, 0);  // Node 1
-    dag.add_vertex(5, 3, 0);  // Node 2
-    dag.add_vertex(10, 4, 0); // Node 3
+    dag.add_vertex(10, 1, 0);    // Node 0
+    dag.add_vertex(5, 2, 0);     // Node 1
+    dag.add_vertex(5, 3, 0);     // Node 2
+    dag.add_vertex(10, 4, 0);    // Node 3
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
     dag.add_edge(1, 3);
@@ -443,16 +425,16 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) {
         schedule.setAssignedProcessor(1, 0);
         schedule.setAssignedSuperstep(1, 1);
         schedule.setAssignedProcessor(2, 1);
-        schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2
+        schedule.setAssignedSuperstep(2, 2);    // 0->2 is cross-proc, 2 >= 0+2
         schedule.setAssignedProcessor(3, 0);
-        schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2
+        schedule.setAssignedSuperstep(3, 4);    // 2->3 is cross-proc, 4 >= 2+2
         schedule.updateNumberOfSupersteps();
 
         BOOST_CHECK(schedule.satisfiesPrecedenceConstraints());
 
         // Set communication schedule (eager)
-        schedule.addCommunicationScheduleEntry(0, 0, 1, 0); // 0->2 (P0->P1) sent in SS0
-        schedule.addCommunicationScheduleEntry(2, 1, 0, 2); // 2->3 (P1->P0) sent in SS2
+        schedule.addCommunicationScheduleEntry(0, 0, 1, 0);    // 0->2 (P0->P1) sent in SS0
+        schedule.addCommunicationScheduleEntry(2, 1, 0, 2);    // 2->3 (P1->P0) sent in SS2
 
         BOOST_CHECK(schedule.hasValidCommSchedule());
 
@@ -471,10 +453,10 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) {
         MaxBspScheduleCS<graph> schedule(instance);
         schedule.setAssignedProcessor(0, 0);
         schedule.setAssignedSuperstep(0, 0);
-        schedule.setAssignedProcessor(1, 1); // 0->1 on different procs
-        schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1)
+        schedule.setAssignedProcessor(1, 1);    // 0->1 on different procs
+        schedule.setAssignedSuperstep(1, 1);    // step(1) >= step(0)+2 is FALSE (1 < 0+2)
         schedule.updateNumberOfSupersteps();
 
         BOOST_CHECK(!schedule.satisfiesPrecedenceConstraints());
     }
-}
\ No newline at end of file
+}
diff --git a/tests/bsp_schedule_recomp.cpp b/tests/bsp_schedule_recomp.cpp
index 13fa648b..6d1ce9f1 100644
--- a/tests/bsp_schedule_recomp.cpp
+++ b/tests/bsp_schedule_recomp.cpp
@@ -18,20 +18,17 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_SCHEDULE_RECOMP
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
+#include <iostream>
 
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include <filesystem>
-#include <iostream>
 
 using namespace osp;
 
-BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test)
-{
-
+BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) {
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -48,7 +45,7 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test)
     }
 
     file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(),
-                                                    instance.getComputationalDag());
+                                                      instance.getComputationalDag());
 
     BspSchedule<graph> schedule(instance);
     GreedyBspScheduler<graph> scheduler;
@@ -70,5 +67,4 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test)
     BspScheduleRecomp<graph> schedule_recomp_from_cs(scheduleCS);
     BOOST_CHECK(schedule_recomp_from_cs.satisfiesConstraints());
     BOOST_CHECK_EQUAL(schedule_recomp_from_cs.computeCosts(), scheduleCS.computeCosts());
-
-}
\ No newline at end of file
+}
diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp
index d1fdbe4d..d9f16d41 100644
--- a/tests/bsp_schedulers.cpp
+++ b/tests/bsp_schedulers.cpp
@@ -18,12 +18,13 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_SCHEDULERS
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
-#include "osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp"
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp"
@@ -36,22 +37,20 @@ limitations under the License.
 #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp"
 #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp"
 #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp"
+#include "osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp"
 #include "osp/bsp/scheduler/Serial.hpp"
 #include "osp/coarser/Sarkar/SarkarMul.hpp"
 #include "osp/coarser/SquashA/SquashAMul.hpp"
 #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
 #include "test_graphs.hpp"
 
 using namespace osp;
 
 std::vector<std::string> test_architectures() { return {"data/machine_params/p3.arch"}; }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
@@ -78,13 +77,11 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
 
             BspInstance<Graph_t> instance;
 
-            bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+            bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -98,7 +95,7 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test_2(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
@@ -127,11 +124,9 @@ void run_test_2(Scheduler<Graph_t> *test_scheduler) {
             BspArchitecture<Graph_t> arch;
 
             bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph);
-            bool status_architecture =
-                file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch);
+            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch);
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -148,75 +143,63 @@ void run_test_2(Scheduler<Graph_t> *test_scheduler) {
 }
 
 BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) {
-
     GreedyBspScheduler<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test_2) {
-
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(Serial_test) {
-
     Serial<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(cilk_test_1) {
-
     CilkScheduler<computational_dag_vector_impl_def_t> test;
     test.setMode(CILK);
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(cilk_test_2) {
-
     CilkScheduler<computational_dag_vector_impl_def_t> test;
     test.setMode(SJF);
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(etf_test) {
-
     EtfScheduler<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(random_test) {
-
     RandomGreedy<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(children_test) {
-
     GreedyChildren<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(locking_test) {
-
     BspLocking<computational_dag_vector_impl_def_int_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(variancefillup_test) {
-
     VarianceFillup<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(etf_test_edge_desc_impl) {
-
     EtfScheduler<computational_dag_edge_idx_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(grow_local_auto_test_edge_desc_impl) {
-
     GrowLocalAutoCores<computational_dag_edge_idx_vector_impl_def_t> test;
     run_test(&test);
 }
@@ -314,8 +297,7 @@ BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitioner_test) {
     LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, flat_spline_interpolation> test_flat;
     run_test(&test_flat);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, superstep_only_interpolation>
-        test_superstep;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, superstep_only_interpolation> test_superstep;
     run_test(&test_superstep);
 
     LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, global_only_interpolation> test_global;
@@ -326,8 +308,9 @@ BOOST_AUTO_TEST_CASE(SquashAMul_test) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_t> sched;
 
     SquashAMul<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> ml_coarsen;
-    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> coarsen_test(sched, ml_coarsen);
-    
+    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t>
+        coarsen_test(sched, ml_coarsen);
+
     run_test(&coarsen_test);
 }
 
@@ -336,19 +319,19 @@ BOOST_AUTO_TEST_CASE(SquashAMul_improver_test) {
     HillClimbingScheduler<computational_dag_edge_idx_vector_impl_def_t> improver;
 
     SquashAMul<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> ml_coarsen;
-    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> coarsen_test(sched, improver, ml_coarsen);
-    
-    
+    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t>
+        coarsen_test(sched, improver, ml_coarsen);
+
     run_test(&coarsen_test);
 }
 
-
 BOOST_AUTO_TEST_CASE(SarkarMul_test) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_t> sched;
 
     SarkarMul<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> ml_coarsen;
-    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> coarsen_test(sched, ml_coarsen);
-    
+    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t>
+        coarsen_test(sched, ml_coarsen);
+
     run_test(&coarsen_test);
 }
 
@@ -357,7 +340,8 @@ BOOST_AUTO_TEST_CASE(SarkarMul_improver_test) {
     HillClimbingScheduler<computational_dag_edge_idx_vector_impl_def_t> improver;
 
     SarkarMul<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> ml_coarsen;
-    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t> coarsen_test(sched, improver, ml_coarsen);
-    
+    MultilevelCoarseAndSchedule<computational_dag_edge_idx_vector_impl_def_t, computational_dag_edge_idx_vector_impl_def_t>
+        coarsen_test(sched, improver, ml_coarsen);
+
     run_test(&coarsen_test);
-}
\ No newline at end of file
+}
diff --git a/tests/bsp_schedulers_mem_const.cpp b/tests/bsp_schedulers_mem_const.cpp
index 7e3dead3..f1f39a34 100644
--- a/tests/bsp_schedulers_mem_const.cpp
+++ b/tests/bsp_schedulers_mem_const.cpp
@@ -18,45 +18,42 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_SCHEDULERS
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
-#include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp"
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp"
 #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp"
+#include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp"
+#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp"
+#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp"
 #include "osp/bsp/scheduler/Serial.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp"
-#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp"
 #include "test_graphs.hpp"
 
 using namespace osp;
 
 std::vector<std::string> test_architectures() { return {"data/machine_params/p3.arch"}; }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_mem_weights(Graph_t &dag) {
-
     int mem_weight = 1;
     int comm_weight = 1;
 
     for (const auto &v : dag.vertices()) {
-
         dag.set_vertex_mem_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 3 + 1));
         dag.set_vertex_comm_weight(v, static_cast<v_commw_t<Graph_t>>(comm_weight++ % 3 + 1));
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test_local_memory(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = test_graphs();
@@ -84,16 +81,15 @@ void run_test_local_memory(Scheduler<Graph_t> *test_scheduler) {
             BspInstance<Graph_t> instance;
 
             bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+                                                                                  instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             add_mem_weights(instance.getComputationalDag());
             instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL);
             std::cout << "Memory constraint type: LOCAL" << std::endl;
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -101,7 +97,6 @@ void run_test_local_memory(Scheduler<Graph_t> *test_scheduler) {
             const std::vector<v_memw_t<Graph_t>> bounds_to_test = {10, 20, 50, 100};
 
             for (const auto &bound : bounds_to_test) {
-
                 instance.getArchitecture().setMemoryBound(bound);
 
                 BspSchedule<Graph_t> schedule(instance);
@@ -115,7 +110,7 @@ void run_test_local_memory(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test_persistent_transient_memory(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = test_graphs();
@@ -143,16 +138,15 @@ void run_test_persistent_transient_memory(Scheduler<Graph_t> *test_scheduler) {
             BspInstance<Graph_t> instance;
 
             bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+                                                                                  instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             add_mem_weights(instance.getComputationalDag());
             instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT);
             std::cout << "Memory constraint type: PERSISTENT_AND_TRANSIENT" << std::endl;
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -160,7 +154,6 @@ void run_test_persistent_transient_memory(Scheduler<Graph_t> *test_scheduler) {
             const std::vector<v_memw_t<Graph_t>> bounds_to_test = {50, 100};
 
             for (const auto &bound : bounds_to_test) {
-
                 instance.getArchitecture().setMemoryBound(bound);
 
                 BspSchedule<Graph_t> schedule(instance);
@@ -174,7 +167,7 @@ void run_test_persistent_transient_memory(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test_local_in_out_memory(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = test_graphs();
@@ -202,16 +195,15 @@ void run_test_local_in_out_memory(Scheduler<Graph_t> *test_scheduler) {
             BspInstance<Graph_t> instance;
 
             bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+                                                                                  instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             add_mem_weights(instance.getComputationalDag());
             instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT);
             std::cout << "Memory constraint type: LOCAL_IN_OUT" << std::endl;
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -219,7 +211,6 @@ void run_test_local_in_out_memory(Scheduler<Graph_t> *test_scheduler) {
             const std::vector<v_memw_t<Graph_t>> bounds_to_test = {10, 20, 50, 100};
 
             for (const auto &bound : bounds_to_test) {
-
                 instance.getArchitecture().setMemoryBound(bound);
 
                 BspSchedule<Graph_t> schedule(instance);
@@ -233,7 +224,7 @@ void run_test_local_in_out_memory(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test_local_inc_edges_memory(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = test_graphs();
@@ -261,16 +252,15 @@ void run_test_local_inc_edges_memory(Scheduler<Graph_t> *test_scheduler) {
             BspInstance<Graph_t> instance;
 
             bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+                                                                                  instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             add_mem_weights(instance.getComputationalDag());
             instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES);
             std::cout << "Memory constraint type: LOCAL_INC_EDGES" << std::endl;
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -278,7 +268,6 @@ void run_test_local_inc_edges_memory(Scheduler<Graph_t> *test_scheduler) {
             const std::vector<v_memw_t<Graph_t>> bounds_to_test = {50, 100};
 
             for (const auto &bound : bounds_to_test) {
-
                 instance.getArchitecture().setMemoryBound(bound);
 
                 BspSchedule<Graph_t> schedule(instance);
@@ -292,7 +281,7 @@ void run_test_local_inc_edges_memory(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test_local_inc_edges_2_memory(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = test_graphs();
@@ -320,16 +309,15 @@ void run_test_local_inc_edges_2_memory(Scheduler<Graph_t> *test_scheduler) {
             BspInstance<Graph_t> instance;
 
             bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+                                                                                  instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             add_mem_weights(instance.getComputationalDag());
             instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES);
             std::cout << "Memory constraint type: LOCAL_SOURCES_INC_EDGES" << std::endl;
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -337,7 +325,6 @@ void run_test_local_inc_edges_2_memory(Scheduler<Graph_t> *test_scheduler) {
             const std::vector<v_memw_t<Graph_t>> bounds_to_test = {20, 50, 100};
 
             for (const auto &bound : bounds_to_test) {
-
                 instance.getArchitecture().setMemoryBound(bound);
 
                 BspSchedule<Graph_t> schedule(instance);
@@ -352,7 +339,6 @@ void run_test_local_inc_edges_2_memory(Scheduler<Graph_t> *test_scheduler) {
 }
 
 BOOST_AUTO_TEST_CASE(GreedyBspScheduler_local_test) {
-
     using graph_impl_t = computational_dag_edge_idx_vector_impl_def_int_t;
 
     GreedyBspScheduler<graph_impl_t, local_memory_constraint<graph_impl_t>> test_1;
@@ -369,7 +355,6 @@ BOOST_AUTO_TEST_CASE(GreedyBspScheduler_local_test) {
 }
 
 BOOST_AUTO_TEST_CASE(GrowLocalAutoCores_local_test) {
-
     using graph_impl_t = computational_dag_edge_idx_vector_impl_def_int_t;
 
     GrowLocalAutoCores<graph_impl_t, local_memory_constraint<graph_impl_t>> test_1;
@@ -386,7 +371,6 @@ BOOST_AUTO_TEST_CASE(GrowLocalAutoCores_local_test) {
 }
 
 BOOST_AUTO_TEST_CASE(BspLocking_local_test) {
-
     using graph_impl_t = computational_dag_edge_idx_vector_impl_def_t;
 
     BspLocking<graph_impl_t, local_memory_constraint<graph_impl_t>> test_1;
@@ -403,9 +387,7 @@ BOOST_AUTO_TEST_CASE(BspLocking_local_test) {
 }
 
 BOOST_AUTO_TEST_CASE(variance_local_test) {
-
-    VarianceFillup<computational_dag_edge_idx_vector_impl_def_t,
-                   local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+    VarianceFillup<computational_dag_edge_idx_vector_impl_def_t, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
         test;
     run_test_local_memory(&test);
 }
@@ -415,17 +397,16 @@ BOOST_AUTO_TEST_CASE(variance_local_test) {
 //     VarianceFillup<computational_dag_edge_idx_vector_impl_def_t,
 //                    local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
 //         test;
-    
-//     kl_total_comm<computational_dag_edge_idx_vector_impl_def_t, local_search_local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> kl;
-    
+
+//     kl_total_comm<computational_dag_edge_idx_vector_impl_def_t,
+//     local_search_local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> kl;
+
 //     ComboScheduler<computational_dag_edge_idx_vector_impl_def_t> combo_test(test, kl);
-    
+
 //     run_test_local_memory(&combo_test);
 // };
 
-
 BOOST_AUTO_TEST_CASE(GreedyBspScheduler_persistent_transient_test) {
-
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_t,
                        persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
         test;
@@ -433,65 +414,108 @@ BOOST_AUTO_TEST_CASE(GreedyBspScheduler_persistent_transient_test) {
 }
 
 BOOST_AUTO_TEST_CASE(EtfScheduler_persistent_transient_test) {
-
     EtfScheduler<computational_dag_edge_idx_vector_impl_def_t,
                  persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
         test;
     run_test_persistent_transient_memory(&test);
 }
 
-
 BOOST_AUTO_TEST_CASE(VariancePartitioner_test) {
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, linear_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_linear;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        linear_interpolation,
+                        local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_linear;
     run_test_local_memory(&test_linear);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, flat_spline_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_flat;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        flat_spline_interpolation,
+                        local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_flat;
     run_test_local_memory(&test_flat);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, superstep_only_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_superstep;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        superstep_only_interpolation,
+                        local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_superstep;
     run_test_local_memory(&test_superstep);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, global_only_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_global;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        global_only_interpolation,
+                        local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_global;
     run_test_local_memory(&test_global);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, linear_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_linear_tp;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        linear_interpolation,
+                        persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_linear_tp;
     run_test_persistent_transient_memory(&test_linear_tp);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, flat_spline_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_flat_tp;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        flat_spline_interpolation,
+                        persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_flat_tp;
     run_test_persistent_transient_memory(&test_flat_tp);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, superstep_only_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_superstep_tp;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        superstep_only_interpolation,
+                        persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_superstep_tp;
     run_test_persistent_transient_memory(&test_superstep_tp);
 
-    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, global_only_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_global_tp;
+    VariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                        global_only_interpolation,
+                        persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_global_tp;
     run_test_persistent_transient_memory(&test_global_tp);
-
 }
 
-
 BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitioner_test) {
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, linear_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_linear;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 linear_interpolation,
+                                 local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_linear;
     run_test_local_memory(&test_linear);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, flat_spline_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_flat;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 flat_spline_interpolation,
+                                 local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_flat;
     run_test_local_memory(&test_flat);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, superstep_only_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_superstep;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 superstep_only_interpolation,
+                                 local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_superstep;
     run_test_local_memory(&test_superstep);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, global_only_interpolation, local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_global;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 global_only_interpolation,
+                                 local_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_global;
     run_test_local_memory(&test_global);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, linear_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_linear_tp;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 linear_interpolation,
+                                 persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_linear_tp;
     run_test_persistent_transient_memory(&test_linear_tp);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, flat_spline_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_flat_tp;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 flat_spline_interpolation,
+                                 persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_flat_tp;
     run_test_persistent_transient_memory(&test_flat_tp);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, superstep_only_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_superstep_tp;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 superstep_only_interpolation,
+                                 persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_superstep_tp;
     run_test_persistent_transient_memory(&test_superstep_tp);
 
-    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t, global_only_interpolation, persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>> test_global_tp;
+    LightEdgeVariancePartitioner<computational_dag_edge_idx_vector_impl_def_t,
+                                 global_only_interpolation,
+                                 persistent_transient_memory_constraint<computational_dag_edge_idx_vector_impl_def_t>>
+        test_global_tp;
     run_test_persistent_transient_memory(&test_global_tp);
-
 }
diff --git a/tests/coarser.cpp b/tests/coarser.cpp
index 9c77703d..d0882512 100644
--- a/tests/coarser.cpp
+++ b/tests/coarser.cpp
@@ -18,7 +18,6 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE COARSER_TEST
 #include <boost/test/unit_test.hpp>
-
 #include <chrono>
 #include <filesystem>
 #include <iostream>
@@ -48,7 +47,6 @@ using namespace osp;
 using VertexType = vertex_idx_t<computational_dag_edge_idx_vector_impl_def_t>;
 
 bool check_vertex_map(std::vector<std::vector<VertexType>> &map, std::size_t size) {
-
     std::unordered_set<VertexType> vertices;
 
     for (auto &v : map) {
@@ -63,17 +61,16 @@ bool check_vertex_map(std::vector<std::vector<VertexType>> &map, std::size_t siz
     return vertices.size() == size;
 }
 
-template<typename ComputationalDag>
-bool check_vertex_map_constraints(std::vector<std::vector<VertexType>> &map, ComputationalDag &dag,
+template <typename ComputationalDag>
+bool check_vertex_map_constraints(std::vector<std::vector<VertexType>> &map,
+                                  ComputationalDag &dag,
                                   v_type_t<ComputationalDag> size_threshold,
                                   v_memw_t<ComputationalDag> memory_threshold,
                                   v_workw_t<ComputationalDag> work_threshold,
                                   v_commw_t<ComputationalDag> communication_threshold) {
-
     std::unordered_set<VertexType> vertices;
 
     for (auto &super_node : map) {
-
         v_memw_t<ComputationalDag> memory = 0;
         v_workw_t<ComputationalDag> work = 0;
         v_commw_t<ComputationalDag> communication = 0;
@@ -87,7 +84,6 @@ bool check_vertex_map_constraints(std::vector<std::vector<VertexType>> &map, Com
         }
 
         for (auto &v : super_node) {
-
             memory += dag.vertex_mem_weight(v);
             work += dag.vertex_work_weight(v);
             communication += dag.vertex_comm_weight(v);
@@ -117,25 +113,21 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl
-                  << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
         using graph_t = computational_dag_edge_idx_vector_impl_def_t;
 
         BspInstance<graph_t> instance;
 
-        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                   instance.getComputationalDag());
+        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
 
-        bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                    instance.getArchitecture());
+        bool status_architecture
+            = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
         if (!status_graph || !status_architecture) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -190,26 +182,22 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl
-                  << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
         using graph_t1 = computational_dag_edge_idx_vector_impl_def_t;
         using graph_t2 = computational_dag_vector_impl_def_t;
 
         BspInstance<graph_t1> instance;
 
-        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                   instance.getComputationalDag());
+        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
 
-        bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                    instance.getArchitecture());
+        bool status_architecture
+            = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
         if (!status_graph || !status_architecture) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -263,25 +251,21 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl
-                  << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
         using graph_t = computational_dag_edge_idx_vector_impl_def_t;
 
         BspInstance<graph_t> instance;
 
-        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                   instance.getComputationalDag());
+        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
 
-        bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                    instance.getArchitecture());
+        bool status_architecture
+            = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
         if (!status_graph || !status_architecture) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -328,9 +312,8 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) {
     }
 }
 
-template<typename graph_t>
+template <typename graph_t>
 void test_coarser_same_graph(Coarser<graph_t, graph_t> &coarser) {
-
     // BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
@@ -344,23 +327,19 @@ void test_coarser_same_graph(Coarser<graph_t, graph_t> &coarser) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl
-                  << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
         BspInstance<graph_t> instance;
 
-        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                   instance.getComputationalDag());
+        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
 
-        bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                    instance.getArchitecture());
+        bool status_architecture
+            = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
         if (!status_graph || !status_architecture) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -372,7 +351,8 @@ void test_coarser_same_graph(Coarser<graph_t, graph_t> &coarser) {
 
         GreedyBspScheduler<graph_t> scheduler;
 
-        bool coarse_success = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map);
+        bool coarse_success
+            = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map);
         BOOST_CHECK(coarse_success);
 
         vertex_map = coarser_util::invert_vertex_contraction_map<graph_t, graph_t>(reverse_vertex_map);
@@ -401,7 +381,6 @@ void test_coarser_same_graph(Coarser<graph_t, graph_t> &coarser) {
 }
 
 BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) {
-
     using graph_t = computational_dag_edge_idx_vector_impl_def_t;
     FunnelBfs<graph_t, graph_t> coarser;
 
@@ -409,7 +388,9 @@ BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) {
 
     FunnelBfs<graph_t, graph_t>::FunnelBfs_parameters params{std::numeric_limits<v_workw_t<graph_t>>::max(),
                                                              std::numeric_limits<v_memw_t<graph_t>>::max(),
-                                                             std::numeric_limits<unsigned>::max(), false, true};
+                                                             std::numeric_limits<unsigned>::max(),
+                                                             false,
+                                                             true};
 
     FunnelBfs<graph_t, graph_t> coarser_params(params);
 
@@ -422,7 +403,6 @@ BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) {
 }
 
 BOOST_AUTO_TEST_CASE(coarser_top_sort_test) {
-
     using graph_t = computational_dag_edge_idx_vector_impl_def_t;
     top_order_coarser<graph_t, graph_t, GetTopOrder> coarser;
 
@@ -475,26 +455,22 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl
-                  << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
         using graph_t1 = computational_dag_edge_idx_vector_impl_def_t;
         using graph_t2 = CSG;
 
         BspInstance<graph_t1> instance;
 
-        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                   instance.getComputationalDag());
+        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
 
-        bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                    instance.getArchitecture());
+        bool status_architecture
+            = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
         if (!status_graph || !status_architecture) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -552,26 +528,22 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl
-                  << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
         using graph_t1 = computational_dag_edge_idx_vector_impl_def_t;
         using graph_t2 = CSGE;
 
         BspInstance<graph_t1> instance;
 
-        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                   instance.getComputationalDag());
+        bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
 
-        bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                    instance.getArchitecture());
+        bool status_architecture
+            = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
         if (!status_graph || !status_architecture) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -704,4 +676,4 @@ BOOST_AUTO_TEST_CASE(SquashAML_test) {
     SquashAMul<graph_t, graph_t> coarser;
 
     test_coarser_same_graph<graph_t>(coarser);
-}
\ No newline at end of file
+}
diff --git a/tests/coarser_util.cpp b/tests/coarser_util.cpp
index 47ac1738..61b418d7 100644
--- a/tests/coarser_util.cpp
+++ b/tests/coarser_util.cpp
@@ -17,11 +17,11 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE COARSER_UTIL_TEST
-#include <boost/test/unit_test.hpp>
+#include "osp/coarser/coarser_util.hpp"
 
+#include <boost/test/unit_test.hpp>
 #include <set>
 
-#include "osp/coarser/coarser_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 
 using namespace osp;
@@ -53,10 +53,16 @@ BOOST_AUTO_TEST_CASE(ExpansionMapValidity) {
     const std::vector<std::vector<vertex_idx_t<GraphType>>> expansionmap2 = {{0}, {2}, {3}};
     BOOST_CHECK(not check_valid_expansion_map<GraphType>(expansionmap2));
 
-    const std::vector<std::vector<vertex_idx_t<GraphType>>> expansionmap3 = {{0, 3}};
+    const std::vector<std::vector<vertex_idx_t<GraphType>>> expansionmap3 = {
+        {0, 3}
+    };
     BOOST_CHECK(not check_valid_expansion_map<GraphType>(expansionmap3));
 
-    const std::vector<std::vector<vertex_idx_t<GraphType>>> expansionmap4 = {{0, 3}, {2, 1, 4}, {5}};
+    const std::vector<std::vector<vertex_idx_t<GraphType>>> expansionmap4 = {
+        {0, 3},
+        {2, 1, 4},
+        {5}
+    };
     BOOST_CHECK(check_valid_expansion_map<GraphType>(expansionmap4));
 
     const std::vector<std::vector<vertex_idx_t<GraphType>>> expansionmap5 = {{0}, {}, {2}, {3}, {1}};
@@ -64,7 +70,10 @@ BOOST_AUTO_TEST_CASE(ExpansionMapValidity) {
 }
 
 BOOST_AUTO_TEST_CASE(ContractionMapCoarsening) {
-    std::set<std::pair<vertex_idx_t<GraphType>, vertex_idx_t<GraphType>>> edges({{0, 1}, {1, 2}});
+    std::set<std::pair<vertex_idx_t<GraphType>, vertex_idx_t<GraphType>>> edges({
+        {0, 1},
+        {1, 2}
+    });
     GraphType graph(6, edges);
 
     GraphType coarseGraph1;
@@ -91,4 +100,4 @@ BOOST_AUTO_TEST_CASE(ContractionMapCoarsening) {
     for (const auto &vert : coarseGraph1.parents(1)) {
         BOOST_CHECK_EQUAL(vert, 0);
     }
-}
\ No newline at end of file
+}
diff --git a/tests/compact_sparse_graph.cpp b/tests/compact_sparse_graph.cpp
index f567827d..ba191d70 100644
--- a/tests/compact_sparse_graph.cpp
+++ b/tests/compact_sparse_graph.cpp
@@ -17,9 +17,10 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE Sparse_Compact_Graph
+#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
+
 #include <boost/test/unit_test.hpp>
 
-#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
 using namespace osp;
@@ -62,7 +63,15 @@ BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) {
 }
 
 BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
-    const std::set<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}});
+    const std::set<std::pair<std::size_t, std::size_t>> edges({
+        {0, 1},
+        {1, 2},
+        {2, 3},
+        {3, 4},
+        {4, 5},
+        {5, 6},
+        {6, 7}
+    });
 
     Compact_Sparse_Graph<true> graph(8, edges);
 
@@ -141,9 +150,16 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(LineGraph_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0, 1},
+        {1, 2},
+        {2, 3},
+        {3, 4},
+        {4, 5},
+        {5, 6},
+        {6, 7}
+    });
 
     Compact_Sparse_Graph<false> graph(8, edges);
 
@@ -230,9 +246,20 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph<true> graph(11, edges);
 
@@ -276,9 +303,7 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
 
         cntr = 0;
         for (const auto &e : osp::out_edges(vert, graph)) {
-
             BOOST_CHECK_EQUAL(target(e, graph), out_edges[vert][cntr++]);
-
         }
     }
 
@@ -312,15 +337,12 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
 
         cntr = 0;
         for (const auto &e : osp::in_edges(vert, graph)) {
-
             BOOST_CHECK_EQUAL(source(e, graph), in_edges[vert][cntr++]);
-
         }
     }
-    
+
     unsigned count = 0;
-    for (const auto & e: osp::edges(graph)) {
-        
+    for (const auto &e : osp::edges(graph)) {
         std::cout << e.source << " -> " << e.target << std::endl;
         count++;
     }
@@ -337,7 +359,19 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
 }
 
 BOOST_AUTO_TEST_CASE(Graph1_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph<false> graph(11, edges);
 
@@ -371,9 +405,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_chld = 0;
         std::size_t cntr = 0;
         for (const auto &chld : graph.children(vert)) {
@@ -381,7 +415,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
                 BOOST_CHECK_LE(previous_chld, chld);
             }
 
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = chld;
             ++cntr;
@@ -394,7 +429,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
             }
 
             --cntr;
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = *it;
         }
@@ -415,9 +451,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_par = 0;
         std::size_t cntr = 0;
         for (const auto &par : graph.parents(vert)) {
@@ -425,7 +461,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
                 BOOST_CHECK_LE(previous_par, par);
             }
 
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par])
+                        != in_edges[ori_vert].cend());
 
             previous_par = par;
             ++cntr;
@@ -438,12 +475,13 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
             }
 
             --cntr;
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it])
+                        != in_edges[ori_vert].cend());
 
             previous_par = *it;
         }
     }
-    
+
     for (const auto &vert : graph.vertices()) {
         BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size());
     }
@@ -454,7 +492,6 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
 }
 
 BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
-
     computational_dag_edge_idx_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx;
@@ -498,12 +535,10 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
     BOOST_CHECK_EQUAL(graph.num_edges(), 9);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 8);
 
-
     Compact_Sparse_Graph<true, false, false, false, false, vertex_idx> copy_graph(graph.num_vertices(), edge_view(graph));
     BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 8);
     BOOST_CHECK_EQUAL(copy_graph.num_edges(), 9);
 
-
     std::vector<std::vector<std::size_t>> out_edges({
         {1, 2, 3},
         {4, 6},
@@ -554,9 +589,9 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
     BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend()));
 
     for (const auto &vert : reorder_graph.vertices()) {
-        BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_chld = 0;
         std::size_t cntr = 0;
         for (const auto &chld : reorder_graph.children(vert)) {
@@ -564,7 +599,8 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
                 BOOST_CHECK_LE(previous_chld, chld);
             }
 
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = chld;
             ++cntr;
@@ -572,9 +608,9 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
     }
 
     for (const auto &vert : reorder_graph.vertices()) {
-        BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_par = 0;
         std::size_t cntr = 0;
         for (const auto &par : reorder_graph.parents(vert)) {
@@ -582,7 +618,8 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
                 BOOST_CHECK_LE(previous_par, par);
             }
 
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par])
+                        != in_edges[ori_vert].cend());
 
             previous_par = par;
             ++cntr;
@@ -591,7 +628,19 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) {
 }
 
 BOOST_AUTO_TEST_CASE(Graph_work_weights_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -599,16 +648,28 @@ BOOST_AUTO_TEST_CASE(Graph_work_weights_keep_order) {
     Compact_Sparse_Graph<true, true> graph(11, edges, ww);
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]);
 
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_work_weight(vert, wt);
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_work_weights_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -618,16 +679,28 @@ BOOST_AUTO_TEST_CASE(Graph_work_weights_reorder) {
     const std::vector<std::size_t> &graph_perm = graph.get_pullback_permutation();
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
 
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_work_weight(graph_perm[vert], wt);
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_comm_weights_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -638,20 +711,32 @@ BOOST_AUTO_TEST_CASE(Graph_comm_weights_keep_order) {
     Compact_Sparse_Graph<true, true, true> graph(11, edges, ww, cw);
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]);
     }
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[vert]);
-        
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]);
+
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_comm_weight(vert, wt);
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_comm_weights_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -664,20 +749,32 @@ BOOST_AUTO_TEST_CASE(Graph_comm_weights_reorder) {
     const std::vector<std::size_t> &graph_perm = graph.get_pullback_permutation();
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
     }
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]);
 
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_comm_weight(graph_perm[vert], wt);
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_mem_weights_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -691,21 +788,33 @@ BOOST_AUTO_TEST_CASE(Graph_mem_weights_keep_order) {
     Compact_Sparse_Graph<true, true, true, true> graph(11, edges, ww, cw, mw);
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]);
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]);
     }
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[vert]);
-        
+        BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[vert]);
+
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_mem_weight(vert, wt);
-        BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_mem_weights_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -721,21 +830,33 @@ BOOST_AUTO_TEST_CASE(Graph_mem_weights_reorder) {
     const std::vector<std::size_t> &graph_perm = graph.get_pullback_permutation();
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]);
     }
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[graph_perm[vert]]);
 
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_mem_weight(graph_perm[vert], wt);
-        BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_vtype_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -752,22 +873,34 @@ BOOST_AUTO_TEST_CASE(Graph_vtype_keep_order) {
     Compact_Sparse_Graph<true, true, true, true, true> graph(11, edges, ww, cw, mw, vt);
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]);
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[vert]);
-        BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]);
+        BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[vert]);
     }
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_type(vert), vt[vert]);
-        
+        BOOST_CHECK_EQUAL(graph.vertex_type(vert), vt[vert]);
+
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_type(vert, wt);
-        BOOST_CHECK_EQUAL( graph.vertex_type(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_type(vert), wt);
     }
 }
 
 BOOST_AUTO_TEST_CASE(Graph_vtype_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     std::vector<unsigned> ww(11);
     std::iota(ww.begin(), ww.end(), 0);
@@ -786,26 +919,21 @@ BOOST_AUTO_TEST_CASE(Graph_vtype_reorder) {
     const std::vector<std::size_t> &graph_perm = graph.get_pullback_permutation();
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
-        BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[graph_perm[vert]]);
-        BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[graph_perm[vert]]);
     }
 
     for (auto vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL( graph.vertex_type(vert), vt[graph_perm[vert]]);
+        BOOST_CHECK_EQUAL(graph.vertex_type(vert), vt[graph_perm[vert]]);
 
         const unsigned wt = static_cast<unsigned>(rand());
         graph.set_vertex_type(graph_perm[vert], wt);
-        BOOST_CHECK_EQUAL( graph.vertex_type(vert), wt);
+        BOOST_CHECK_EQUAL(graph.vertex_type(vert), wt);
     }
 }
 
-
-
-
-
 BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
-
     computational_dag_edge_idx_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx;
@@ -849,12 +977,21 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
     BOOST_CHECK_EQUAL(graph.num_edges(), 9);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 8);
 
-
-    Compact_Sparse_Graph<true, true, true, true, true, vertex_idx, std::size_t, computational_dag_edge_idx_vector_impl_def_t::vertex_work_weight_type, computational_dag_edge_idx_vector_impl_def_t::vertex_comm_weight_type, computational_dag_edge_idx_vector_impl_def_t::vertex_mem_weight_type, computational_dag_edge_idx_vector_impl_def_t::vertex_type_type > copy_graph(graph);
+    Compact_Sparse_Graph<true,
+                         true,
+                         true,
+                         true,
+                         true,
+                         vertex_idx,
+                         std::size_t,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_work_weight_type,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_comm_weight_type,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_mem_weight_type,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_type_type>
+        copy_graph(graph);
     BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 8);
     BOOST_CHECK_EQUAL(copy_graph.num_edges(), 9);
 
-
     std::vector<std::vector<std::size_t>> out_edges({
         {1, 2, 3},
         {4, 6},
@@ -902,7 +1039,18 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
         }
     }
 
-    Compact_Sparse_Graph<false, true, true, true, true, vertex_idx, std::size_t, computational_dag_edge_idx_vector_impl_def_t::vertex_work_weight_type, computational_dag_edge_idx_vector_impl_def_t::vertex_comm_weight_type, computational_dag_edge_idx_vector_impl_def_t::vertex_mem_weight_type, computational_dag_edge_idx_vector_impl_def_t::vertex_type_type> reorder_graph(graph);
+    Compact_Sparse_Graph<false,
+                         true,
+                         true,
+                         true,
+                         true,
+                         vertex_idx,
+                         std::size_t,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_work_weight_type,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_comm_weight_type,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_mem_weight_type,
+                         computational_dag_edge_idx_vector_impl_def_t::vertex_type_type>
+        reorder_graph(graph);
     BOOST_CHECK_EQUAL(reorder_graph.num_vertices(), 8);
     BOOST_CHECK_EQUAL(reorder_graph.num_edges(), 9);
 
@@ -919,9 +1067,9 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
     }
 
     for (const auto &vert : reorder_graph.vertices()) {
-        BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_chld = 0;
         std::size_t cntr = 0;
         for (const auto &chld : reorder_graph.children(vert)) {
@@ -929,7 +1077,8 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
                 BOOST_CHECK_LE(previous_chld, chld);
             }
 
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = chld;
             ++cntr;
@@ -937,9 +1086,9 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
     }
 
     for (const auto &vert : reorder_graph.vertices()) {
-        BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_par = 0;
         std::size_t cntr = 0;
         for (const auto &par : reorder_graph.parents(vert)) {
@@ -947,7 +1096,8 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
                 BOOST_CHECK_LE(previous_par, par);
             }
 
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par])
+                        != in_edges[ori_vert].cend());
 
             previous_par = par;
             ++cntr;
@@ -955,10 +1105,20 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) {
     }
 }
 
-
-
 BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph<true> graph(11, edges);
     Compact_Sparse_Graph<true> copy_graph(graph);
@@ -1018,7 +1178,7 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) {
             ++cntr;
         }
     }
-    
+
     for (const auto &vert : copy_graph.vertices()) {
         BOOST_CHECK_EQUAL(copy_graph.vertex_work_weight(vert), 1 + in_edges[vert].size());
     }
@@ -1029,7 +1189,19 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) {
 }
 
 BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph<true> graph(11, edges);
     Compact_Sparse_Graph<true> copy_graph(std::move(graph));
@@ -1089,7 +1261,7 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) {
             ++cntr;
         }
     }
-    
+
     for (const auto &vert : copy_graph.vertices()) {
         BOOST_CHECK_EQUAL(copy_graph.vertex_work_weight(vert), 1 + in_edges[vert].size());
     }
@@ -1099,9 +1271,20 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph<false> ori_graph(11, edges);
     Compact_Sparse_Graph<false> graph(ori_graph);
@@ -1136,9 +1319,9 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_chld = 0;
         std::size_t cntr = 0;
         for (const auto &chld : graph.children(vert)) {
@@ -1146,7 +1329,8 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) {
                 BOOST_CHECK_LE(previous_chld, chld);
             }
 
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = chld;
             ++cntr;
@@ -1168,9 +1352,9 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_par = 0;
         std::size_t cntr = 0;
         for (const auto &par : graph.parents(vert)) {
@@ -1178,13 +1362,14 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) {
                 BOOST_CHECK_LE(previous_par, par);
             }
 
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par])
+                        != in_edges[ori_vert].cend());
 
             previous_par = par;
             ++cntr;
         }
     }
-    
+
     for (const auto &vert : graph.vertices()) {
         BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size());
     }
@@ -1194,9 +1379,20 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(Graph1_move_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph<false> ori_graph(11, edges);
     Compact_Sparse_Graph<false> graph(std::move(ori_graph));
@@ -1231,9 +1427,9 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_chld = 0;
         std::size_t cntr = 0;
         for (const auto &chld : graph.children(vert)) {
@@ -1241,7 +1437,8 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) {
                 BOOST_CHECK_LE(previous_chld, chld);
             }
 
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = chld;
             ++cntr;
@@ -1263,9 +1460,9 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_par = 0;
         std::size_t cntr = 0;
         for (const auto &par : graph.parents(vert)) {
@@ -1273,13 +1470,14 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) {
                 BOOST_CHECK_LE(previous_par, par);
             }
 
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par])
+                        != in_edges[ori_vert].cend());
 
             previous_par = par;
             ++cntr;
         }
     }
-    
+
     for (const auto &vert : graph.vertices()) {
         BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size());
     }
@@ -1287,4 +1485,4 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) {
     for (const auto &vert : graph.vertices()) {
         BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0);
     }
-}
\ No newline at end of file
+}
diff --git a/tests/compact_sparse_graph_edge_desc.cpp b/tests/compact_sparse_graph_edge_desc.cpp
index 1bf8d9bb..a8a4957b 100644
--- a/tests/compact_sparse_graph_edge_desc.cpp
+++ b/tests/compact_sparse_graph_edge_desc.cpp
@@ -17,10 +17,10 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE Sparse_Compact_Graph_Edge_Desc
-#include <boost/test/unit_test.hpp>
-
 #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp"
 
+#include <boost/test/unit_test.hpp>
+
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(Empty_Graph_keep_order) {
@@ -135,7 +135,6 @@ BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) {
 
     BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices());
 
-
     std::vector<std::size_t> perm(10, 0);
     std::iota(perm.begin(), perm.end(), 0);
     const std::vector<std::size_t> &graph_perm = graph.get_pullback_permutation();
@@ -143,7 +142,15 @@ BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) {
 }
 
 BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0, 1},
+        {1, 2},
+        {2, 3},
+        {3, 4},
+        {4, 5},
+        {5, 6},
+        {6, 7}
+    });
 
     Compact_Sparse_Graph_EdgeDesc<true> graph(8, edges);
 
@@ -221,11 +228,10 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
         BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0);
     }
 
-
     std::size_t edge_counter = 0;
-    for (const auto& edge : graph.edges()) {
+    for (const auto &edge : graph.edges()) {
         BOOST_CHECK_EQUAL(graph.source(edge), edge_counter);
-        BOOST_CHECK_EQUAL(graph.target(edge), edge_counter+1);
+        BOOST_CHECK_EQUAL(graph.target(edge), edge_counter + 1);
 
         BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge)));
 
@@ -234,9 +240,9 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
     BOOST_CHECK_EQUAL(edge_counter, graph.num_edges());
 
     edge_counter = 0;
-    for (const auto& edge : osp::edges(graph)) {
+    for (const auto &edge : osp::edges(graph)) {
         BOOST_CHECK_EQUAL(source(edge, graph), edge_counter);
-        BOOST_CHECK_EQUAL(target(edge, graph), edge_counter+1);
+        BOOST_CHECK_EQUAL(target(edge, graph), edge_counter + 1);
 
         BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge)));
 
@@ -245,7 +251,7 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
     BOOST_CHECK_EQUAL(edge_counter, graph.num_edges());
 
     std::size_t vert_counter = 0;
-    for (const auto& vert : graph.vertices()) {
+    for (const auto &vert : graph.vertices()) {
         for (const auto &edge : graph.in_edges(vert)) {
             BOOST_CHECK_EQUAL(graph.source(edge), vert - 1);
             BOOST_CHECK_EQUAL(graph.target(edge), vert);
@@ -271,9 +277,16 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) {
     BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices());
 }
 
-
 BOOST_AUTO_TEST_CASE(LineGraph_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0, 1},
+        {1, 2},
+        {2, 3},
+        {3, 4},
+        {4, 5},
+        {5, 6},
+        {6, 7}
+    });
 
     Compact_Sparse_Graph_EdgeDesc<false> graph(8, edges);
 
@@ -359,11 +372,10 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) {
         BOOST_CHECK_EQUAL(perm[vert], graph_perm[vert]);
     }
 
-
     std::size_t edge_counter = 0;
-    for (const auto& edge : graph.edges()) {
+    for (const auto &edge : graph.edges()) {
         BOOST_CHECK_EQUAL(graph.source(edge), edge_counter);
-        BOOST_CHECK_EQUAL(graph.target(edge), edge_counter+1);
+        BOOST_CHECK_EQUAL(graph.target(edge), edge_counter + 1);
 
         BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge)));
 
@@ -372,7 +384,7 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) {
     BOOST_CHECK_EQUAL(edge_counter, graph.num_edges());
 
     std::size_t vert_counter = 0;
-    for (const auto& vert : graph.vertices()) {
+    for (const auto &vert : graph.vertices()) {
         for (const auto &edge : graph.in_edges(vert)) {
             BOOST_CHECK_EQUAL(graph.source(edge), vert - 1);
             BOOST_CHECK_EQUAL(graph.target(edge), vert);
@@ -387,9 +399,20 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) {
     BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices());
 }
 
-
 BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph_EdgeDesc<true> graph(11, edges);
 
@@ -490,7 +513,7 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
         ++edge_cntr;
     }
     BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges());
-    
+
     edge_cntr = 0;
     for (const auto &vert : graph.vertices()) {
         for (const auto &edge : graph.out_edges(vert)) {
@@ -500,7 +523,6 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
     }
     BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges());
 
-    
     for (const auto &vert : graph.vertices()) {
         BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[vert].size());
     }
@@ -511,7 +533,19 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) {
 }
 
 BOOST_AUTO_TEST_CASE(Graph1_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
 
     Compact_Sparse_Graph_EdgeDesc<false> graph(11, edges);
 
@@ -545,9 +579,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_chld = 0;
         std::size_t cntr = 0;
         for (const auto &chld : graph.children(vert)) {
@@ -555,7 +589,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
                 BOOST_CHECK_LE(previous_chld, chld);
             }
 
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = chld;
             ++cntr;
@@ -568,7 +603,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
             }
 
             --cntr;
-            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) != out_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it])
+                        != out_edges[ori_vert].cend());
 
             previous_chld = *it;
         }
@@ -598,9 +634,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
     });
 
     for (const auto &vert : graph.vertices()) {
-        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size());
+        BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size());
         std::size_t ori_vert = graph_perm[vert];
-        
+
         std::size_t previous_par = 0;
         std::size_t cntr = 0;
         for (const auto &par : graph.parents(vert)) {
@@ -608,7 +644,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
                 BOOST_CHECK_LE(previous_par, par);
             }
 
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par])
+                        != in_edges[ori_vert].cend());
 
             previous_par = par;
             ++cntr;
@@ -621,7 +658,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
             }
 
             --cntr;
-            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) != in_edges[ori_vert].cend());
+            BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it])
+                        != in_edges[ori_vert].cend());
 
             previous_par = *it;
         }
@@ -635,7 +673,7 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
         }
         BOOST_CHECK_EQUAL(cntr, graph.in_degree(vert));
     }
-    
+
     for (const auto &vert : graph.vertices()) {
         BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size());
     }
@@ -653,7 +691,7 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
         ++edge_cntr;
     }
     BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges());
-    
+
     edge_cntr = 0;
     for (const auto &vert : graph.vertices()) {
         for (const auto &edge : graph.out_edges(vert)) {
@@ -665,7 +703,19 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) {
 }
 
 BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
     const std::vector<unsigned> edge_weights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34});
 
     Compact_Sparse_Graph_EdgeDesc<true, true, true, true, true, true> graph(11, edges);
@@ -674,7 +724,7 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) {
     BOOST_CHECK_EQUAL(graph.num_edges(), 11);
 
     for (std::size_t i = 0; i < edges.size(); ++i) {
-        const auto& [src, tgt] = edges[i];
+        const auto &[src, tgt] = edges[i];
         graph.set_edge_comm_weight(src, tgt, edge_weights[i]);
     }
 
@@ -684,15 +734,26 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) {
 
         auto it = std::find(edges.cbegin(), edges.cend(), std::make_pair(src, tgt));
         BOOST_CHECK(it != edges.cend());
-        
+
         auto ind = std::distance(edges.cbegin(), it);
         BOOST_CHECK_EQUAL(edge_weights[static_cast<std::size_t>(ind)], graph.edge_comm_weight(edge));
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) {
-    const std::vector<std::pair<std::size_t, std::size_t>> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}});
+    const std::vector<std::pair<std::size_t, std::size_t>> edges({
+        {0,  1},
+        {2,  3},
+        {6, 10},
+        {7,  9},
+        {0,  2},
+        {4,  6},
+        {1,  6},
+        {6,  7},
+        {5,  6},
+        {3,  7},
+        {1,  2}
+    });
     const std::vector<unsigned> edge_weights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34});
 
     Compact_Sparse_Graph_EdgeDesc<false, true, true, true, true, true> graph(11, edges);
@@ -706,7 +767,7 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) {
     BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend()));
 
     for (std::size_t i = 0; i < edges.size(); ++i) {
-        const auto& [src, tgt] = edges[i];
+        const auto &[src, tgt] = edges[i];
         graph.set_edge_comm_weight(src, tgt, edge_weights[i]);
     }
 
@@ -716,8 +777,8 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) {
 
         auto it = std::find(edges.cbegin(), edges.cend(), std::make_pair(src, tgt));
         BOOST_CHECK(it != edges.cend());
-        
+
         auto ind = std::distance(edges.cbegin(), it);
         BOOST_CHECK_EQUAL(edge_weights[static_cast<std::size_t>(ind)], graph.edge_comm_weight(edge));
     }
-}
\ No newline at end of file
+}
diff --git a/tests/connected_components_part.cpp b/tests/connected_components_part.cpp
index 22360d2f..57031311 100644
--- a/tests/connected_components_part.cpp
+++ b/tests/connected_components_part.cpp
@@ -22,13 +22,12 @@ limitations under the License.
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/dag_divider/ConnectedComponentDivider.hpp"
 #include "osp/dag_divider/ConnectedComponentScheduler.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(ConnectedComponentPart_test) {
-
     BspInstance<computational_dag_vector_impl_def_int_t> instance;
     computational_dag_vector_impl_def_int_t &dag = instance.getComputationalDag();
     using VertexType = vertex_idx_t<computational_dag_vector_impl_def_int_t>;
diff --git a/tests/cost_evaluation.cpp b/tests/cost_evaluation.cpp
index 27f7660c..9375f4c8 100644
--- a/tests/cost_evaluation.cpp
+++ b/tests/cost_evaluation.cpp
@@ -30,7 +30,6 @@ limitations under the License.
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
 
     BspInstance<graph> instance;
diff --git a/tests/cuthill_mckee.cpp b/tests/cuthill_mckee.cpp
index c6b2f019..89cf42f0 100644
--- a/tests/cuthill_mckee.cpp
+++ b/tests/cuthill_mckee.cpp
@@ -17,13 +17,14 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE cuthill_mckee
+#include "osp/graph_algorithms/cuthill_mckee.hpp"
+
 #include <boost/test/unit_test.hpp>
 #include <filesystem>
 
-#include "osp/graph_algorithms/cuthill_mckee.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "test_graphs.hpp"
 
 using namespace osp;
@@ -32,7 +33,6 @@ using ComputationalDag = boost_graph_int_t;
 using VertexType = vertex_idx_t<ComputationalDag>;
 
 BOOST_AUTO_TEST_CASE(cuthill_mckee_1) {
-
     ComputationalDag dag;
 
     dag.add_vertex(2, 9);
@@ -56,32 +56,32 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_1) {
 
     std::vector<VertexType> cm_wavefront = cuthill_mckee_wavefront(dag);
     std::vector<unsigned> expected_cm_wavefront = {0, 3, 1, 2, 6, 4, 5, 7};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(),
-                                  expected_cm_wavefront.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), expected_cm_wavefront.end());
 
     cm_wavefront = cuthill_mckee_wavefront(dag, true);
     expected_cm_wavefront = {0, 2, 3, 1, 5, 6, 4, 7};
 
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(),
-                                  expected_cm_wavefront.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), expected_cm_wavefront.end());
 
     std::vector<VertexType> cm_undirected;
     std::vector<unsigned> expected_cm_undirected;
 
     cm_undirected = cuthill_mckee_undirected(dag, true);
     expected_cm_undirected = {7, 3, 4, 0, 1, 2, 6, 5};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(),
-                                  expected_cm_undirected.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end());
 
     cm_undirected = cuthill_mckee_undirected(dag, false);
     expected_cm_undirected = {0, 3, 1, 2, 7, 6, 4, 5};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(),
-                                  expected_cm_undirected.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end());
 
     cm_undirected = cuthill_mckee_undirected(dag, true, true);
     expected_cm_undirected = {3, 4, 5, 1, 2, 7, 6, 0};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(),
-                                  expected_cm_undirected.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end());
 
     std::vector<VertexType> top_sort;
     for (const auto &vertex : priority_vec_top_sort_view(dag, cm_undirected)) {
@@ -93,21 +93,21 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_1) {
 
     cm_undirected = cuthill_mckee_undirected(dag, false, true);
     expected_cm_undirected = {0, 2, 3, 1, 6, 7, 5, 4};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(),
-                                  expected_cm_undirected.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end());
 
     dag.add_edge(8, 9);
     dag.add_edge(9, 10);
 
     cm_undirected = cuthill_mckee_undirected(dag, true);
     expected_cm_undirected = {7, 3, 4, 0, 1, 2, 6, 5, 10, 9, 8};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(),
-                                  expected_cm_undirected.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end());
 
     cm_undirected = cuthill_mckee_undirected(dag, false);
     expected_cm_undirected = {0, 3, 1, 2, 7, 6, 4, 5, 8, 9, 10};
-    BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(),
-                                  expected_cm_undirected.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end());
 }
 
 bool is_permutation(const std::vector<VertexType> &vec) {
@@ -128,7 +128,6 @@ bool is_top_sort(const std::vector<VertexType> &vec, const ComputationalDag &dag
     }
 
     for (const auto &vertex : dag.vertices()) {
-
         for (const auto &child : dag.children(vertex)) {
             if (position[vertex] > position[child]) {
                 return false;
@@ -140,7 +139,6 @@ bool is_top_sort(const std::vector<VertexType> &vec, const ComputationalDag &dag
 }
 
 BOOST_AUTO_TEST_CASE(cuthill_mckee_2) {
-
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
 
     // Getting root git directory
@@ -152,12 +150,10 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_2) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-
         ComputationalDag graph;
         auto status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), graph);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         } else {
@@ -182,4 +178,4 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_2) {
         BOOST_CHECK(is_permutation(top_sort));
         BOOST_CHECK(is_top_sort(top_sort, graph));
     }
-}
\ No newline at end of file
+}
diff --git a/tests/debug_merkle_divider.cpp b/tests/debug_merkle_divider.cpp
index 5763d840..a9a7ed1e 100644
--- a/tests/debug_merkle_divider.cpp
+++ b/tests/debug_merkle_divider.cpp
@@ -16,6 +16,8 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
+#include <iostream>
+
 #include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/auxiliary/io/dot_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp"
@@ -27,11 +29,10 @@ limitations under the License.
 #include "osp/coarser/coarser_util.hpp"
 #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include <iostream>
 
 using namespace osp;
 
-template<typename GraphT>
+template <typename GraphT>
 void check_partition_type_homogeneity(const GraphT &dag, const std::vector<vertex_idx_t<GraphT>> &partition) {
     // Group partitions by their ID
     std::map<vertex_idx_t<GraphT>, std::vector<vertex_idx_t<GraphT>>> partitions;
@@ -41,8 +42,9 @@ void check_partition_type_homogeneity(const GraphT &dag, const std::vector<verte
 
     // For each partition, check that all vertices have the same type
     for (const auto &[part_id, vertices] : partitions) {
-        if (vertices.empty())
+        if (vertices.empty()) {
             continue;
+        }
         const auto first_node_type = dag.vertex_type(vertices[0]);
         for (const auto &vertex : vertices) {
             if (dag.vertex_type(vertex) != first_node_type) {
@@ -73,7 +75,8 @@ int main(int argc, char *argv[]) {
     std::cout << "Graph loaded successfully. " << instance.numberOfVertices() << " vertices." << std::endl;
 
     for (auto v : instance.getComputationalDag().vertices()) {
-        instance.getComputationalDag().set_vertex_comm_weight(v, static_cast<v_commw_t<graph_t2>>(instance.getComputationalDag().vertex_comm_weight(v) * 0.01));
+        instance.getComputationalDag().set_vertex_comm_weight(
+            v, static_cast<v_commw_t<graph_t2>>(instance.getComputationalDag().vertex_comm_weight(v) * 0.01));
     }
 
     // Set up architecture
@@ -105,7 +108,7 @@ int main(int argc, char *argv[]) {
     iso_scheduler.setCriticalPathThreshold(500);
     iso_scheduler.setOrbitLockRatio(0.5);
     iso_scheduler.setAllowTrimmedScheduler(false);
-    iso_scheduler.set_plot_dot_graphs(true); // Enable plotting for debug
+    iso_scheduler.set_plot_dot_graphs(true);    // Enable plotting for debug
 
     std::cout << "Starting partition computation..." << std::endl;
 
@@ -120,7 +123,8 @@ int main(int argc, char *argv[]) {
     std::cout << "Partition is " << (acyc ? "acyclic." : "not acyclic.");
 
     std::cout << "Partition computation finished." << std::endl;
-    std::cout << "Generated " << std::set<vertex_idx_t<graph_t>>(partition.begin(), partition.end()).size() << " partitions." << std::endl;
+    std::cout << "Generated " << std::set<vertex_idx_t<graph_t>>(partition.begin(), partition.end()).size() << " partitions."
+              << std::endl;
 
     return 0;
 }
diff --git a/tests/directed_graph_algorithms.cpp b/tests/directed_graph_algorithms.cpp
index 0b246503..da141811 100644
--- a/tests/directed_graph_algorithms.cpp
+++ b/tests/directed_graph_algorithms.cpp
@@ -22,6 +22,7 @@ limitations under the License.
 #include <iostream>
 #include <vector>
 
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/graph_algorithms/computational_dag_util.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp"
@@ -31,15 +32,12 @@ limitations under the License.
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "test_utils.hpp"
 #include "test_graphs.hpp"
+#include "test_utils.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) {
-
-
     using graph_t = boost_graph_int_t;
 
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
@@ -50,9 +48,7 @@ BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) {
     for (auto &filename_graph : filenames_graph) {
         graph_t graph;
 
-
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(),
-                                                                                graph);
+        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), graph);
 
         BOOST_CHECK(status_graph);
 
@@ -63,16 +59,14 @@ BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) {
         std::cout << "\n" << filename_graph << std::endl;
 
         std::cout << "Time for long_edges_in_triangles: "
-                  << std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count() << "ms"
-                  << std::endl;
+                  << std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count() << "ms" << std::endl;
 
         start_time = std::chrono::high_resolution_clock::now();
         auto deleted_edges_parallel = long_edges_in_triangles_parallel(graph);
         finish_time = std::chrono::high_resolution_clock::now();
 
         std::cout << "Time for long_edges_in_triangles_parallel: "
-                  << std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count() << "ms"
-                  << std::endl;
+                  << std::chrono::duration_cast<std::chrono::milliseconds>(finish_time - start_time).count() << "ms" << std::endl;
 
         BOOST_CHECK_EQUAL(deleted_edges.size(), deleted_edges_parallel.size());
 
@@ -84,4 +78,4 @@ BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) {
             BOOST_CHECK(deleted_edges.find(edge) != deleted_edges.cend());
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/directed_graph_top_sort.cpp b/tests/directed_graph_top_sort.cpp
index 0925d1ae..3b2703f9 100644
--- a/tests/directed_graph_top_sort.cpp
+++ b/tests/directed_graph_top_sort.cpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE ApproxEdgeReduction
 
+#include "osp/graph_algorithms/directed_graph_top_sort.hpp"
+
 #include <boost/test/unit_test.hpp>
 #include <iostream>
 #include <vector>
@@ -25,7 +27,6 @@ limitations under the License.
 #include "osp/graph_algorithms/computational_dag_util.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 #include "osp/graph_algorithms/directed_graph_path_util.hpp"
-#include "osp/graph_algorithms/directed_graph_top_sort.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
@@ -34,7 +35,6 @@ limitations under the License.
 using namespace osp;
 
 computational_dag_vector_impl_def_t constr_graph_1() {
-
     computational_dag_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
@@ -63,21 +63,26 @@ computational_dag_vector_impl_def_t constr_graph_1() {
 }
 
 BOOST_AUTO_TEST_CASE(test_util_1) {
-
     const computational_dag_vector_impl_def_t graph = constr_graph_1();
 
     // using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
 }
 
 BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
-
     using VertexType = vertex_idx_t<boost_graph_int_t>;
 
-    const std::vector<std::vector<VertexType>> out(
-
-        {{7}, {}, {0}, {2}, {}, {2, 0}, {1, 2, 0}, {}, {4}, {6, 1, 5}}
-
-    );
+    const std::vector<std::vector<VertexType>> out({
+        {7},
+        {},
+        {0},
+        {2},
+        {},
+        {2, 0},
+        {1, 2, 0},
+        {},
+        {4},
+        {6, 1, 5}
+    });
     const std::vector<int> workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1});
     const std::vector<int> commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1});
 
@@ -205,7 +210,6 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
     std::vector<VertexType> loc_view_top_sort;
 
     for (const auto &v : locality_top_sort_view(graph)) {
-
         loc_view_top_sort.push_back(v);
     }
 
@@ -270,14 +274,20 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
 }
 
 BOOST_AUTO_TEST_CASE(top_sort_template_overload_csr) {
-
     using VertexType = vertex_idx_t<boost_graph_int_t>;
 
-    const std::vector<std::vector<VertexType>> out(
-
-        {{7}, {}, {0}, {2}, {}, {2, 0}, {1, 2, 0}, {}, {4}, {6, 1, 5}}
-
-    );
+    const std::vector<std::vector<VertexType>> out({
+        {7},
+        {},
+        {0},
+        {2},
+        {},
+        {2, 0},
+        {1, 2, 0},
+        {},
+        {4},
+        {6, 1, 5}
+    });
     const std::vector<int> workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1});
     const std::vector<int> commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1});
 
@@ -299,4 +309,4 @@ BOOST_AUTO_TEST_CASE(top_sort_template_overload_csr) {
         BOOST_CHECK_EQUAL(expected_top_order[idx], v);
         ++idx;
     }
-}
\ No newline at end of file
+}
diff --git a/tests/directed_graph_util.cpp b/tests/directed_graph_util.cpp
index 492f61e8..fe2c53bc 100644
--- a/tests/directed_graph_util.cpp
+++ b/tests/directed_graph_util.cpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE ApproxEdgeReduction
 
+#include "osp/graph_algorithms/directed_graph_util.hpp"
+
 #include <boost/test/unit_test.hpp>
 #include <iostream>
 #include <vector>
@@ -27,14 +29,12 @@ limitations under the License.
 #include "osp/graph_algorithms/directed_graph_edge_view.hpp"
 #include "osp/graph_algorithms/directed_graph_path_util.hpp"
 #include "osp/graph_algorithms/directed_graph_top_sort.hpp"
-#include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 using namespace osp;
 
 computational_dag_vector_impl_def_t constr_graph_1() {
-
     computational_dag_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
@@ -63,7 +63,6 @@ computational_dag_vector_impl_def_t constr_graph_1() {
 }
 
 BOOST_AUTO_TEST_CASE(test_empty_graph) {
-
     computational_dag_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
@@ -82,7 +81,6 @@ BOOST_AUTO_TEST_CASE(test_empty_graph) {
 }
 
 BOOST_AUTO_TEST_CASE(test_util_1) {
-
     computational_dag_vector_impl_def_t graph = constr_graph_1();
 
     using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
@@ -403,7 +401,6 @@ BOOST_AUTO_TEST_CASE(test_util_1) {
 
     size_t i = 0;
     for (const auto &e : edge_view(graph)) {
-
         BOOST_CHECK_EQUAL(e.source, edge_source[i]);
         BOOST_CHECK_EQUAL(e.target, edge_target[i]);
 
@@ -423,14 +420,20 @@ BOOST_AUTO_TEST_CASE(test_util_1) {
 }
 
 BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
-
     using VertexType = vertex_idx_t<boost_graph_int_t>;
 
-    const std::vector<std::vector<VertexType>> out(
-
-        {{7}, {}, {0}, {2}, {}, {2, 0}, {1, 2, 0}, {}, {4}, {6, 1, 5}}
-
-    );
+    const std::vector<std::vector<VertexType>> out({
+        {7},
+        {},
+        {0},
+        {2},
+        {},
+        {2, 0},
+        {1, 2, 0},
+        {},
+        {4},
+        {6, 1, 5}
+    });
     const std::vector<int> workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1});
     const std::vector<int> commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1});
 
@@ -480,16 +483,15 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
     for (const auto &vertex : graph.vertices()) {
         num_edges += graph.out_degree(vertex);
         for (const auto &parent : graph.parents(vertex)) {
-            BOOST_CHECK(std::any_of(graph.children(parent).cbegin(), graph.children(parent).cend(),
-                                    [vertex](VertexType k) { return k == vertex; }));
+            BOOST_CHECK(std::any_of(
+                graph.children(parent).cbegin(), graph.children(parent).cend(), [vertex](VertexType k) { return k == vertex; }));
         }
     }
 
     for (const auto &vertex : graph.vertices()) {
         for (const auto &child : graph.children(vertex)) {
-
-            BOOST_CHECK(std::any_of(graph.parents(child).cbegin(), graph.parents(child).cend(),
-                                    [vertex](VertexType k) { return k == vertex; }));
+            BOOST_CHECK(std::any_of(
+                graph.parents(child).cbegin(), graph.parents(child).cend(), [vertex](VertexType k) { return k == vertex; }));
         }
     }
 
@@ -563,8 +565,8 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
         bool_c[i] = true;
     }
 
-    BOOST_CHECK(GetFilteredTopOrder(bool_a, graph) == std::vector<VertexType>({0, 8}) ||
-                GetFilteredTopOrder(bool_a, graph) == std::vector<VertexType>({8, 0}));
+    BOOST_CHECK(GetFilteredTopOrder(bool_a, graph) == std::vector<VertexType>({0, 8})
+                || GetFilteredTopOrder(bool_a, graph) == std::vector<VertexType>({8, 0}));
     BOOST_CHECK(GetFilteredTopOrder(bool_b, graph)[3] == 2);
     BOOST_CHECK(GetFilteredTopOrder(bool_c, graph) == std::vector<VertexType>({9, 6, 1}));
 
@@ -626,7 +628,6 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
     for (unsigned loops = 0; loops < 10; loops++) {
         for (unsigned noise = 0; noise < 6; noise++) {
             for (auto &pois_para : poisson_params) {
-
                 std::vector<int> poset_int_map = get_strict_poset_integer_map(noise, pois_para, graph);
 
                 for (const auto &vertex : graph.vertices()) {
@@ -642,7 +643,13 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
 
     auto wavefronts = compute_wavefronts(graph);
 
-    std::vector<std::vector<VertexType>> expected_wavefronts = {{3, 8, 9}, {4, 6, 5}, {1, 2}, {0}, {7}};
+    std::vector<std::vector<VertexType>> expected_wavefronts = {
+        {3, 8, 9},
+        {4, 6, 5},
+        {1, 2},
+        {0},
+        {7}
+    };
 
     size_t size = 0;
     size_t counter = 0;
@@ -650,8 +657,8 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) {
         size += wavefront.size();
         BOOST_CHECK(!wavefront.empty());
 
-        BOOST_CHECK_EQUAL_COLLECTIONS(wavefront.begin(), wavefront.end(), expected_wavefronts[counter].begin(),
-                                      expected_wavefronts[counter].end());
+        BOOST_CHECK_EQUAL_COLLECTIONS(
+            wavefront.begin(), wavefront.end(), expected_wavefronts[counter].begin(), expected_wavefronts[counter].end());
 
         counter++;
     }
@@ -707,4 +714,4 @@ BOOST_AUTO_TEST_CASE(test_edge_view_indexed_access) {
     // Check out of bounds
     auto oob_it = decltype(all_edges)::iterator(graph.num_edges() + 5, graph);
     BOOST_CHECK(oob_it == all_edges.end());
-}
\ No newline at end of file
+}
diff --git a/tests/divisors.cpp b/tests/divisors.cpp
index 74eb43d7..f9f7956c 100644
--- a/tests/divisors.cpp
+++ b/tests/divisors.cpp
@@ -17,10 +17,10 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE Divisor
-#include <boost/test/unit_test.hpp>
-
 #include "osp/auxiliary/math/divisors.hpp"
 
+#include <boost/test/unit_test.hpp>
+
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(IntegerSqrt) {
@@ -45,7 +45,7 @@ BOOST_AUTO_TEST_CASE(Divisors) {
             BOOST_CHECK_EQUAL(num % div, 0U);
         }
         std::cout << "\n";
-        
+
         auto it = divs.begin();
         for (std::size_t i = 1U; i <= num; ++i) {
             if (num % i == 0) {
diff --git a/tests/eft_subgraph_scheduler.cpp b/tests/eft_subgraph_scheduler.cpp
index e8dec670..3869b8ec 100644
--- a/tests/eft_subgraph_scheduler.cpp
+++ b/tests/eft_subgraph_scheduler.cpp
@@ -19,24 +19,23 @@ limitations under the License.
 #define BOOST_TEST_MODULE EftSubgraphScheduler
 #include <boost/test/unit_test.hpp>
 
+#include "osp/bsp/model/BspInstance.hpp"
 #include "osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/bsp/model/BspInstance.hpp"
 
 using namespace osp;
 
-BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain)
-{
+BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain) {
     using graph_t = computational_dag_vector_impl_def_t;
 
     // 1. Setup Instance
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
 
     // Create a simple coarse-grained DAG: 0 -> 1 -> 2
-    dag.add_vertex(100, 1, 0); // node 0
-    dag.add_vertex(200, 1, 0); // node 1
-    dag.add_vertex(300, 1, 0); // node 2
+    dag.add_vertex(100, 1, 0);    // node 0
+    dag.add_vertex(200, 1, 0);    // node 1
+    dag.add_vertex(300, 1, 0);    // node 2
     dag.add_edge(0, 1);
     dag.add_edge(1, 2);
 
@@ -48,7 +47,7 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain)
     std::vector<unsigned> multiplicities = {1, 2, 1};
     std::vector<unsigned> max_procs = {100, 100, 100};
     std::vector<std::vector<v_workw_t<graph_t>>> required_proc_types(3);
-    
+
     // Node 0: work 100, mult 1. Needs type 0.
     required_proc_types[0] = {100, 0};
     // Node 1: work 200, mult 2. Needs type 0 and 1.
@@ -75,19 +74,18 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain)
     BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][1], 2);
 }
 
-BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin)
-{
+BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) {
     using graph_t = computational_dag_vector_impl_def_t;
 
     // 1. Setup Instance
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
 
     // Create a fork-join DAG: 0 -> {1,2} -> 3
-    dag.add_vertex(100, 1, 0); // node 0
-    dag.add_vertex(200, 1, 0); // node 1
-    dag.add_vertex(300, 1, 0); // node 2
-    dag.add_vertex(100, 1, 0); // node 3
+    dag.add_vertex(100, 1, 0);    // node 0
+    dag.add_vertex(200, 1, 0);    // node 1
+    dag.add_vertex(300, 1, 0);    // node 2
+    dag.add_vertex(100, 1, 0);    // node 3
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
     dag.add_edge(1, 3);
@@ -101,7 +99,7 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin)
     std::vector<unsigned> multiplicities = {1, 2, 1, 4};
     std::vector<unsigned> max_procs = {100, 100, 100, 100};
     std::vector<std::vector<v_workw_t<graph_t>>> required_proc_types(4);
-    
+
     // All nodes need type 0
     required_proc_types[0] = {100};
     required_proc_types[1] = {200};
@@ -139,16 +137,15 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin)
     BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[3][0], 1);
 }
 
-BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock)
-{
+BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock) {
     using graph_t = computational_dag_vector_impl_def_t;
 
     // 1. Setup Instance
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
 
     // Create a single-node DAG
-    dag.add_vertex(100, 1, 0); // node 0
+    dag.add_vertex(100, 1, 0);    // node 0
 
     // Setup Architecture: 1 processor of type 0
     instance.getArchitecture().setProcessorsWithTypes({0});
@@ -171,20 +168,19 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock)
     BOOST_CHECK_LT(schedule.makespan, 0.0);
 }
 
-BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG)
-{
+BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG) {
     using graph_t = computational_dag_vector_impl_def_t;
 
     // 1. Setup Instance
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
-
-    dag.add_vertex(50, 1, 0);  // 0
-    dag.add_vertex(100, 1, 0); // 1
-    dag.add_vertex(150, 1, 0); // 2
-    dag.add_vertex(80, 1, 0);  // 3
-    dag.add_vertex(120, 1, 0); // 4
-    dag.add_vertex(60, 1, 0);  // 5
+    auto &dag = instance.getComputationalDag();
+
+    dag.add_vertex(50, 1, 0);     // 0
+    dag.add_vertex(100, 1, 0);    // 1
+    dag.add_vertex(150, 1, 0);    // 2
+    dag.add_vertex(80, 1, 0);     // 3
+    dag.add_vertex(120, 1, 0);    // 4
+    dag.add_vertex(60, 1, 0);     // 5
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
     dag.add_edge(1, 3);
@@ -199,14 +195,14 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG)
 
     // 2. Setup Scheduler Inputs
     std::vector<unsigned> multiplicities = {1, 2, 1, 4, 2, 1};
-    std::vector<unsigned> max_procs =      {100, 100, 100, 100, 100, 100};
+    std::vector<unsigned> max_procs = {100, 100, 100, 100, 100, 100};
     std::vector<std::vector<v_workw_t<graph_t>>> required_proc_types(6);
-    required_proc_types[0] = {50, 0};   // Job 0: needs T0
-    required_proc_types[1] = {100, 0};  // Job 1: needs T0
-    required_proc_types[2] = {0, 150};  // Job 2: needs T1
-    required_proc_types[3] = {40, 40};  // Job 3: needs T0 & T1
-    required_proc_types[4] = {0, 120};  // Job 4: needs T1
-    required_proc_types[5] = {60, 0};   // Job 5: needs T0
+    required_proc_types[0] = {50, 0};     // Job 0: needs T0
+    required_proc_types[1] = {100, 0};    // Job 1: needs T0
+    required_proc_types[2] = {0, 150};    // Job 2: needs T1
+    required_proc_types[3] = {40, 40};    // Job 3: needs T0 & T1
+    required_proc_types[4] = {0, 120};    // Job 4: needs T1
+    required_proc_types[5] = {60, 0};     // Job 5: needs T0
 
     // 3. Run Scheduler
     EftSubgraphScheduler<graph_t> scheduler;
@@ -225,20 +221,19 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG)
     BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[5][0], 4);
 }
 
-BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention)
-{
+BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention) {
     using graph_t = computational_dag_vector_impl_def_t;
 
     // 1. Setup Instance
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
 
     // Create a fork-join DAG: 0 -> {1,2,3} -> 4
-    dag.add_vertex(10, 1, 0);  // 0
-    dag.add_vertex(100, 1, 0); // 1 (high rank)
-    dag.add_vertex(50, 1, 0);  // 2 (mid rank)
-    dag.add_vertex(20, 1, 0);  // 3 (low rank)
-    dag.add_vertex(10, 1, 0);  // 4
+    dag.add_vertex(10, 1, 0);     // 0
+    dag.add_vertex(100, 1, 0);    // 1 (high rank)
+    dag.add_vertex(50, 1, 0);     // 2 (mid rank)
+    dag.add_vertex(20, 1, 0);     // 3 (low rank)
+    dag.add_vertex(10, 1, 0);     // 4
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
     dag.add_edge(0, 3);
@@ -286,18 +281,17 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention)
     BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[4][0], 4);
 }
 
-BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation)
-{
+BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation) {
     using graph_t = computational_dag_vector_impl_def_t;
 
     // 1. Setup Instance
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
 
     // Create a fork DAG: 0 -> {1,2}
-    dag.add_vertex(10, 1, 0);  // 0
-    dag.add_vertex(300, 1, 0); // 1 (high rank)
-    dag.add_vertex(100, 1, 0); // 2 (low rank)
+    dag.add_vertex(10, 1, 0);     // 0
+    dag.add_vertex(300, 1, 0);    // 1 (high rank)
+    dag.add_vertex(100, 1, 0);    // 2 (low rank)
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
 
@@ -330,7 +324,7 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation)
     //        Job 1 finishes at 1 + 300/7 = 1 + 42.857... = 43.857...
     //        Job 2 finishes at 1 + 100/3 = 1 + 33.333... = 34.333...
     //        Makespan is 43.857...
-    BOOST_CHECK_CLOSE(schedule.makespan, 1.0 + 300.0/7.0, 1e-9);
+    BOOST_CHECK_CLOSE(schedule.makespan, 1.0 + 300.0 / 7.0, 1e-9);
 
     BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 3);
     // Job 0: 10 workers
diff --git a/tests/filereader.cpp b/tests/filereader.cpp
index 6e64fd63..e95ad03c 100644
--- a/tests/filereader.cpp
+++ b/tests/filereader.cpp
@@ -18,22 +18,20 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE File_Reader
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
+#include <iostream>
 
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/auxiliary/io/dot_graph_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/auxiliary/io/mtx_graph_file_reader.hpp"
-#include <filesystem>
-#include <iostream>
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 using namespace osp;
 
-
 BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) {
-
     // Getting root git directory
     std::filesystem::path cwd = std::filesystem::current_path();
     std::cout << cwd << std::endl;
@@ -44,23 +42,23 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) {
 
     computational_dag_vector_impl_def_t graph;
 
-    bool status =
-        file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph);
+    bool status
+        = file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph);
 
     std::cout << "STATUS:" << status << std::endl;
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 8);
-    BOOST_CHECK_EQUAL(graph.num_edges(),  19);
+    BOOST_CHECK_EQUAL(graph.num_edges(), 19);
 
     // ---- Node 0
-    std::vector<int> p0{ };
+    std::vector<int> p0{};
     std::vector<int> c0{4, 6, 3, 5, 2};
 
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(0).begin(), graph.parents(0).end(), p0.begin(), p0.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(0).begin(), graph.children(0).end(), c0.begin(), c0.end());
 
     // ---- Node 1
-    std::vector<int> p1{ };
+    std::vector<int> p1{};
     std::vector<int> c1{3, 5, 2, 6};
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(1).begin(), graph.parents(1).end(), p1.begin(), p1.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(1).begin(), graph.children(1).end(), c1.begin(), c1.end());
@@ -85,7 +83,7 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) {
 
     // ---- Node 5
     std::vector<int> p5{0, 1, 2, 3, 4};
-    std::vector<int> c5{ };
+    std::vector<int> c5{};
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(5).begin(), graph.parents(5).end(), p5.begin(), p5.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(5).begin(), graph.children(5).end(), c5.begin(), c5.end());
 
@@ -97,15 +95,12 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) {
 
     // ---- Node 7
     std::vector<int> p7{3, 4, 6};
-    std::vector<int> c7{ };
+    std::vector<int> c7{};
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(7).begin(), graph.parents(7).end(), p7.begin(), p7.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(7).begin(), graph.children(7).end(), c7.begin(), c7.end());
-
 }
 
-
 BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) {
-
     // Getting root git directory
     std::filesystem::path cwd = std::filesystem::current_path();
     std::cout << cwd << std::endl;
@@ -116,23 +111,23 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) {
 
     boost_graph_int_t graph;
 
-    bool status =
-        file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph);
+    bool status
+        = file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph);
 
     std::cout << "STATUS:" << status << std::endl;
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 8);
-    BOOST_CHECK_EQUAL(graph.num_edges(),  19);
+    BOOST_CHECK_EQUAL(graph.num_edges(), 19);
 
     // ---- Node 0
-    std::vector<int> p0{ };
+    std::vector<int> p0{};
     std::vector<int> c0{4, 6, 3, 5, 2};
 
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(0).begin(), graph.parents(0).end(), p0.begin(), p0.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(0).begin(), graph.children(0).end(), c0.begin(), c0.end());
 
     // ---- Node 1
-    std::vector<int> p1{ };
+    std::vector<int> p1{};
     std::vector<int> c1{3, 5, 2, 6};
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(1).begin(), graph.parents(1).end(), p1.begin(), p1.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(1).begin(), graph.children(1).end(), c1.begin(), c1.end());
@@ -157,7 +152,7 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) {
 
     // ---- Node 5
     std::vector<int> p5{0, 1, 2, 3, 4};
-    std::vector<int> c5{ };
+    std::vector<int> c5{};
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(5).begin(), graph.parents(5).end(), p5.begin(), p5.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(5).begin(), graph.children(5).end(), c5.begin(), c5.end());
 
@@ -169,16 +164,12 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) {
 
     // ---- Node 7
     std::vector<int> p7{3, 4, 6};
-    std::vector<int> c7{ };
+    std::vector<int> c7{};
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(7).begin(), graph.parents(7).end(), p7.begin(), p7.end());
     BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(7).begin(), graph.children(7).end(), c7.begin(), c7.end());
-
 }
 
-
-
 BOOST_AUTO_TEST_CASE(test_bicgstab) {
-
     // Getting root git directory
     std::filesystem::path cwd = std::filesystem::current_path();
     std::cout << cwd << std::endl;
@@ -189,15 +180,14 @@ BOOST_AUTO_TEST_CASE(test_bicgstab) {
 
     computational_dag_vector_impl_def_t graph;
 
-    bool status =
-        file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph);
+    bool status
+        = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph);
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 54);
 }
 
 BOOST_AUTO_TEST_CASE(test_hdag_boost) {
-
     // Getting root git directory
     std::filesystem::path cwd = std::filesystem::current_path();
     std::cout << cwd << std::endl;
@@ -208,15 +198,14 @@ BOOST_AUTO_TEST_CASE(test_hdag_boost) {
 
     boost_graph_int_t graph;
 
-    bool status =
-        file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph);
+    bool status
+        = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph);
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 54);
 }
 
 BOOST_AUTO_TEST_CASE(test_arch_smpl) {
-
     std::filesystem::path cwd = std::filesystem::current_path();
 
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
@@ -232,11 +221,9 @@ BOOST_AUTO_TEST_CASE(test_arch_smpl) {
     BOOST_CHECK_EQUAL(arch.communicationCosts(), 3);
     BOOST_CHECK_EQUAL(arch.synchronisationCosts(), 5);
     BOOST_CHECK_EQUAL(arch.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE);
-
 }
 
 BOOST_AUTO_TEST_CASE(test_arch_smpl_signed) {
-
     std::filesystem::path cwd = std::filesystem::current_path();
 
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
@@ -252,12 +239,9 @@ BOOST_AUTO_TEST_CASE(test_arch_smpl_signed) {
     BOOST_CHECK_EQUAL(arch.communicationCosts(), 3);
     BOOST_CHECK_EQUAL(arch.synchronisationCosts(), 5);
     BOOST_CHECK_EQUAL(arch.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE);
-
 }
 
 BOOST_AUTO_TEST_CASE(test_k_means) {
-
-
     std::filesystem::path cwd = std::filesystem::current_path();
 
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
@@ -271,8 +255,7 @@ BOOST_AUTO_TEST_CASE(test_k_means) {
 
     computational_dag_vector_impl_def_t graph;
 
-    bool status =
-        file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph);
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph);
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 40);
@@ -285,8 +268,7 @@ BOOST_AUTO_TEST_CASE(test_k_means) {
 
     computational_dag_edge_idx_vector_impl_def_t graph2;
 
-    status =
-        file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph2);
+    status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph2);
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph2.num_vertices(), 40);
@@ -299,8 +281,6 @@ BOOST_AUTO_TEST_CASE(test_k_means) {
 }
 
 BOOST_AUTO_TEST_CASE(test_dot_graph) {
-
-
     std::filesystem::path cwd = std::filesystem::current_path();
 
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
@@ -314,8 +294,7 @@ BOOST_AUTO_TEST_CASE(test_dot_graph) {
 
     computational_dag_vector_impl_def_t graph;
 
-    bool status =
-        file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph);
+    bool status = file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph);
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 11);
@@ -328,13 +307,9 @@ BOOST_AUTO_TEST_CASE(test_dot_graph) {
         BOOST_CHECK_EQUAL(graph.vertex_mem_weight(v), mem[v]);
         BOOST_CHECK_EQUAL(graph.vertex_type(v), type[v]);
     }
-
-
 }
 
 BOOST_AUTO_TEST_CASE(test_dot_graph_boost) {
-
-
     std::filesystem::path cwd = std::filesystem::current_path();
 
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
@@ -348,8 +323,7 @@ BOOST_AUTO_TEST_CASE(test_dot_graph_boost) {
 
     boost_graph_int_t graph;
 
-    bool status =
-        file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph);
+    bool status = file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph);
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 11);
@@ -362,6 +336,4 @@ BOOST_AUTO_TEST_CASE(test_dot_graph_boost) {
         BOOST_CHECK_EQUAL(graph.vertex_mem_weight(v), mem[v]);
         BOOST_CHECK_EQUAL(graph.vertex_type(v), type[v]);
     }
-
-
-}
\ No newline at end of file
+}
diff --git a/tests/graph_vector_adapter.cpp b/tests/graph_vector_adapter.cpp
index e96a6be6..66fd7595 100644
--- a/tests/graph_vector_adapter.cpp
+++ b/tests/graph_vector_adapter.cpp
@@ -22,40 +22,65 @@ limitations under the License.
 #include <iostream>
 #include <vector>
 
-#include "osp/graph_algorithms/directed_graph_util.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
-#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp"
-#include "osp/bsp/scheduler/Serial.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp"
+#include "osp/bsp/scheduler/Serial.hpp"
+#include "osp/coarser/Sarkar/Sarkar.hpp"
+#include "osp/coarser/Sarkar/SarkarMul.hpp"
 #include "osp/coarser/coarser_util.hpp"
 #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp"
+#include "osp/graph_algorithms/directed_graph_util.hpp"
+#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/coarser/Sarkar/Sarkar.hpp"
-#include "osp/coarser/Sarkar/SarkarMul.hpp"
+#include "osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp"
+#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 using namespace osp;
 
-
 BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) {
-
-    std::vector<std::vector<int>> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}};
-
-    std::vector<std::vector<int>> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}};
+    std::vector<std::vector<int>> out_neighbors{
+        {1, 2, 3},
+        {4, 6},
+        {4, 5},
+        {7},
+        {7},
+        {},
+        {},
+        {}
+    };
+
+    std::vector<std::vector<int>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0},
+        {1, 2},
+        {2},
+        {1},
+        {4, 3}
+    };
 
     using v_impl = cdag_vertex_impl<unsigned, int, int, int, unsigned>;
-    using graph_t = dag_vector_adapter<v_impl,int>;
+    using graph_t = dag_vector_adapter<v_impl, int>;
     using graph_constr_t = computational_dag_edge_idx_vector_impl<v_impl, cdag_edge_impl_int>;
-    using CoarseGraphType = Compact_Sparse_Graph<true, true, true, true, true, vertex_idx_t<graph_t>, std::size_t, v_workw_t<graph_t>, v_workw_t<graph_t>, v_workw_t<graph_t>, v_type_t<graph_t>>;
+    using CoarseGraphType = Compact_Sparse_Graph<true,
+                                                 true,
+                                                 true,
+                                                 true,
+                                                 true,
+                                                 vertex_idx_t<graph_t>,
+                                                 std::size_t,
+                                                 v_workw_t<graph_t>,
+                                                 v_workw_t<graph_t>,
+                                                 v_workw_t<graph_t>,
+                                                 v_type_t<graph_t>>;
 
     graph_t graph(out_neighbors, in_neighbors);
-    
+
     for (auto v : graph.vertices()) {
         graph.set_vertex_work_weight(v, 10);
     }
@@ -63,13 +88,13 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) {
     BspInstance<graph_t> instance;
     instance.getComputationalDag() = graph;
 
-    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                                       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
     instance.setDiagonalCompatibilityMatrix(2);
     instance.setSynchronisationCosts(1000);
     instance.setCommunicationCosts(1);
 
-
-
     // Set up the scheduler
     GrowLocalAutoCores<graph_constr_t> growlocal;
     BspLocking<graph_constr_t> locking;
@@ -105,20 +130,46 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) {
     BOOST_CHECK(acyc);
 }
 
-
 BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) {
-
-    std::vector<std::vector<int>> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}};
-
-    std::vector<std::vector<int>> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}};
+    std::vector<std::vector<int>> out_neighbors{
+        {1, 2, 3},
+        {4, 6},
+        {4, 5},
+        {7},
+        {7},
+        {},
+        {},
+        {}
+    };
+
+    std::vector<std::vector<int>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0},
+        {1, 2},
+        {2},
+        {1},
+        {4, 3}
+    };
 
     using v_impl = cdag_vertex_impl<unsigned, int, int, int, unsigned>;
-    using graph_t = dag_vector_adapter<v_impl,int>;
+    using graph_t = dag_vector_adapter<v_impl, int>;
     using graph_constr_t = computational_dag_vector_impl<v_impl>;
-    using CoarseGraphType = Compact_Sparse_Graph<true, true, true, true, true, vertex_idx_t<graph_t>, std::size_t, v_workw_t<graph_t>, v_workw_t<graph_t>, v_workw_t<graph_t>, v_type_t<graph_t>>;
+    using CoarseGraphType = Compact_Sparse_Graph<true,
+                                                 true,
+                                                 true,
+                                                 true,
+                                                 true,
+                                                 vertex_idx_t<graph_t>,
+                                                 std::size_t,
+                                                 v_workw_t<graph_t>,
+                                                 v_workw_t<graph_t>,
+                                                 v_workw_t<graph_t>,
+                                                 v_type_t<graph_t>>;
 
     graph_t graph(out_neighbors, in_neighbors);
-    
+
     for (auto v : graph.vertices()) {
         graph.set_vertex_work_weight(v, 10);
     }
@@ -126,13 +177,13 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) {
     BspInstance<graph_t> instance;
     instance.getComputationalDag() = graph;
 
-    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                                       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
     instance.setDiagonalCompatibilityMatrix(2);
     instance.setSynchronisationCosts(1000);
     instance.setCommunicationCosts(1);
 
-
-
     // Set up the scheduler
     GrowLocalAutoCores<graph_constr_t> growlocal;
     BspLocking<graph_constr_t> locking;
@@ -166,4 +217,4 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) {
 
     acyc = is_acyclic(coarse_dag);
     BOOST_CHECK(acyc);
-}
\ No newline at end of file
+}
diff --git a/tests/graph_vector_edge_desc_impl.cpp b/tests/graph_vector_edge_desc_impl.cpp
index d079bf1a..1c6770c9 100644
--- a/tests/graph_vector_edge_desc_impl.cpp
+++ b/tests/graph_vector_edge_desc_impl.cpp
@@ -22,16 +22,15 @@ limitations under the License.
 #include <iostream>
 #include <vector>
 
-#include "osp/graph_algorithms/directed_graph_util.hpp"
-#include "osp/graph_algorithms/directed_graph_path_util.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
+#include "osp/graph_algorithms/directed_graph_path_util.hpp"
+#include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 using namespace osp;
 
 computational_dag_edge_idx_vector_impl_def_t constr_graph_1() {
-
     computational_dag_edge_idx_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx;
@@ -79,14 +78,12 @@ computational_dag_edge_idx_vector_impl_def_t constr_graph_1() {
 }
 
 BOOST_AUTO_TEST_CASE(test_empty_dag_edge_idx) {
-
     computational_dag_edge_idx_vector_impl_def_t graph;
     BOOST_CHECK_EQUAL(graph.num_edges(), 0);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 0);
 }
 
 BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
-
     computational_dag_edge_idx_vector_impl_def_t graph = constr_graph_1();
 
     using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx;
@@ -96,7 +93,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
 
     size_t edge_idx = 0;
     for (const auto &edge : graph.edges()) {
-
         BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]);
         edge_idx++;
@@ -104,7 +100,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
 
     edge_idx = 0;
     for (const auto &edge : edges(graph)) {
-
         BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]);
         edge_idx++;
@@ -112,16 +107,13 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
 
     edge_idx = 0;
     for (auto &edge : edges(graph)) {
-
         BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]);
         edge_idx++;
     }
 
-
     edge_idx = 0;
     for (const auto edge : edges(graph)) {
-
         BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]);
         edge_idx++;
@@ -129,7 +121,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
 
     edge_idx = 0;
     for (auto edge : edges(graph)) {
-
         BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]);
         BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]);
         edge_idx++;
@@ -137,14 +128,31 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
 
     std::vector<vertex_idx> vertices{0, 1, 2, 3, 4, 5, 6, 7};
 
-    std::vector<std::vector<vertex_idx>> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}};
-
-    std::vector<std::vector<vertex_idx>> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {3, 4}};
+    std::vector<std::vector<vertex_idx>> out_neighbors{
+        {1, 2, 3},
+        {4, 6},
+        {4, 5},
+        {7},
+        {7},
+        {},
+        {},
+        {}
+    };
+
+    std::vector<std::vector<vertex_idx>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0},
+        {1, 2},
+        {2},
+        {1},
+        {3, 4}
+    };
 
     size_t idx = 0;
 
     for (const auto &v : graph.vertices()) {
-
         BOOST_CHECK_EQUAL(v, vertices[idx++]);
 
         size_t i = 0;
@@ -203,7 +211,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) {
 }
 
 BOOST_AUTO_TEST_CASE(test_util_1) {
-
     const computational_dag_edge_idx_vector_impl_def_t graph = constr_graph_1();
 
     BOOST_CHECK_EQUAL(graph.num_edges(), 9);
@@ -221,18 +228,18 @@ BOOST_AUTO_TEST_CASE(test_util_1) {
 
     const auto pair = edge_desc(0, 1, graph);
     BOOST_CHECK_EQUAL(pair.second, true);
-    BOOST_CHECK_EQUAL(source(pair.first,graph), 0);
-    BOOST_CHECK_EQUAL(target(pair.first,graph), 1);
+    BOOST_CHECK_EQUAL(source(pair.first, graph), 0);
+    BOOST_CHECK_EQUAL(target(pair.first, graph), 1);
     BOOST_CHECK_EQUAL(edge(0, 1, graph), true);
-    
+
     const auto pair2 = edge_desc(0, 4, graph);
     BOOST_CHECK_EQUAL(pair2.second, false);
     BOOST_CHECK_EQUAL(edge(0, 4, graph), false);
 
     const auto pair3 = edge_desc(1, 4, graph);
     BOOST_CHECK_EQUAL(pair3.second, true);
-    BOOST_CHECK_EQUAL(source(pair3.first,graph), 1);
-    BOOST_CHECK_EQUAL(target(pair3.first,graph), 4);
+    BOOST_CHECK_EQUAL(source(pair3.first, graph), 1);
+    BOOST_CHECK_EQUAL(target(pair3.first, graph), 4);
     BOOST_CHECK_EQUAL(edge(1, 4, graph), true);
 
     BOOST_CHECK_EQUAL(has_path(0, 1, graph), true);
@@ -294,11 +301,9 @@ BOOST_AUTO_TEST_CASE(test_util_1) {
     const auto long_edges = long_edges_in_triangles(graph);
 
     BOOST_CHECK_EQUAL(long_edges.size(), 0);
-
 }
 
 BOOST_AUTO_TEST_CASE(test_constr_dag) {
-
     computational_dag_edge_idx_vector_impl_def_int_t graph;
 
     graph.add_vertex(1, 2, 3);
@@ -343,4 +348,4 @@ BOOST_AUTO_TEST_CASE(test_constr_dag) {
     BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(1), 5);
     BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(1), 6);
     BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(1), 7);
-}
\ No newline at end of file
+}
diff --git a/tests/graph_vector_impl.cpp b/tests/graph_vector_impl.cpp
index 9e2131e7..e0677f93 100644
--- a/tests/graph_vector_impl.cpp
+++ b/tests/graph_vector_impl.cpp
@@ -30,7 +30,6 @@ limitations under the License.
 using namespace osp;
 
 computational_dag_vector_impl_def_t constr_graph_1() {
-
     computational_dag_vector_impl_def_t graph;
 
     using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
@@ -59,7 +58,6 @@ computational_dag_vector_impl_def_t constr_graph_1() {
 }
 
 BOOST_AUTO_TEST_CASE(test_empty_dag) {
-
     computational_dag_vector_impl_def_t graph;
     BOOST_CHECK_EQUAL(graph.num_edges(), 0);
     BOOST_CHECK_EQUAL(graph.num_vertices(), 0);
@@ -73,7 +71,6 @@ BOOST_AUTO_TEST_CASE(test_empty_dag) {
 }
 
 BOOST_AUTO_TEST_CASE(test_dag) {
-
     const computational_dag_vector_impl_def_t graph = constr_graph_1();
 
     using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx;
@@ -83,14 +80,31 @@ BOOST_AUTO_TEST_CASE(test_dag) {
 
     std::vector<vertex_idx> vertices{0, 1, 2, 3, 4, 5, 6, 7};
 
-    std::vector<std::vector<vertex_idx>> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}};
-
-    std::vector<std::vector<vertex_idx>> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}};
+    std::vector<std::vector<vertex_idx>> out_neighbors{
+        {1, 2, 3},
+        {4, 6},
+        {4, 5},
+        {7},
+        {7},
+        {},
+        {},
+        {}
+    };
+
+    std::vector<std::vector<vertex_idx>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0},
+        {1, 2},
+        {2},
+        {1},
+        {4, 3}
+    };
 
     size_t idx = 0;
 
     for (const auto &v : graph.vertices()) {
-
         BOOST_CHECK_EQUAL(v, vertices[idx++]);
 
         size_t i = 0;
@@ -105,35 +119,27 @@ BOOST_AUTO_TEST_CASE(test_dag) {
 
         i = 0;
         for (const auto &e : out_edges(v, graph)) {
-
             BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[v][i++]);
-
         }
 
         i = 0;
         for (const auto &e : in_edges(v, graph)) {
-
             BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]);
-
         }
 
         BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[v].size());
         BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[v].size());
     }
 
-
     unsigned count = 0;
-    for (const auto & e: edges(graph)) {
-        
+    for (const auto &e : edges(graph)) {
         std::cout << e.source << " -> " << e.target << std::endl;
         count++;
     }
     BOOST_CHECK_EQUAL(count, 9);
-
 }
 
 BOOST_AUTO_TEST_CASE(test_constr_dag) {
-
     computational_dag_vector_impl_def_int_t graph;
 
     graph.add_vertex(1, 2, 3);
@@ -206,22 +212,38 @@ BOOST_AUTO_TEST_CASE(test_constr_dag) {
 }
 
 BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) {
-
     std::vector<int> vertices{0, 1, 2, 3, 4, 5, 6, 7};
 
-    std::vector<std::vector<int>> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}};
-
-    std::vector<std::vector<int>> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}};
+    std::vector<std::vector<int>> out_neighbors{
+        {1, 2, 3},
+        {4, 6},
+        {4, 5},
+        {7},
+        {7},
+        {},
+        {},
+        {}
+    };
+
+    std::vector<std::vector<int>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0},
+        {1, 2},
+        {2},
+        {1},
+        {4, 3}
+    };
 
     using v_impl = cdag_vertex_impl<unsigned, int, int, int, unsigned>;
-    using graph_t = dag_vector_adapter<v_impl,int>;
+    using graph_t = dag_vector_adapter<v_impl, int>;
 
     graph_t graph(out_neighbors, in_neighbors);
 
     size_t idx = 0;
 
     for (const auto &v : graph.vertices()) {
-
         BOOST_CHECK_EQUAL(v, vertices[idx++]);
 
         unsigned vv = static_cast<unsigned>(v);
@@ -238,12 +260,12 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) {
 
         i = 0;
         for (const auto &e : out_edges(v, graph)) {
-            BOOST_CHECK_EQUAL(target(e,graph), out_neighbors[vv][i++]);
+            BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[vv][i++]);
         }
 
         i = 0;
         for (const auto &e : in_edges(v, graph)) {
-            BOOST_CHECK_EQUAL(source(e,graph), in_neighbors[vv][i++]);
+            BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[vv][i++]);
         }
 
         BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[vv].size());
@@ -251,10 +273,9 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) {
     }
 
     unsigned count = 0;
-    for (const auto & e: edges(graph)) {
-        
+    for (const auto &e : edges(graph)) {
         std::cout << e.source << " -> " << e.target << std::endl;
         count++;
     }
     BOOST_CHECK_EQUAL(count, 9);
-}
\ No newline at end of file
+}
diff --git a/tests/hash_pair.cpp b/tests/hash_pair.cpp
index 6f9cb7fe..070c7406 100644
--- a/tests/hash_pair.cpp
+++ b/tests/hash_pair.cpp
@@ -24,23 +24,22 @@ limitations under the License.
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(Hash_Pair) {
-    std::pair<int, int> p1({0,0});
-    std::pair<int, int> p2({1,1});
-    std::pair<int, int> p3({1,2});
-    std::pair<int, int> p4({2,1});
-    std::pair<int, int> p5({1,3});
-    std::pair<int, int> p6({2,6});
+    std::pair<int, int> p1({0, 0});
+    std::pair<int, int> p2({1, 1});
+    std::pair<int, int> p3({1, 2});
+    std::pair<int, int> p4({2, 1});
+    std::pair<int, int> p5({1, 3});
+    std::pair<int, int> p6({2, 6});
     std::pair<int, int> p7 = p6;
 
     pair_hash hasher;
 
-
-    BOOST_CHECK( hasher(p7) == hasher(p6) );
+    BOOST_CHECK(hasher(p7) == hasher(p6));
 
     // Can technically fail, but should not
-    BOOST_CHECK( hasher(p1) != hasher(p2) );
-    BOOST_CHECK( hasher(p3) != hasher(p4) );
-    BOOST_CHECK( hasher(p2) != hasher(p3) );
-    BOOST_CHECK( hasher(p2) != hasher(p5) );
-    BOOST_CHECK( hasher(p4) != hasher(p6) );
-}
\ No newline at end of file
+    BOOST_CHECK(hasher(p1) != hasher(p2));
+    BOOST_CHECK(hasher(p3) != hasher(p4));
+    BOOST_CHECK(hasher(p2) != hasher(p3));
+    BOOST_CHECK(hasher(p2) != hasher(p5));
+    BOOST_CHECK(hasher(p4) != hasher(p6));
+}
diff --git a/tests/heaps.cpp b/tests/heaps.cpp
index 2036b18c..de177ed1 100644
--- a/tests/heaps.cpp
+++ b/tests/heaps.cpp
@@ -17,38 +17,37 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE HeapTest
-#include <boost/test/unit_test.hpp>
-
-#include "osp/auxiliary/datastructures/heaps/DaryHeap.hpp"
-#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp"
-#include <boost/heap/fibonacci_heap.hpp>
-
 #include <algorithm>
+#include <boost/heap/fibonacci_heap.hpp>
+#include <boost/test/unit_test.hpp>
+#include <chrono>
+#include <iostream>
+#include <random>
 #include <set>
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include <chrono>
-#include <iostream>
-#include <random>
+
+#include "osp/auxiliary/datastructures/heaps/DaryHeap.hpp"
+#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp"
 
 namespace osp::test {
 
 // Wrapper for boost::heap::fibonacci_heap to match the test interface
 template <typename Key, typename Value, bool IsMinHeap = true>
 class BoostFibonacciHeapWrapper {
-private:
+  private:
     struct Node {
         Key key;
         Value value;
     };
 
     struct NodeCompare {
-        bool operator()(const Node& a, const Node& b) const {
+        bool operator()(const Node &a, const Node &b) const {
             if constexpr (IsMinHeap) {
-                return a.value > b.value; // For min-heap
+                return a.value > b.value;    // For min-heap
             } else {
-                return a.value < b.value; // For max-heap
+                return a.value < b.value;    // For max-heap
             }
         }
     };
@@ -59,46 +58,60 @@ class BoostFibonacciHeapWrapper {
     BoostHeap heap;
     std::unordered_map<Key, handle_type> handles;
 
-public:
+  public:
     BoostFibonacciHeapWrapper() = default;
 
     bool is_empty() const { return heap.empty(); }
+
     size_t size() const { return heap.size(); }
-    bool contains(const Key& key) const { return handles.count(key); }
 
-    const Key& top() const {
-        if (is_empty()) throw std::out_of_range("Heap is empty");
+    bool contains(const Key &key) const { return handles.count(key); }
+
+    const Key &top() const {
+        if (is_empty()) {
+            throw std::out_of_range("Heap is empty");
+        }
         return heap.top().key;
     }
 
     Key pop() {
-        if (is_empty()) throw std::out_of_range("Heap is empty");
+        if (is_empty()) {
+            throw std::out_of_range("Heap is empty");
+        }
         Key top_key = heap.top().key;
         heap.pop();
         handles.erase(top_key);
         return top_key;
     }
 
-    void push(const Key& key, const Value& value) {
-        if (contains(key)) throw std::invalid_argument("Key already exists");
+    void push(const Key &key, const Value &value) {
+        if (contains(key)) {
+            throw std::invalid_argument("Key already exists");
+        }
         handle_type handle = heap.push({key, value});
         handles[key] = handle;
     }
 
-    Value get_value(const Key& key) const {
-        if (!contains(key)) throw std::out_of_range("Key not found");
+    Value get_value(const Key &key) const {
+        if (!contains(key)) {
+            throw std::out_of_range("Key not found");
+        }
         return (*handles.at(key)).value;
     }
 
-    void update(const Key& key, const Value& new_value) {
-        if (!contains(key)) throw std::invalid_argument("Key not found for update");
+    void update(const Key &key, const Value &new_value) {
+        if (!contains(key)) {
+            throw std::invalid_argument("Key not found for update");
+        }
         handle_type handle = handles.at(key);
         (*handle).value = new_value;
         heap.update(handle);
     }
 
-    void erase(const Key& key) {
-        if (!contains(key)) throw std::invalid_argument("Key not found for erase");
+    void erase(const Key &key) {
+        if (!contains(key)) {
+            throw std::invalid_argument("Key not found for erase");
+        }
         heap.erase(handles.at(key));
         handles.erase(key);
     }
@@ -118,17 +131,17 @@ using MaxBoostFibonacciHeap = BoostFibonacciHeapWrapper<Key, Value, false>;
 // Wrapper for std::set to match the test interface
 template <typename Key, typename Value, bool IsMinHeap = true>
 class StdSetWrapper {
-private:
+  private:
     struct NodeCompare {
-        bool operator()(const std::pair<Value, Key>& a, const std::pair<Value, Key>& b) const {
+        bool operator()(const std::pair<Value, Key> &a, const std::pair<Value, Key> &b) const {
             if (a.first != b.first) {
                 if constexpr (IsMinHeap) {
-                    return a.first < b.first; // For min-heap
+                    return a.first < b.first;    // For min-heap
                 } else {
-                    return a.first > b.first; // For max-heap
+                    return a.first > b.first;    // For max-heap
                 }
             }
-            return a.second < b.second; // Tie-breaking
+            return a.second < b.second;    // Tie-breaking
         }
     };
 
@@ -136,48 +149,64 @@ class StdSetWrapper {
     SetType data_set;
     std::unordered_map<Key, Value> value_map;
 
-public:
+  public:
     StdSetWrapper() = default;
 
     bool is_empty() const { return data_set.empty(); }
+
     size_t size() const { return data_set.size(); }
-    bool contains(const Key& key) const { return value_map.count(key); }
 
-    const Key& top() const {
-        if (is_empty()) throw std::out_of_range("Heap is empty");
+    bool contains(const Key &key) const { return value_map.count(key); }
+
+    const Key &top() const {
+        if (is_empty()) {
+            throw std::out_of_range("Heap is empty");
+        }
         return data_set.begin()->second;
     }
 
     Key pop() {
-        if (is_empty()) throw std::out_of_range("Heap is empty");
+        if (is_empty()) {
+            throw std::out_of_range("Heap is empty");
+        }
         auto top_node = *data_set.begin();
         data_set.erase(data_set.begin());
         value_map.erase(top_node.second);
         return top_node.second;
     }
 
-    void push(const Key& key, const Value& value) {
-        if (contains(key)) throw std::invalid_argument("Key already exists");
+    void push(const Key &key, const Value &value) {
+        if (contains(key)) {
+            throw std::invalid_argument("Key already exists");
+        }
         data_set.insert({value, key});
         value_map[key] = value;
     }
 
-    Value get_value(const Key& key) const {
-        if (!contains(key)) throw std::out_of_range("Key not found");
+    Value get_value(const Key &key) const {
+        if (!contains(key)) {
+            throw std::out_of_range("Key not found");
+        }
         return value_map.at(key);
     }
 
-    void update(const Key& key, const Value& new_value) {
-        if (!contains(key)) throw std::invalid_argument("Key not found for update");
+    void update(const Key &key, const Value &new_value) {
+        if (!contains(key)) {
+            throw std::invalid_argument("Key not found for update");
+        }
         Value old_value = value_map.at(key);
-        if (old_value == new_value) return;
+        if (old_value == new_value) {
+            return;
+        }
         data_set.erase({old_value, key});
         data_set.insert({new_value, key});
         value_map[key] = new_value;
     }
 
-    void erase(const Key& key) {
-        if (!contains(key)) throw std::invalid_argument("Key not found for erase");
+    void erase(const Key &key) {
+        if (!contains(key)) {
+            throw std::invalid_argument("Key not found for erase");
+        }
         Value value = value_map.at(key);
         data_set.erase({value, key});
         value_map.erase(key);
@@ -196,7 +225,8 @@ template <typename Key, typename Value>
 using MaxStdSetHeap = StdSetWrapper<Key, Value, false>;
 
 // Generic test suite for any min-heap implementation that follows the API.
-template <typename HeapType> void test_min_heap_functionality() {
+template <typename HeapType>
+void test_min_heap_functionality() {
     HeapType heap;
 
     // Basic properties of an empty heap
@@ -247,12 +277,12 @@ template <typename HeapType> void test_min_heap_functionality() {
     BOOST_CHECK_THROW(heap.get_value("Z"), std::out_of_range);
 
     // Test update (decrease-key)
-    heap.update("B", 1); // B: 5 -> 1. Should be new top.
+    heap.update("B", 1);    // B: 5 -> 1. Should be new top.
     BOOST_CHECK_EQUAL(heap.top(), "B");
     BOOST_CHECK_EQUAL(heap.get_value("B"), 1);
 
     // Test update (increase-key)
-    heap.update("B", 25); // B: 1 -> 25. D (2) should be new top.
+    heap.update("B", 25);    // B: 1 -> 25. D (2) should be new top.
     BOOST_CHECK_EQUAL(heap.top(), "D");
     BOOST_CHECK_EQUAL(heap.get_value("B"), 25);
 
@@ -261,12 +291,12 @@ template <typename HeapType> void test_min_heap_functionality() {
     BOOST_CHECK_EQUAL(heap.get_value("A"), 10);
 
     // Test erase
-    heap.erase("D"); // Erase top element
+    heap.erase("D");    // Erase top element
     BOOST_CHECK_EQUAL(heap.size(), 4);
     BOOST_CHECK(!heap.contains("D"));
-    BOOST_CHECK_EQUAL(heap.top(), "A"); // A (10) is new top
+    BOOST_CHECK_EQUAL(heap.top(), "A");    // A (10) is new top
 
-    heap.erase("E"); // Erase non-top element
+    heap.erase("E");    // Erase non-top element
     BOOST_CHECK_EQUAL(heap.size(), 3);
     BOOST_CHECK(!heap.contains("E"));
     BOOST_CHECK_THROW(heap.erase("Z"), std::invalid_argument);
@@ -277,7 +307,8 @@ template <typename HeapType> void test_min_heap_functionality() {
     BOOST_CHECK_EQUAL(heap.size(), 0);
 }
 
-template <typename HeapType> void test_max_heap_functionality() {
+template <typename HeapType>
+void test_max_heap_functionality() {
     HeapType heap;
     heap.push("A", 10);
     heap.push("B", 5);
@@ -292,7 +323,8 @@ template <typename HeapType> void test_max_heap_functionality() {
 }
 
 // Stress test with a larger number of elements
-template <typename HeapType> void stress_test_heap() {
+template <typename HeapType>
+void stress_test_heap() {
     HeapType heap;
     const int num_items = 1000;
 
@@ -363,37 +395,35 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t
     std::cout << "Bulk Pop (" << num_items << " items): " << duration.count() << " ms" << std::endl;
 
     BOOST_CHECK(heap.is_empty());
-  
-
 
     // Scenario 4: Random Operations (Push, Erase, Update)
     heap.clear();
     std::vector<std::string> present_keys;
     present_keys.reserve(num_items);
     std::vector<bool> key_in_heap(num_items, false);
-    std::uniform_int_distribution<int> op_dist(0, 2); // 0: push, 1: erase, 2: update
+    std::uniform_int_distribution<int> op_dist(0, 2);    // 0: push, 1: erase, 2: update
 
     start = std::chrono::high_resolution_clock::now();
     for (size_t i = 0; i < num_random_ops; ++i) {
         int op = op_dist(gen);
-        if (op == 0 || present_keys.empty()) { // Push
+        if (op == 0 || present_keys.empty()) {    // Push
             size_t key_idx = key_distrib(gen);
             if (!key_in_heap[key_idx]) {
                 heap.push(keys[key_idx], priorities[key_idx]);
                 present_keys.push_back(keys[key_idx]);
                 key_in_heap[key_idx] = true;
             }
-        } else { // Erase or Update
+        } else {    // Erase or Update
             std::uniform_int_distribution<size_t> present_key_dist(0, present_keys.size() - 1);
             size_t present_key_vec_idx = present_key_dist(gen);
             std::string key_to_op = present_keys[present_key_vec_idx];
 
-            if (op == 1) { // Erase a random element
+            if (op == 1) {    // Erase a random element
                 heap.erase(key_to_op);
                 key_in_heap[std::stoul(key_to_op)] = false;
                 std::swap(present_keys[present_key_vec_idx], present_keys.back());
                 present_keys.pop_back();
-            } else { // op == 2, Update a random element (decrease key)
+            } else {    // op == 2, Update a random element (decrease key)
                 int new_prio = heap.get_value(key_to_op) - dec_dist(gen);
                 heap.update(key_to_op, new_prio);
             }
@@ -401,8 +431,7 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t
     }
     end = std::chrono::high_resolution_clock::now();
     duration = end - start;
-    std::cout << "Random Ops (" << num_random_ops << " ops of push/erase/update): " << duration.count() << " ms"
-              << std::endl;
+    std::cout << "Random Ops (" << num_random_ops << " ops of push/erase/update): " << duration.count() << " ms" << std::endl;
 
     // Scenario 5: Mixed Workload with Re-initialization
     const size_t num_outer_loops_s5 = 500;
@@ -412,8 +441,7 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t
     const size_t num_updates_per_iter_s5 = 25;
 
     // A large pool of keys to draw from for pushes, to avoid collisions.
-    const size_t key_pool_size_s5 =
-        num_outer_loops_s5 * (num_initial_pushes_s5 + num_inner_loops_s5 * num_pushes_per_iter_s5);
+    const size_t key_pool_size_s5 = num_outer_loops_s5 * (num_initial_pushes_s5 + num_inner_loops_s5 * num_pushes_per_iter_s5);
     std::vector<std::string> keys_s5(key_pool_size_s5);
     std::vector<int> priorities_s5(key_pool_size_s5);
     for (size_t i = 0; i < key_pool_size_s5; ++i) {
@@ -477,23 +505,33 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t
 BOOST_AUTO_TEST_SUITE(HeapTests)
 
 BOOST_AUTO_TEST_CASE(PairingHeapTest) { test_min_heap_functionality<MinPairingHeap<std::string, int>>(); }
+
 BOOST_AUTO_TEST_CASE(MaxPairingHeapTest) { test_max_heap_functionality<MaxPairingHeap<std::string, int>>(); }
+
 BOOST_AUTO_TEST_CASE(PairingHeapStressTest) { stress_test_heap<MinPairingHeap<std::string, int>>(); }
 
 BOOST_AUTO_TEST_CASE(BoostFibonacciHeapTest) { test_min_heap_functionality<MinBoostFibonacciHeap<std::string, int>>(); }
+
 BOOST_AUTO_TEST_CASE(MaxBoostFibonacciHeapTest) { test_max_heap_functionality<MaxBoostFibonacciHeap<std::string, int>>(); }
+
 BOOST_AUTO_TEST_CASE(BoostFibonacciHeapStressTest) { stress_test_heap<MinBoostFibonacciHeap<std::string, int>>(); }
 
 BOOST_AUTO_TEST_CASE(StdSetHeapTest) { test_min_heap_functionality<MinStdSetHeap<std::string, int>>(); }
+
 BOOST_AUTO_TEST_CASE(MaxStdSetHeapTest) { test_max_heap_functionality<MaxStdSetHeap<std::string, int>>(); }
+
 BOOST_AUTO_TEST_CASE(StdSetHeapStressTest) { stress_test_heap<MinStdSetHeap<std::string, int>>(); }
 
 BOOST_AUTO_TEST_CASE(DaryHeap_D2_Test) { test_min_heap_functionality<MinDaryHeap<std::string, int, 2>>(); }
+
 BOOST_AUTO_TEST_CASE(MaxDaryHeap_D2_Test) { test_max_heap_functionality<MaxDaryHeap<std::string, int, 2>>(); }
+
 BOOST_AUTO_TEST_CASE(DaryHeap_D2_StressTest) { stress_test_heap<MinDaryHeap<std::string, int, 2>>(); }
 
 BOOST_AUTO_TEST_CASE(DaryHeap_D4_Test) { test_min_heap_functionality<MinDaryHeap<std::string, int, 4>>(); }
+
 BOOST_AUTO_TEST_CASE(MaxDaryHeap_D4_Test) { test_max_heap_functionality<MaxDaryHeap<std::string, int, 4>>(); }
+
 BOOST_AUTO_TEST_CASE(DaryHeap_D4_StressTest) { stress_test_heap<MinDaryHeap<std::string, int, 4>>(); }
 
 BOOST_AUTO_TEST_SUITE_END()
@@ -506,15 +544,13 @@ BOOST_AUTO_TEST_CASE(HeapPerformanceComparison) {
     const size_t num_random_ops = 40000;
 
     run_performance_test<MinPairingHeap<std::string, int>>("Pairing Heap", num_items, num_updates, num_random_ops);
-    run_performance_test<MinBoostFibonacciHeap<std::string, int>>("Boost Fibonacci Heap", num_items, num_updates,
-                                                                  num_random_ops);
+    run_performance_test<MinBoostFibonacciHeap<std::string, int>>("Boost Fibonacci Heap", num_items, num_updates, num_random_ops);
     run_performance_test<MinStdSetHeap<std::string, int>>("std::set", num_items, num_updates, num_random_ops);
-    run_performance_test<MinDaryHeap<std::string, int, 2>>("Binary Heap (d=2)", num_items, num_updates,
-                                                           num_random_ops);
+    run_performance_test<MinDaryHeap<std::string, int, 2>>("Binary Heap (d=2)", num_items, num_updates, num_random_ops);
     run_performance_test<MinDaryHeap<std::string, int, 4>>("4-ary Heap (d=4)", num_items, num_updates, num_random_ops);
     run_performance_test<MinDaryHeap<std::string, int, 8>>("8-ary Heap (d=8)", num_items, num_updates, num_random_ops);
 }
 
 BOOST_AUTO_TEST_SUITE_END()
 
-} // namespace osp::test
+}    // namespace osp::test
diff --git a/tests/heavy_edge_preprocessing.cpp b/tests/heavy_edge_preprocessing.cpp
index 3c8c0cf7..6fcda0c2 100644
--- a/tests/heavy_edge_preprocessing.cpp
+++ b/tests/heavy_edge_preprocessing.cpp
@@ -1,19 +1,17 @@
 #define BOOST_TEST_MODULE heavy_edge_partitioning
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp"
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "test_graphs.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) {
-
     using Graph_t = boost_graph_int_t;
 
     std::vector<std::string> filenames_graph = test_graphs();
@@ -27,8 +25,7 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) {
     }
 
     for (auto &filename_graph : filenames_graph) {
-        std::string name_graph =
-            filename_graph.substr(filename_graph.find_last_of("/\\") + 1, filename_graph.find_last_of("."));
+        std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1, filename_graph.find_last_of("."));
 
         std::cout << std::endl << "Graph: " << name_graph << std::endl;
 
@@ -37,7 +34,6 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) {
         bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), graph);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -59,4 +55,4 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) {
             BOOST_CHECK(value);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/hill_climbing.cpp b/tests/hill_climbing.cpp
index 7a108ba9..3bbfcde1 100644
--- a/tests/hill_climbing.cpp
+++ b/tests/hill_climbing.cpp
@@ -17,21 +17,20 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE HILL_CLIMBING
+#include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp"
+
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
 
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-#include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp"
 #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include <filesystem>
-#include "test_graphs.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "test_graphs.hpp"
 
 using namespace osp;
 
-
 BOOST_AUTO_TEST_CASE(hill_climbing) {
-
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -75,11 +74,9 @@ BOOST_AUTO_TEST_CASE(hill_climbing) {
     BspSchedule<graph> schedule4 = bsp_initial;
     scheduler.improveScheduleWithStepLimit(schedule4, 5);
     BOOST_CHECK_EQUAL(schedule4.satisfiesPrecedenceConstraints(), true);
-
 }
 
 BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) {
-
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -111,7 +108,7 @@ BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) {
 
     BspSchedule<graph> schedule = initial;
     BspScheduleCS<graph> initial_cs(std::move(initial));
-    //initial_cs.setAutoCommunicationSchedule();
+    // initial_cs.setAutoCommunicationSchedule();
     initial_cs.setEagerCommunicationSchedule();
     BOOST_CHECK_EQUAL(initial_cs.hasValidCommSchedule(), true);
 
@@ -124,5 +121,4 @@ BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) {
     hc_cs.setSteepestAscend(true);
     hc_cs.improveSchedule(schedule2);
     BOOST_CHECK_EQUAL(schedule2.hasValidCommSchedule(), true);
-
-}
\ No newline at end of file
+}
diff --git a/tests/hypergraph_and_partition.cpp b/tests/hypergraph_and_partition.cpp
index 10e4cb4a..4d934454 100644
--- a/tests/hypergraph_and_partition.cpp
+++ b/tests/hypergraph_and_partition.cpp
@@ -18,24 +18,22 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE HYPERGRAPH_AND_PARTITION
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
-#include "osp/partitioning/model/partitioning.hpp"
-#include "osp/partitioning/model/partitioning_replication.hpp"
-#include "osp/partitioning/model/hypergraph_utility.hpp"
-#include "osp/partitioning/partitioners/generic_FM.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/auxiliary/io/mtx_hypergraph_file_reader.hpp"
 #include "osp/auxiliary/io/partitioning_file_writer.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "osp/partitioning/model/hypergraph_utility.hpp"
+#include "osp/partitioning/model/partitioning.hpp"
+#include "osp/partitioning/model/partitioning_replication.hpp"
+#include "osp/partitioning/partitioners/generic_FM.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
-
     using graph = computational_dag_vector_impl_def_int_t;
     using hypergraph = Hypergraph_def_t;
 
@@ -45,12 +43,11 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
         cwd = cwd.parent_path();
         std::cout << cwd << std::endl;
-    } 
+    }
 
     graph DAG;
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG);
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG);
 
     BOOST_CHECK(status);
 
@@ -66,35 +63,38 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
     Hgraph = convert_from_cdag_as_dag<hypergraph, graph>(DAG);
     BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices());
     BOOST_CHECK_EQUAL(DAG.num_edges(), Hgraph.num_hyperedges());
-    BOOST_CHECK_EQUAL(DAG.num_edges()*2, Hgraph.num_pins());
+    BOOST_CHECK_EQUAL(DAG.num_edges() * 2, Hgraph.num_pins());
 
     // HyperDAG format, one hypredge for each non-sink node
     unsigned nr_of_non_sinks = 0;
-    for(const auto &node : DAG.vertices())
-        if(DAG.out_degree(node) > 0)
-            ++ nr_of_non_sinks;
+    for (const auto &node : DAG.vertices()) {
+        if (DAG.out_degree(node) > 0) {
+            ++nr_of_non_sinks;
+        }
+    }
 
     Hgraph = convert_from_cdag_as_hyperdag<hypergraph, graph>(DAG);
     BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices());
     BOOST_CHECK_EQUAL(nr_of_non_sinks, Hgraph.num_hyperedges());
     BOOST_CHECK_EQUAL(DAG.num_edges() + nr_of_non_sinks, Hgraph.num_pins());
 
-
     // Dummy partitioning
 
     PartitioningProblem instance(Hgraph, 3, 30);
 
     Partitioning partition(instance);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition.setAssignedPartition(node, node % 3);
+    }
 
     BOOST_CHECK(partition.satisfiesBalanceConstraint());
     int cutNetCost = partition.computeCutNetCost();
     int connectivityCost = partition.computeConnectivityCost();
     BOOST_CHECK(connectivityCost >= cutNetCost);
 
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         instance.getHypergraph().set_vertex_work_weight(node, 1);
+    }
 
     instance.setMaxWorkWeightViaImbalanceFactor(0);
     BOOST_CHECK(partition.satisfiesBalanceConstraint());
@@ -103,44 +103,48 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
     instance.setMaxWorkWeightViaImbalanceFactor(0);
     BOOST_CHECK(!partition.satisfiesBalanceConstraint());
 
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition.setAssignedPartition(node, node % 5);
+    }
 
     BOOST_CHECK(partition.satisfiesBalanceConstraint());
     BOOST_CHECK(partition.computeConnectivityCost() >= partition.computeCutNetCost());
 
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         instance.getHypergraph().set_vertex_memory_weight(node, 1);
+    }
     instance.setMaxMemoryWeightExplicitly(10);
     BOOST_CHECK(partition.satisfiesBalanceConstraint() == false);
     instance.setMaxMemoryWeightExplicitly(std::numeric_limits<int>::max());
 
     file_writer::write_txt(std::cout, partition);
 
-
     // Dummy partitioning with replication
 
     instance.setHypergraph(convert_from_cdag_as_hyperdag<hypergraph, graph>(DAG));
     instance.setNumberOfPartitions(3);
     instance.setMaxWorkWeightExplicitly(30);
     PartitioningWithReplication partition_with_rep(instance);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition_with_rep.setAssignedPartitions(node, {node % 3});
+    }
 
     BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint());
     BOOST_CHECK(partition_with_rep.computeCutNetCost() == cutNetCost);
     BOOST_CHECK(partition_with_rep.computeConnectivityCost() == connectivityCost);
 
     instance.setMaxWorkWeightExplicitly(60);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
-        partition_with_rep.setAssignedPartitions(node, {node % 3, (node+1)%3});
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
+        partition_with_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3});
+    }
 
     BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint());
     BOOST_CHECK(partition_with_rep.computeConnectivityCost() >= partition_with_rep.computeCutNetCost());
 
     instance.setMaxWorkWeightExplicitly(compute_total_vertex_work_weight(Hgraph));
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition_with_rep.setAssignedPartitions(node, {0, 1, 2});
+    }
 
     BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint());
     BOOST_CHECK(partition_with_rep.computeConnectivityCost() == 0);
@@ -148,17 +152,18 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
 
     file_writer::write_txt(std::cout, partition_with_rep);
 
-
     // Generic FM
 
     instance.setNumberOfPartitions(2);
     instance.setMaxWorkWeightExplicitly(35);
-    for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+    for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
         instance.getHypergraph().set_vertex_work_weight(node, 1);
+    }
 
     Partitioning partition_to_improve(instance);
-    for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+    for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
         partition_to_improve.setAssignedPartition(node, node % 2);
+    }
 
     int original_cost = partition_to_improve.computeConnectivityCost();
 
@@ -168,20 +173,22 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
 
     BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint());
     BOOST_CHECK(new_cost <= original_cost);
-    std::cout<<original_cost<<" --> "<<new_cost<<std::endl;
+    std::cout << original_cost << " --> " << new_cost << std::endl;
 
     graph larger_DAG;
-    file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), larger_DAG);
+    file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(),
+                                                      larger_DAG);
     instance.setHypergraph(convert_from_cdag_as_hyperdag<hypergraph, graph>(larger_DAG));
 
     instance.setMaxWorkWeightExplicitly(4000);
-    for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+    for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
         instance.getHypergraph().set_vertex_work_weight(node, 1);
+    }
 
     partition_to_improve.resetPartition();
-    for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+    for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
         partition_to_improve.setAssignedPartition(node, node % 2);
+    }
 
     original_cost = partition_to_improve.computeConnectivityCost();
 
@@ -191,14 +198,15 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
 
     BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint());
     BOOST_CHECK(new_cost <= original_cost);
-    std::cout<<original_cost<<" --> "<<new_cost<<std::endl;
+    std::cout << original_cost << " --> " << new_cost << std::endl;
 
     // Recursive FM
     instance.setNumberOfPartitions(16);
     instance.setMaxWorkWeightViaImbalanceFactor(0.3);
 
-    for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node)
+    for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) {
         partition_to_improve.setAssignedPartition(node, node % 16);
+    }
 
     original_cost = partition_to_improve.computeConnectivityCost();
 
@@ -208,7 +216,5 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) {
 
     BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint());
     BOOST_CHECK(new_cost <= original_cost);
-    std::cout<<original_cost<<" --> "<<new_cost<<std::endl;
-
-
-}
\ No newline at end of file
+    std::cout << original_cost << " --> " << new_cost << std::endl;
+}
diff --git a/tests/ilp_bsp_scheduler.cpp b/tests/ilp_bsp_scheduler.cpp
index fc6934b4..201dd393 100644
--- a/tests/ilp_bsp_scheduler.cpp
+++ b/tests/ilp_bsp_scheduler.cpp
@@ -18,26 +18,24 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE COPT_ILP_SCHEDULING
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
+#include <iostream>
 
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include <filesystem>
-#include <iostream>
-
-#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp"
-#include "osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp"
 #include "osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp"
+#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp"
 #include "osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp"
+#include "osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_total) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -78,7 +76,6 @@ BOOST_AUTO_TEST_CASE(test_total) {
 };
 
 BOOST_AUTO_TEST_CASE(test_full) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -142,7 +139,7 @@ BOOST_AUTO_TEST_CASE(test_full) {
     BOOST_CHECK(schedule_improved2.satisfiesConstraints());
 
     // initialize with recomputing schedule, return recomputing schedule
-    BspScheduleRecomp<graph> schedule_improved3(instance),schedule_init3(schedule_init_cs);
+    BspScheduleRecomp<graph> schedule_improved3(instance), schedule_init3(schedule_init_cs);
     CoptFullScheduler<graph> scheduler_init3(schedule_init3);
     scheduler_init3.setTimeLimitSeconds(10);
     const auto result_init3 = scheduler_init3.computeScheduleRecomp(schedule_improved3);
@@ -153,8 +150,9 @@ BOOST_AUTO_TEST_CASE(test_full) {
     BspInstance<graph> instance_typed = instance;
     instance_typed.getArchitecture().setProcessorType(0, 1);
     instance_typed.getArchitecture().setProcessorType(1, 1);
-    for(vertex_idx_t<graph> node = 0; node < static_cast<vertex_idx_t<graph> >(instance_typed.numberOfVertices()); ++node)
-        instance_typed.getComputationalDag().set_vertex_type(node, node%2);
+    for (vertex_idx_t<graph> node = 0; node < static_cast<vertex_idx_t<graph> >(instance_typed.numberOfVertices()); ++node) {
+        instance_typed.getComputationalDag().set_vertex_type(node, node % 2);
+    }
     instance_typed.setDiagonalCompatibilityMatrix(2);
 
     BspSchedule<graph> schedule_typed(instance_typed);
@@ -198,7 +196,6 @@ BOOST_AUTO_TEST_CASE(test_full) {
 };
 
 BOOST_AUTO_TEST_CASE(test_cs) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -214,8 +211,8 @@ BOOST_AUTO_TEST_CASE(test_cs) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_pregel.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(),
+                                                                    instance.getComputationalDag());
 
     BOOST_CHECK(status);
 
@@ -232,7 +229,7 @@ BOOST_AUTO_TEST_CASE(test_cs) {
     const auto result = scheduler.improveSchedule(schedule_cs);
     BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result);
     const auto after = schedule_cs.compute_cs_communication_costs();
-    std::cout<<before<<" --cs--> "<<after<<std::endl;
+    std::cout << before << " --cs--> " << after << std::endl;
 
     BOOST_CHECK(schedule_cs.satisfiesPrecedenceConstraints());
     BOOST_CHECK(schedule_cs.hasValidCommSchedule());
@@ -240,7 +237,6 @@ BOOST_AUTO_TEST_CASE(test_cs) {
 };
 
 BOOST_AUTO_TEST_CASE(test_partial) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -256,8 +252,8 @@ BOOST_AUTO_TEST_CASE(test_partial) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_pregel.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(),
+                                                                    instance.getComputationalDag());
 
     BOOST_CHECK(status);
 
@@ -285,6 +281,4 @@ BOOST_AUTO_TEST_CASE(test_partial) {
     BOOST_CHECK(schedule.hasValidCommSchedule());
     auto cost_after = schedule.computeCosts();
     BOOST_CHECK(cost_after <= cost_mid);
-
 };
-
diff --git a/tests/ilp_hypergraph_partitioning.cpp b/tests/ilp_hypergraph_partitioning.cpp
index b42bbc29..636c6545 100644
--- a/tests/ilp_hypergraph_partitioning.cpp
+++ b/tests/ilp_hypergraph_partitioning.cpp
@@ -18,19 +18,17 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE HYPERGRAPH_PARTITIONING_ILP
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
+
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/partitioning/model/hypergraph_utility.hpp"
 #include "osp/partitioning/partitioners/partitioning_ILP.hpp"
 #include "osp/partitioning/partitioners/partitioning_ILP_replication.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_full) {
-
     using graph = computational_dag_vector_impl_def_int_t;
     using Hypergraph = Hypergraph_def_t;
 
@@ -40,12 +38,11 @@ BOOST_AUTO_TEST_CASE(test_full) {
     while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) {
         cwd = cwd.parent_path();
         std::cout << cwd << std::endl;
-    } 
+    }
 
     graph DAG;
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG);
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG);
 
     BOOST_CHECK(status);
 
@@ -55,9 +52,8 @@ BOOST_AUTO_TEST_CASE(test_full) {
     PartitioningProblem instance(Hgraph, 3, 35);
     Partitioning partition(instance);
 
-
     // ILP without replication
-    
+
     HypergraphPartitioningILP<Hypergraph> partitioner;
     partitioner.setTimeLimitSeconds(60);
     partitioner.computePartitioning(partition);
@@ -65,8 +61,9 @@ BOOST_AUTO_TEST_CASE(test_full) {
     BOOST_CHECK(partition.satisfiesBalanceConstraint());
     BOOST_CHECK(partition.computeConnectivityCost() >= partition.computeCutNetCost());
 
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition.setAssignedPartition(node, node % 3);
+    }
 
     partitioner.setUseInitialSolution(true);
     partitioner.computePartitioning(partition);
@@ -94,16 +91,18 @@ BOOST_AUTO_TEST_CASE(test_full) {
     BOOST_CHECK(partition_rep.computeConnectivityCost() == 0);
 
     partitioner_rep.setUseInitialSolution(true);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition_rep.setAssignedPartitions(node, {node % 3});
+    }
 
     partitioner_rep.computePartitioning(partition_rep);
     BOOST_CHECK(partition_rep.satisfiesBalanceConstraint());
     BOOST_CHECK(partition_rep.computeConnectivityCost() == 0);
 
     instance.setMaxWorkWeightExplicitly(60);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
-        partition_rep.setAssignedPartitions(node, {node % 3, (node+1)%3});
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
+        partition_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3});
+    }
 
     partitioner_rep.computePartitioning(partition_rep);
     BOOST_CHECK(partition_rep.satisfiesBalanceConstraint());
@@ -119,19 +118,20 @@ BOOST_AUTO_TEST_CASE(test_full) {
     BOOST_CHECK(partition_rep.computeConnectivityCost() == 0);
 
     partitioner_rep.setUseInitialSolution(true);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
         partition_rep.setAssignedPartitions(node, {node % 3});
+    }
 
     partitioner_rep.computePartitioning(partition_rep);
     BOOST_CHECK(partition_rep.satisfiesBalanceConstraint());
     BOOST_CHECK(partition_rep.computeConnectivityCost() == 0);
 
     instance.setMaxWorkWeightExplicitly(60);
-    for(unsigned node = 0; node < Hgraph.num_vertices(); ++node)
-        partition_rep.setAssignedPartitions(node, {node % 3, (node+1)%3});
+    for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) {
+        partition_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3});
+    }
 
     partitioner_rep.computePartitioning(partition_rep);
     BOOST_CHECK(partition_rep.satisfiesBalanceConstraint());
     BOOST_CHECK(partition_rep.computeConnectivityCost() == 0);
-
-};
\ No newline at end of file
+};
diff --git a/tests/ilp_pebbling_scheduler.cpp b/tests/ilp_pebbling_scheduler.cpp
index 8c186919..0d8b810f 100644
--- a/tests/ilp_pebbling_scheduler.cpp
+++ b/tests/ilp_pebbling_scheduler.cpp
@@ -18,22 +18,20 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE PEBBLING_ILP
 #include <boost/test/unit_test.hpp>
-
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include <filesystem>
 #include <iostream>
 
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 #include "osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp"
 #include "osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_full) {
-
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -58,7 +56,8 @@ BOOST_AUTO_TEST_CASE(test_full) {
     BspSchedule<graph> bsp_initial(instance);
     BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial));
 
-    std::vector<v_memw_t<graph> > minimum_memory_required_vector = PebblingSchedule<graph>::minimumMemoryRequiredPerNodeType(instance);
+    std::vector<v_memw_t<graph> > minimum_memory_required_vector
+        = PebblingSchedule<graph>::minimumMemoryRequiredPerNodeType(instance);
     v_memw_t<graph> max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end());
     instance.getArchitecture().setMemoryBound(max_required);
 
@@ -71,11 +70,9 @@ BOOST_AUTO_TEST_CASE(test_full) {
     mpp.computePebblingWithInitialSolution(initial_sol, schedule);
     schedule.cleanSchedule();
     BOOST_CHECK(schedule.isValid());
-
 };
 
 BOOST_AUTO_TEST_CASE(test_partial) {
-
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -96,7 +93,8 @@ BOOST_AUTO_TEST_CASE(test_partial) {
 
     BOOST_CHECK(status);
 
-    std::vector<v_memw_t<graph> > minimum_memory_required_vector = PebblingSchedule<graph>::minimumMemoryRequiredPerNodeType(instance);
+    std::vector<v_memw_t<graph> > minimum_memory_required_vector
+        = PebblingSchedule<graph>::minimumMemoryRequiredPerNodeType(instance);
     v_memw_t<graph> max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end());
     instance.getArchitecture().setMemoryBound(max_required);
 
@@ -106,5 +104,4 @@ BOOST_AUTO_TEST_CASE(test_partial) {
     PebblingSchedule<graph> schedule(instance);
     mpp.computePebbling(schedule);
     BOOST_CHECK(schedule.isValid());
-
-};
\ No newline at end of file
+};
diff --git a/tests/intpower.cpp b/tests/intpower.cpp
index de5f973c..53ab140d 100644
--- a/tests/intpower.cpp
+++ b/tests/intpower.cpp
@@ -21,7 +21,6 @@ limitations under the License.
 
 #include "osp/auxiliary/misc.hpp"
 
-
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(IntegerPowers) {
diff --git a/tests/isomorphic_subgraph_scheduler.cpp b/tests/isomorphic_subgraph_scheduler.cpp
index 9165c5a5..e5abd52b 100644
--- a/tests/isomorphic_subgraph_scheduler.cpp
+++ b/tests/isomorphic_subgraph_scheduler.cpp
@@ -18,14 +18,13 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE IsomorphicSubgraphScheduler
 #include <boost/test/unit_test.hpp>
+#include <numeric>
+#include <set>
 
-#include "test_graphs.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-
-#include <numeric>
-#include <set>
+#include "test_graphs.hpp"
 
 using namespace osp;
 
@@ -40,14 +39,14 @@ class IsomorphicSubgraphSchedulerTester : public IsomorphicSubgraphScheduler<Gra
   public:
     using IsomorphicSubgraphScheduler<Graph_t, Constr_Graph_t>::IsomorphicSubgraphScheduler;
 
-    void test_trim_subgraph_groups(std::vector<group_t>& isomorphic_groups,
-                                   const BspInstance<Graph_t>& instance,
-                                   std::vector<bool>& was_trimmed) {
+    void test_trim_subgraph_groups(std::vector<group_t> &isomorphic_groups,
+                                   const BspInstance<Graph_t> &instance,
+                                   std::vector<bool> &was_trimmed) {
         this->trim_subgraph_groups(isomorphic_groups, instance, was_trimmed);
     }
 
     void test_schedule_isomorphic_group(const BspInstance<Graph_t> &instance,
-                                        const std::vector<group_t>& isomorphic_groups,
+                                        const std::vector<group_t> &isomorphic_groups,
                                         const SubgraphSchedule &sub_sched,
                                         std::vector<vertex_idx_t<Graph_t>> &partition) {
         this->schedule_isomorphic_group(instance, isomorphic_groups, sub_sched, partition);
@@ -57,7 +56,6 @@ class IsomorphicSubgraphSchedulerTester : public IsomorphicSubgraphScheduler<Gra
 BOOST_AUTO_TEST_SUITE(IsomorphicSubgraphSchedulerTestSuite)
 
 BOOST_AUTO_TEST_CASE(EmptyGraphTest) {
-    
     BspInstance<graph_t> instance;
     instance.getArchitecture().setNumberOfProcessors(4);
 
@@ -73,16 +71,16 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_NoTrim) {
     IsomorphicSubgraphSchedulerTester<graph_t, constr_graph_t> tester(greedy_scheduler);
 
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
-    dag.add_vertex(1, 1, 1, 0); // 0
-    dag.add_vertex(1, 1, 1, 0); // 1
-    dag.add_vertex(1, 1, 1, 0); // 2
-    dag.add_vertex(1, 1, 1, 0); // 3
-    instance.getArchitecture().setProcessorsWithTypes({0,0,0,0,0,0,0,0}); // 8 processors of type 0
+    auto &dag = instance.getComputationalDag();
+    dag.add_vertex(1, 1, 1, 0);                                                     // 0
+    dag.add_vertex(1, 1, 1, 0);                                                     // 1
+    dag.add_vertex(1, 1, 1, 0);                                                     // 2
+    dag.add_vertex(1, 1, 1, 0);                                                     // 3
+    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0});    // 8 processors of type 0
     instance.setDiagonalCompatibilityMatrix(1);
 
     // A single group with 4 subgraphs, each with 1 node.
-    std::vector<group_t> iso_groups = { group_t{ { {0}, {1}, {2}, {3} } } };
+    std::vector<group_t> iso_groups = {group_t{{{0}, {1}, {2}, {3}}}};
 
     std::vector<bool> was_trimmed(iso_groups.size());
     // Group size (4) is a divisor of processor count for type 0 (8), so no trim.
@@ -91,28 +89,27 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_NoTrim) {
     BOOST_REQUIRE_EQUAL(was_trimmed.size(), 1);
     BOOST_CHECK(!was_trimmed[0]);
     BOOST_CHECK_EQUAL(iso_groups.size(), 1);
-    BOOST_CHECK_EQUAL(iso_groups[0].subgraphs.size(), 4); // Still 4 subgraphs in the group
+    BOOST_CHECK_EQUAL(iso_groups[0].subgraphs.size(), 4);    // Still 4 subgraphs in the group
 }
 
 BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_WithTrim) {
     GreedyBspScheduler<constr_graph_t> greedy_scheduler;
     IsomorphicSubgraphSchedulerTester<graph_t, constr_graph_t> tester(greedy_scheduler);
     tester.setAllowTrimmedScheduler(false);
-    
 
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
-    dag.add_vertex(10, 1, 1, 0); // 0
-    dag.add_vertex(10, 1, 1, 0); // 1
-    dag.add_vertex(10, 1, 1, 0); // 2
-    dag.add_vertex(10, 1, 1, 0); // 3
-    dag.add_vertex(10, 1, 1, 0); // 4
-    dag.add_vertex(10, 1, 1, 0); // 5
-    instance.getArchitecture().setProcessorsWithTypes({0,0,0,0,0,0,0,0}); // 8 processors of type 0
+    auto &dag = instance.getComputationalDag();
+    dag.add_vertex(10, 1, 1, 0);                                                    // 0
+    dag.add_vertex(10, 1, 1, 0);                                                    // 1
+    dag.add_vertex(10, 1, 1, 0);                                                    // 2
+    dag.add_vertex(10, 1, 1, 0);                                                    // 3
+    dag.add_vertex(10, 1, 1, 0);                                                    // 4
+    dag.add_vertex(10, 1, 1, 0);                                                    // 5
+    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0});    // 8 processors of type 0
     instance.setDiagonalCompatibilityMatrix(1);
 
     // 6 subgraphs, each with 1 node and work weight 10.
-    std::vector<group_t> iso_groups = { group_t{ { {0}, {1}, {2}, {3}, {4}, {5} } } };
+    std::vector<group_t> iso_groups = {group_t{{{0}, {1}, {2}, {3}, {4}, {5}}}};
 
     std::vector<bool> was_trimmed(iso_groups.size());
     // Group size (6) is not a divisor of processor count for type 0 (8).
@@ -124,13 +121,13 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_WithTrim) {
     BOOST_REQUIRE_EQUAL(was_trimmed.size(), 1);
     BOOST_CHECK(was_trimmed[0]);
     BOOST_CHECK_EQUAL(iso_groups.size(), 1);
-    BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 2); // Group now contains 2 merged subgraphs
+    BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 2);    // Group now contains 2 merged subgraphs
 
     // Check that the new subgraphs are correctly merged.
     BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[0].size(), 3);
     BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[1].size(), 3);
 
-    const auto& final_sgs = iso_groups[0].subgraphs;
+    const auto &final_sgs = iso_groups[0].subgraphs;
     std::set<unsigned> vertices_sg0(final_sgs[0].begin(), final_sgs[0].end());
     std::set<unsigned> vertices_sg1(final_sgs[1].begin(), final_sgs[1].end());
     std::set<unsigned> expected_sg0 = {0, 1, 2};
@@ -144,28 +141,39 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) {
     IsomorphicSubgraphSchedulerTester<graph_t, constr_graph_t> tester(greedy_scheduler);
     tester.setAllowTrimmedScheduler(false);
 
-
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
-    for (int i = 0; i < 6; ++i) dag.add_vertex(1,1,1,0); // 0-5
-    for (int i = 0; i < 3; ++i) dag.add_vertex(1,1,1,0); // 6-8, but we will use 10-12 in test
-    for (int i = 0; i < 2; ++i) dag.add_vertex(1,1,1,0); // 9-10
-    for (int i = 0; i < 2; ++i) dag.add_vertex(1,1,1,0); // 11-12
-    for (int i = 0; i < 8; ++i) dag.add_vertex(1,1,1,0); // 13-20
-    for (int i = 0; i < 5; ++i) dag.add_vertex(1,1,1,0); // 21-25
+    auto &dag = instance.getComputationalDag();
+    for (int i = 0; i < 6; ++i) {
+        dag.add_vertex(1, 1, 1, 0);    // 0-5
+    }
+    for (int i = 0; i < 3; ++i) {
+        dag.add_vertex(1, 1, 1, 0);    // 6-8, but we will use 10-12 in test
+    }
+    for (int i = 0; i < 2; ++i) {
+        dag.add_vertex(1, 1, 1, 0);    // 9-10
+    }
+    for (int i = 0; i < 2; ++i) {
+        dag.add_vertex(1, 1, 1, 0);    // 11-12
+    }
+    for (int i = 0; i < 8; ++i) {
+        dag.add_vertex(1, 1, 1, 0);    // 13-20
+    }
+    for (int i = 0; i < 5; ++i) {
+        dag.add_vertex(1, 1, 1, 0);    // 21-25
+    }
     // Make sure all vertices used in iso_groups exist.
     // All are type 0.
 
-    instance.getArchitecture().setProcessorsWithTypes({0,0,0,0,0,0,0,0,0}); // 9 processors of type 0
+    instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0});    // 9 processors of type 0
     instance.setDiagonalCompatibilityMatrix(1);
 
     // Group 1: size 6. gcd(6, 9) = 3. merge_size = 6/3 = 2. -> 3 subgraphs of size 2.
     // Group 2: size 3. gcd(3, 9) = 3. merge_size = 3/3 = 1. -> no trim.
     // Group 3: size 5. gcd(5, 9) = 1. merge_size = 5/1 = 5. -> 1 subgraph of size 5.
     std::vector<group_t> iso_groups = {
-        group_t{ { {0}, {1}, {2}, {3}, {4}, {5} } }, // Group 1
-        group_t{ { {10}, {11}, {12} } },             // Group 2
-        group_t{ { {20}, {21}, {22}, {23}, {24} } }  // Group 3
+        group_t{{{0}, {1}, {2}, {3}, {4}, {5}}},    // Group 1
+        group_t{{{10}, {11}, {12}}},                // Group 2
+        group_t{{{20}, {21}, {22}, {23}, {24}}}     // Group 3
     };
 
     std::vector<bool> was_trimmed(iso_groups.size());
@@ -174,9 +182,9 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) {
     BOOST_REQUIRE_EQUAL(iso_groups.size(), 3);
     BOOST_REQUIRE_EQUAL(was_trimmed.size(), 3);
 
-    BOOST_CHECK(was_trimmed[0]);  // Group 1 should be trimmed
-    BOOST_CHECK(!was_trimmed[1]); // Group 2 should not be trimmed
-    BOOST_CHECK(was_trimmed[2]);  // Group 3 should be trimmed
+    BOOST_CHECK(was_trimmed[0]);     // Group 1 should be trimmed
+    BOOST_CHECK(!was_trimmed[1]);    // Group 2 should not be trimmed
+    BOOST_CHECK(was_trimmed[2]);     // Group 3 should be trimmed
     // Check Group 1
     BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 3);
     BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[0].size(), 2);
@@ -195,34 +203,35 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) {
 BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_HeterogeneousArch) {
     // --- Setup ---
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
     // Two isomorphic groups:
     // Group 0: {0,1}, {2,3} (type 0)
     // Group 1: {4}, {5} (type 1)
-    dag.add_vertex(10, 1, 1, 0); dag.add_vertex(10, 1, 1, 0); // 0, 1
-    dag.add_vertex(10, 1, 1, 0); dag.add_vertex(10, 1, 1, 0); // 2, 3
-    dag.add_vertex(20, 1, 1, 1); // 4
-    dag.add_vertex(20, 1, 1, 1); // 5
-    dag.add_edge(0, 1); dag.add_edge(2, 3);
-    dag.add_edge(1, 4); dag.add_edge(3, 5);
+    dag.add_vertex(10, 1, 1, 0);
+    dag.add_vertex(10, 1, 1, 0);    // 0, 1
+    dag.add_vertex(10, 1, 1, 0);
+    dag.add_vertex(10, 1, 1, 0);    // 2, 3
+    dag.add_vertex(20, 1, 1, 1);    // 4
+    dag.add_vertex(20, 1, 1, 1);    // 5
+    dag.add_edge(0, 1);
+    dag.add_edge(2, 3);
+    dag.add_edge(1, 4);
+    dag.add_edge(3, 5);
 
     // 2 procs of type 0, 2 procs of type 1
     instance.getArchitecture().setProcessorsWithTypes({0, 0, 1, 1});
     instance.setDiagonalCompatibilityMatrix(2);
 
-    std::vector<group_t> iso_groups = {
-        group_t{ { {0, 1}, {2, 3} } },
-        group_t{ { {4}, {5} } }
-    };
+    std::vector<group_t> iso_groups = {group_t{{{0, 1}, {2, 3}}}, group_t{{{4}, {5}}}};
 
     // Mock SubgraphSchedule from EFT scheduler
     // Group 0 (2 subgraphs) gets 2 workers of type 0
     // Group 1 (2 subgraphs) gets 2 workers of type 1
     SubgraphSchedule sub_sched;
     sub_sched.node_assigned_worker_per_type.resize(2);
-    sub_sched.node_assigned_worker_per_type[0] = {2, 0}; // 2xT0 for group 0
-    sub_sched.node_assigned_worker_per_type[1] = {0, 2}; // 2xT1 for group 1
-    sub_sched.was_trimmed = {false, false}; // No trimming occurred
+    sub_sched.node_assigned_worker_per_type[0] = {2, 0};    // 2xT0 for group 0
+    sub_sched.node_assigned_worker_per_type[1] = {0, 2};    // 2xT1 for group 1
+    sub_sched.was_trimmed = {false, false};                 // No trimming occurred
 
     std::vector<vertex_idx_t<graph_t>> partition(dag.num_vertices());
 
@@ -252,7 +261,9 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_HeterogeneousArch) {
 
     // Verify all partitions are unique as expected
     std::set<vertex_idx_t<graph_t>> partition_ids;
-    for(const auto& p_id : partition) partition_ids.insert(p_id);
+    for (const auto &p_id : partition) {
+        partition_ids.insert(p_id);
+    }
     BOOST_CHECK_EQUAL(partition_ids.size(), 4);
 }
 
@@ -261,16 +272,16 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) {
     // This test ensures that the isomorphism mapping works correctly even if
     // the vertex IDs of isomorphic subgraphs are not in the same relative order.
     BspInstance<graph_t> instance;
-    auto& dag = instance.getComputationalDag();
+    auto &dag = instance.getComputationalDag();
 
     // Group 0, Subgraph 1: 0 -> 1
-    dag.add_vertex(10, 1, 1, 0); // 0
-    dag.add_vertex(20, 1, 1, 0); // 1
+    dag.add_vertex(10, 1, 1, 0);    // 0
+    dag.add_vertex(20, 1, 1, 0);    // 1
     dag.add_edge(0, 1);
 
     // Group 0, Subgraph 2 (isomorphic to 1, but with shuffled IDs): 3 -> 2
-    dag.add_vertex(20, 1, 1, 0); // 2 (work 20, corresponds to node 1)
-    dag.add_vertex(10, 1, 1, 0); // 3 (work 10, corresponds to node 0)
+    dag.add_vertex(20, 1, 1, 0);    // 2 (work 20, corresponds to node 1)
+    dag.add_vertex(10, 1, 1, 0);    // 3 (work 10, corresponds to node 0)
     dag.add_edge(3, 2);
 
     // Architecture: 2 processors, so each subgraph gets its own partition space.
@@ -280,15 +291,13 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) {
     // Manually define the isomorphic groups.
     // Subgraph 1 vertices: {0, 1}
     // Subgraph 2 vertices: {2, 3}
-    std::vector<group_t> iso_groups = {
-        group_t{ { {0, 1}, {2, 3} } }
-    };
+    std::vector<group_t> iso_groups = {group_t{{{0, 1}, {2, 3}}}};
 
     // Mock SubgraphSchedule: The single group gets all 2 processors.
     SubgraphSchedule sub_sched;
     sub_sched.node_assigned_worker_per_type.resize(1);
     sub_sched.node_assigned_worker_per_type[0] = {2};
-    sub_sched.was_trimmed = {false}; // No trimming occurred
+    sub_sched.was_trimmed = {false};    // No trimming occurred
 
     std::vector<vertex_idx_t<graph_t>> partition(dag.num_vertices());
 
@@ -395,4 +404,4 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) {
 //     BOOST_CHECK_NE(partition[3], partition[6]); // Sinks
 // }
 
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/isomorphism_mapper.cpp b/tests/isomorphism_mapper.cpp
index a64e7b99..72b6ecbb 100644
--- a/tests/isomorphism_mapper.cpp
+++ b/tests/isomorphism_mapper.cpp
@@ -18,15 +18,14 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE IsomorphismMapper
 #include <boost/test/unit_test.hpp>
+#include <numeric>
+#include <set>
+#include <unordered_map>
 
 #include "osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "test_graphs.hpp"
 
-#include <numeric>
-#include <set>
-#include <unordered_map>
-
 using namespace osp;
 
 BOOST_AUTO_TEST_SUITE(IsomorphismMapperTestSuite)
@@ -37,16 +36,20 @@ using constr_graph_t = computational_dag_vector_impl_def_t;
 BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) {
     // Rep: 0 -> 1 -> 2
     constr_graph_t rep_graph;
-    rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(30,1,1);
-    rep_graph.add_edge(0, 1); rep_graph.add_edge(1, 2);
+    rep_graph.add_vertex(10, 1, 1);
+    rep_graph.add_vertex(20, 1, 1);
+    rep_graph.add_vertex(30, 1, 1);
+    rep_graph.add_edge(0, 1);
+    rep_graph.add_edge(1, 2);
     std::vector<vertex_idx_t<graph_t>> rep_map = {100, 101, 102};
 
     // Current: 2 -> 0 -> 1 (isomorphic, but different local IDs)
     constr_graph_t current_graph;
-    current_graph.add_vertex(20,1,1); // local 0 (work 20)
-    current_graph.add_vertex(30,1,1); // local 1 (work 30)
-    current_graph.add_vertex(10,1,1); // local 2 (work 10)
-    current_graph.add_edge(2, 0); current_graph.add_edge(0, 1);
+    current_graph.add_vertex(20, 1, 1);    // local 0 (work 20)
+    current_graph.add_vertex(30, 1, 1);    // local 1 (work 30)
+    current_graph.add_vertex(10, 1, 1);    // local 2 (work 10)
+    current_graph.add_edge(2, 0);
+    current_graph.add_edge(0, 1);
     std::vector<vertex_idx_t<graph_t>> current_map = {201, 202, 200};
 
     IsomorphismMapper<graph_t, constr_graph_t> mapper(rep_graph);
@@ -54,7 +57,7 @@ BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) {
 
     // Translate local map to global map for the test
     std::unordered_map<vertex_idx_t<graph_t>, vertex_idx_t<graph_t>> result_map;
-    for(const auto& [curr_local, rep_local] : result_map_local) {
+    for (const auto &[curr_local, rep_local] : result_map_local) {
         result_map[current_map[curr_local]] = rep_map[rep_local];
     }
 
@@ -71,24 +74,33 @@ BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) {
 BOOST_AUTO_TEST_CASE(Mapper_ForkJoin) {
     // Rep: 0 -> {1,2} -> 3
     constr_graph_t rep_graph;
-    rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(30,1,1);
-    rep_graph.add_edge(0,1); rep_graph.add_edge(0,2); rep_graph.add_edge(1,3); rep_graph.add_edge(2,3);
+    rep_graph.add_vertex(10, 1, 1);
+    rep_graph.add_vertex(20, 1, 1);
+    rep_graph.add_vertex(20, 1, 1);
+    rep_graph.add_vertex(30, 1, 1);
+    rep_graph.add_edge(0, 1);
+    rep_graph.add_edge(0, 2);
+    rep_graph.add_edge(1, 3);
+    rep_graph.add_edge(2, 3);
     std::vector<vertex_idx_t<graph_t>> rep_map = {10, 11, 12, 13};
 
     // Current: 3 -> {0,2} -> 1
     constr_graph_t current_graph;
-    current_graph.add_vertex(20,1,1); // local 0
-    current_graph.add_vertex(30,1,1); // local 1
-    current_graph.add_vertex(20,1,1); // local 2
-    current_graph.add_vertex(10,1,1); // local 3
-    current_graph.add_edge(3,0); current_graph.add_edge(3,2); current_graph.add_edge(0,1); current_graph.add_edge(2,1);
+    current_graph.add_vertex(20, 1, 1);    // local 0
+    current_graph.add_vertex(30, 1, 1);    // local 1
+    current_graph.add_vertex(20, 1, 1);    // local 2
+    current_graph.add_vertex(10, 1, 1);    // local 3
+    current_graph.add_edge(3, 0);
+    current_graph.add_edge(3, 2);
+    current_graph.add_edge(0, 1);
+    current_graph.add_edge(2, 1);
     std::vector<vertex_idx_t<graph_t>> current_map = {21, 23, 22, 20};
 
     IsomorphismMapper<graph_t, constr_graph_t> mapper(rep_graph);
     auto result_map_local = mapper.find_mapping(current_graph);
 
     std::unordered_map<vertex_idx_t<graph_t>, vertex_idx_t<graph_t>> result_map;
-    for(const auto& [curr_local, rep_local] : result_map_local) {
+    for (const auto &[curr_local, rep_local] : result_map_local) {
         result_map[current_map[curr_local]] = rep_map[rep_local];
     }
 
@@ -109,23 +121,29 @@ BOOST_AUTO_TEST_CASE(Mapper_ForkJoin) {
 BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) {
     // Rep: {0->1}, {2->3}. Two identical but disconnected components.
     constr_graph_t rep_graph;
-    rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); // 0, 1
-    rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); // 2, 3
-    rep_graph.add_edge(0,1); rep_graph.add_edge(2,3);
+    rep_graph.add_vertex(10, 1, 1);
+    rep_graph.add_vertex(20, 1, 1);    // 0, 1
+    rep_graph.add_vertex(10, 1, 1);
+    rep_graph.add_vertex(20, 1, 1);    // 2, 3
+    rep_graph.add_edge(0, 1);
+    rep_graph.add_edge(2, 3);
     std::vector<vertex_idx_t<graph_t>> rep_map = {10, 11, 12, 13};
 
     // Current: {2->3}, {0->1}. Same components, but different local IDs.
     constr_graph_t current_graph;
-    current_graph.add_vertex(10,1,1); current_graph.add_vertex(20,1,1); // 0, 1
-    current_graph.add_vertex(10,1,1); current_graph.add_vertex(20,1,1); // 2, 3
-    current_graph.add_edge(2,3); current_graph.add_edge(0,1);
+    current_graph.add_vertex(10, 1, 1);
+    current_graph.add_vertex(20, 1, 1);    // 0, 1
+    current_graph.add_vertex(10, 1, 1);
+    current_graph.add_vertex(20, 1, 1);    // 2, 3
+    current_graph.add_edge(2, 3);
+    current_graph.add_edge(0, 1);
     std::vector<vertex_idx_t<graph_t>> current_map = {22, 23, 20, 21};
 
     IsomorphismMapper<graph_t, constr_graph_t> mapper(rep_graph);
     auto result_map_local = mapper.find_mapping(current_graph);
 
     std::unordered_map<vertex_idx_t<graph_t>, vertex_idx_t<graph_t>> result_map;
-    for(const auto& [curr_local, rep_local] : result_map_local) {
+    for (const auto &[curr_local, rep_local] : result_map_local) {
         result_map[current_map[curr_local]] = rep_map[rep_local];
     }
 
@@ -137,14 +155,12 @@ BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) {
     // Mapping Option 1:
-    // rep {10,11} -> current {20,21}
-    // rep {12,13} -> current {22,23}
+    // rep {10,11} -> current {22,23}
+    // rep {12,13} -> current {20,21}
-    bool mapping1 = (result_map.at(20) == 12 && result_map.at(21) == 13 &&
-                     result_map.at(22) == 10 && result_map.at(23) == 11);
+    bool mapping1 = (result_map.at(20) == 12 && result_map.at(21) == 13 && result_map.at(22) == 10 && result_map.at(23) == 11);
 
     // Mapping Option 2:
-    // rep {10,11} -> current {22,23}
-    // rep {12,13} -> current {20,21}
+    // rep {10,11} -> current {20,21}
+    // rep {12,13} -> current {22,23}
-    bool mapping2 = (result_map.at(22) == 12 && result_map.at(23) == 13 &&
-                     result_map.at(20) == 10 && result_map.at(21) == 11);
+    bool mapping2 = (result_map.at(22) == 12 && result_map.at(23) == 13 && result_map.at(20) == 10 && result_map.at(21) == 11);
 
     BOOST_CHECK(mapping1 || mapping2);
 }
@@ -162,21 +178,23 @@ BOOST_AUTO_TEST_CASE(Mapper_MultiPipeline) {
     // Pipeline 1 (local IDs 0,1,2) corresponds to rep pipeline 2 (global 20,21,22)
     // Pipeline 2 (local IDs 3,4,5) corresponds to rep pipeline 1 (global 10,11,12)
     constr_graph_t current_graph;
-    current_graph.add_vertex(10,1,1); // local 0, stage 0
-    current_graph.add_vertex(20,1,1); // local 1, stage 1
-    current_graph.add_vertex(30,1,1); // local 2, stage 2
-    current_graph.add_vertex(10,1,1); // local 3, stage 0
-    current_graph.add_vertex(20,1,1); // local 4, stage 1
-    current_graph.add_vertex(30,1,1); // local 5, stage 2
-    current_graph.add_edge(0, 1); current_graph.add_edge(1, 2); // First pipeline
-    current_graph.add_edge(3, 4); current_graph.add_edge(4, 5); // Second pipeline
+    current_graph.add_vertex(10, 1, 1);    // local 0, stage 0
+    current_graph.add_vertex(20, 1, 1);    // local 1, stage 1
+    current_graph.add_vertex(30, 1, 1);    // local 2, stage 2
+    current_graph.add_vertex(10, 1, 1);    // local 3, stage 0
+    current_graph.add_vertex(20, 1, 1);    // local 4, stage 1
+    current_graph.add_vertex(30, 1, 1);    // local 5, stage 2
+    current_graph.add_edge(0, 1);
+    current_graph.add_edge(1, 2);    // First pipeline
+    current_graph.add_edge(3, 4);
+    current_graph.add_edge(4, 5);    // Second pipeline
     std::vector<vertex_idx_t<graph_t>> current_map = {120, 121, 122, 110, 111, 112};
 
     IsomorphismMapper<graph_t, constr_graph_t> mapper(rep_graph);
     auto result_map_local = mapper.find_mapping(current_graph);
 
     std::unordered_map<vertex_idx_t<graph_t>, vertex_idx_t<graph_t>> result_map;
-    for(const auto& [curr_local, rep_local] : result_map_local) {
+    for (const auto &[curr_local, rep_local] : result_map_local) {
         result_map[current_map[curr_local]] = rep_map[rep_local];
     }
 
@@ -185,12 +203,12 @@ BOOST_AUTO_TEST_CASE(Mapper_MultiPipeline) {
     // The two pipelines are symmetric, so the mapping can go either way.
 
     // Mapping Option 1: current pipeline 1 -> rep pipeline 1, current pipeline 2 -> rep pipeline 2
-    bool mapping1 = (result_map.at(110) == 10 && result_map.at(111) == 11 && result_map.at(112) == 12 &&
-                     result_map.at(120) == 20 && result_map.at(121) == 21 && result_map.at(122) == 22);
+    bool mapping1 = (result_map.at(110) == 10 && result_map.at(111) == 11 && result_map.at(112) == 12 && result_map.at(120) == 20
+                     && result_map.at(121) == 21 && result_map.at(122) == 22);
 
     // Mapping Option 2: current pipeline 1 -> rep pipeline 2, current pipeline 2 -> rep pipeline 1
-    bool mapping2 = (result_map.at(110) == 20 && result_map.at(111) == 21 && result_map.at(112) == 22 &&
-                     result_map.at(120) == 10 && result_map.at(121) == 11 && result_map.at(122) == 12);
+    bool mapping2 = (result_map.at(110) == 20 && result_map.at(111) == 21 && result_map.at(112) == 22 && result_map.at(120) == 10
+                     && result_map.at(121) == 11 && result_map.at(122) == 12);
 
     BOOST_CHECK(mapping1 || mapping2);
 }
@@ -211,18 +229,22 @@ BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) {
     // A naive mapping of local IDs (0->0, 1->1, etc.) would be incorrect
     // because the work weights would not match.
     constr_graph_t current_graph;
-    current_graph.add_vertex(20,1,1); // local 0 (work 20, right)
-    current_graph.add_vertex(10,1,1); // local 1 (work 10, left)
-    current_graph.add_vertex(20,1,1); // local 2 (work 20, right)
-    current_graph.add_vertex(10,1,1); // local 3 (work 10, left)
-    current_graph.add_vertex(20,1,1); // local 4 (work 20, right)
-    current_graph.add_vertex(10,1,1); // local 5 (work 10, left)
+    current_graph.add_vertex(20, 1, 1);    // local 0 (work 20, right)
+    current_graph.add_vertex(10, 1, 1);    // local 1 (work 10, left)
+    current_graph.add_vertex(20, 1, 1);    // local 2 (work 20, right)
+    current_graph.add_vertex(10, 1, 1);    // local 3 (work 10, left)
+    current_graph.add_vertex(20, 1, 1);    // local 4 (work 20, right)
+    current_graph.add_vertex(10, 1, 1);    // local 5 (work 10, left)
     // Edges for {5,0} -> {3,2} -> {1,4}
-    current_graph.add_edge(5, 3); current_graph.add_edge(5, 2); // Rung 1
-    current_graph.add_edge(0, 3); current_graph.add_edge(0, 2);
+    current_graph.add_edge(5, 3);
+    current_graph.add_edge(5, 2);    // Rung 1
+    current_graph.add_edge(0, 3);
+    current_graph.add_edge(0, 2);
 
-    current_graph.add_edge(3, 1); current_graph.add_edge(3, 4); // Rung 2
-    current_graph.add_edge(2, 1); current_graph.add_edge(2, 4);
+    current_graph.add_edge(3, 1);
+    current_graph.add_edge(3, 4);    // Rung 2
+    current_graph.add_edge(2, 1);
+    current_graph.add_edge(2, 4);
 
     std::vector<vertex_idx_t<graph_t>> current_map = {111, 114, 113, 112, 115, 110};
 
@@ -230,15 +252,15 @@ BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) {
     auto result_map_local = mapper.find_mapping(current_graph);
 
     std::unordered_map<vertex_idx_t<graph_t>, vertex_idx_t<graph_t>> result_map;
-    for(const auto& [curr_local, rep_local] : result_map_local) {
+    for (const auto &[curr_local, rep_local] : result_map_local) {
         result_map[current_map[curr_local]] = rep_map[rep_local];
     }
 
     BOOST_REQUIRE_EQUAL(result_map.size(), 6);
     // Check that structurally identical nodes are mapped, regardless of their original IDs.
     // E.g., current global 110 (from local 5, work 10) must map to a rep node with work 10.
-    BOOST_CHECK_EQUAL(result_map.at(110), 10); // current 5 (work 10) -> rep 0 (work 10)
-    BOOST_CHECK_EQUAL(result_map.at(111), 11); // current 0 (work 20) -> rep 1 (work 20)
+    BOOST_CHECK_EQUAL(result_map.at(110), 10);    // current 5 (work 10) -> rep 0 (work 10)
+    BOOST_CHECK_EQUAL(result_map.at(111), 11);    // current 0 (work 20) -> rep 1 (work 20)
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/iterators.cpp b/tests/iterators.cpp
index 04812cb8..55c9052e 100644
--- a/tests/iterators.cpp
+++ b/tests/iterators.cpp
@@ -24,10 +24,9 @@ limitations under the License.
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(integral_range_test) {
-
     integral_range<unsigned> range(0, 10);
     BOOST_CHECK_EQUAL(range.size(), 10);
-    
+
     int count = 0;
     for (auto it = range.begin(); it != range.end(); ++it) {
         BOOST_CHECK_EQUAL(*it, count);
@@ -52,7 +51,7 @@ BOOST_AUTO_TEST_CASE(integral_range_test) {
         --count;
     }
     BOOST_CHECK_EQUAL(count, -1);
-    
+
     count = 0;
     integral_range<unsigned> range2(10);
     BOOST_CHECK_EQUAL(range2.size(), 10);
@@ -69,8 +68,6 @@ BOOST_AUTO_TEST_CASE(integral_range_test) {
         --count;
     }
     BOOST_CHECK_EQUAL(count, -1);
-    
-
 
     count = 5;
     integral_range<unsigned> range3(5, 15);
@@ -87,7 +84,4 @@ BOOST_AUTO_TEST_CASE(integral_range_test) {
         --count;
     }
     BOOST_CHECK_EQUAL(count, 4);
-
-
-
 }
diff --git a/tests/kl.cpp b/tests/kl.cpp
index 773fe6b5..3a8a506f 100644
--- a/tests/kl.cpp
+++ b/tests/kl.cpp
@@ -20,32 +20,29 @@ limitations under the License.
 #include <boost/test/unit_test.hpp>
 #include <filesystem>
 
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "test_graphs.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "test_graphs.hpp"
 
 using namespace osp;
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_mem_weights(Graph_t &dag) {
-
     int mem_weight = 1;
     int comm_weight = 1;
 
     for (const auto &v : dag.vertices()) {
-
         dag.set_vertex_mem_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 3 + 1));
         dag.set_vertex_comm_weight(v, static_cast<v_commw_t<Graph_t>>(comm_weight++ % 3 + 1));
     }
 }
 
 BOOST_AUTO_TEST_CASE(kl_base_1) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -82,12 +79,11 @@ BOOST_AUTO_TEST_CASE(kl_base_1) {
 
     using kl_move = kl_move<graph>;
 
-    kl_total_comm_test<graph, no_local_search_memory_constraint, false> kl;    
+    kl_total_comm_test<graph, no_local_search_memory_constraint, false> kl;
 
     kl.test_setup_schedule(schedule);
 
-    auto &kl_current_schedule =
-        kl.get_current_schedule();
+    auto &kl_current_schedule = kl.get_current_schedule();
 
     BOOST_CHECK_EQUAL(kl_current_schedule.step_max_work[0], 44.0);
     BOOST_CHECK_EQUAL(kl_current_schedule.step_second_max_work[0], 0.0);
@@ -145,7 +141,6 @@ BOOST_AUTO_TEST_CASE(kl_base_1) {
 };
 
 BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -161,18 +156,16 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
 
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -194,7 +187,6 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -210,18 +202,16 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
 
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -243,7 +233,6 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -259,17 +248,15 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -291,7 +278,6 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -307,17 +293,15 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -337,4 +321,3 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) {
         BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true);
     }
 }
-
diff --git a/tests/kl_bsp_affinity_test.cpp b/tests/kl_bsp_affinity_test.cpp
index 9d67de8e..24418309 100644
--- a/tests/kl_bsp_affinity_test.cpp
+++ b/tests/kl_bsp_affinity_test.cpp
@@ -16,20 +16,20 @@ BOOST_AUTO_TEST_CASE(simple_parent_child_test) {
     using VertexType = graph::vertex_idx;
 
     graph dag;
-    const VertexType v0 = dag.add_vertex(10, 5, 2); // work=10, mem=5, comm=2
-    const VertexType v1 = dag.add_vertex(8, 4, 1);  // work=8, mem=4, comm=1
-    dag.add_edge(v0, v1, 3);                        // edge weight=3
+    const VertexType v0 = dag.add_vertex(10, 5, 2);    // work=10, mem=5, comm=2
+    const VertexType v1 = dag.add_vertex(8, 4, 1);     // work=8, mem=4, comm=1
+    dag.add_edge(v0, v1, 3);                           // edge weight=3
 
     BspArchitecture<graph> arch;
     arch.setNumberOfProcessors(2);
 
     BspInstance<graph> instance(dag, arch);
-    instance.setCommunicationCosts(10); // comm multiplier
+    instance.setCommunicationCosts(10);    // comm multiplier
     instance.setSynchronisationCosts(5);
 
     BspSchedule schedule(instance);
-    schedule.setAssignedProcessors({0, 1}); // v0 on p0, v1 on p1
-    schedule.setAssignedSupersteps({0, 1}); // v0 in step 0, v1 in step 1
+    schedule.setAssignedProcessors({0, 1});    // v0 on p0, v1 on p1
+    schedule.setAssignedSupersteps({0, 1});    // v0 in step 0, v1 in step 1
     schedule.updateNumberOfSupersteps();
 
     using comm_cost_t = kl_bsp_comm_cost_function<graph, double, no_local_search_memory_constraint>;
@@ -54,11 +54,11 @@ BOOST_AUTO_TEST_CASE(simple_parent_child_test) {
 /**
  * Helper to validate comm datastructures by comparing with freshly computed ones
  */
-template<typename Graph>
-bool validate_comm_datastructures(
-    const max_comm_datastructure<Graph, double, kl_active_schedule_t> &comm_ds_incremental,
-    kl_active_schedule_t &active_sched, const BspInstance<Graph> &instance, const std::string &context) {
-
+template <typename Graph>
+bool validate_comm_datastructures(const max_comm_datastructure<Graph, double, kl_active_schedule_t> &comm_ds_incremental,
+                                  kl_active_schedule_t &active_sched,
+                                  const BspInstance<Graph> &instance,
+                                  const std::string &context) {
     // 1. Clone Schedule
     BspSchedule<Graph> current_schedule(instance);
     active_sched.write_schedule(current_schedule);
@@ -122,12 +122,10 @@ bool validate_comm_datastructures(
 /**
  * Helper to validate affinity tables by comparing with freshly computed ones
  */
-template<typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size,
-         typename cost_t>
-bool validate_affinity_tables(
-    kl_improver_test<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t> &kl_incremental,
-    const BspInstance<Graph_t> &instance, const std::string &context) {
-
+template <typename Graph_t, typename comm_cost_function_t, typename MemoryConstraint_t, unsigned window_size, typename cost_t>
+bool validate_affinity_tables(kl_improver_test<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t> &kl_incremental,
+                              const BspInstance<Graph_t> &instance,
+                              const std::string &context) {
     // 1. Get current schedule from incremental
     BspSchedule<Graph_t> current_schedule(instance);
     kl_incremental.get_active_schedule_test(current_schedule);
@@ -138,12 +136,11 @@ bool validate_affinity_tables(
 
     // Get selected nodes from incremental
     std::vector<vertex_idx_t<Graph_t>> selected_nodes;
-    
+
     const size_t active_count = kl_incremental.get_affinity_table().size();
     for (size_t i = 0; i < active_count; ++i) {
         selected_nodes.push_back(kl_incremental.get_affinity_table().get_selected_nodes()[i]);
     }
-    
 
     std::cout << "\n  [" << context << "] Validating " << selected_nodes.size() << " selected nodes: { ";
     for (const auto n : selected_nodes) {
@@ -159,17 +156,17 @@ bool validate_affinity_tables(
     const unsigned num_steps = kl_incremental.get_active_schedule().num_steps();
 
     // 3. Compare affinity tables for each selected node
-   
-    for (const auto & node : selected_nodes) {    
-    
+
+    for (const auto &node : selected_nodes) {
         const auto &affinity_inc = kl_incremental.get_affinity_table().get_affinity_table(node);
         const auto &affinity_fresh = kl_fresh.get_affinity_table().get_affinity_table(node);
 
         unsigned node_step = kl_incremental.get_active_schedule().assigned_superstep(node);
 
         for (unsigned p = 0; p < num_procs; ++p) {
-            if (p >= affinity_inc.size() || p >= affinity_fresh.size())
+            if (p >= affinity_inc.size() || p >= affinity_fresh.size()) {
                 continue;
+            }
 
             for (unsigned idx = 0; idx < affinity_inc[p].size() && idx < affinity_fresh[p].size(); ++idx) {
                 int step_offset = static_cast<int>(idx) - static_cast<int>(window_size);
@@ -203,12 +200,12 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) {
     graph dag;
 
     // Create 6 vertices with specific comm weights
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 1, 1);  // 1
-    dag.add_vertex(1, 5, 1);  // 2
-    dag.add_vertex(1, 1, 1);  // 3
-    dag.add_vertex(1, 2, 1);  // 4
-    dag.add_vertex(1, 1, 1);  // 5
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 1, 1);     // 1
+    dag.add_vertex(1, 5, 1);     // 2
+    dag.add_vertex(1, 1, 1);     // 3
+    dag.add_vertex(1, 2, 1);     // 4
+    dag.add_vertex(1, 1, 1);     // 5
 
     // Add edges
     dag.add_edge(0, 1, 1);
@@ -244,8 +241,8 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) {
     double after_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_tracked = kl.get_current_cost();
 
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_update_datastructure_after_move"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_update_datastructure_after_move"));
     BOOST_CHECK_CLOSE(after_recomputed, after_tracked, 0.00001);
 }
 
@@ -253,10 +250,10 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) {
     graph dag;
 
     // Create a linear chain: 0 -> 1 -> 2 -> 3
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 8, 1);  // 1
-    dag.add_vertex(1, 6, 1);  // 2
-    dag.add_vertex(1, 4, 1);  // 3
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 8, 1);     // 1
+    dag.add_vertex(1, 6, 1);     // 2
+    dag.add_vertex(1, 4, 1);     // 3
 
     dag.add_edge(0, 1, 1);
     dag.add_edge(1, 2, 1);
@@ -285,24 +282,24 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_multiple_sequential_moves_1"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_1"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move2_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_multiple_sequential_moves_2"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_2"));
     BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move3_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_multiple_sequential_moves_3"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_3"));
     BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001);
 
     // After: Node 0 has 3 local children
@@ -315,10 +312,10 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) {
     graph dag;
 
     // Tree structure: Node 0 has three children (1, 2, 3)
-    dag.add_vertex(1, 1, 1); // 0
-    dag.add_vertex(1, 1, 1); // 1
-    dag.add_vertex(1, 1, 1); // 2
-    dag.add_vertex(1, 1, 1); // 3
+    dag.add_vertex(1, 1, 1);    // 0
+    dag.add_vertex(1, 1, 1);    // 1
+    dag.add_vertex(1, 1, 1);    // 2
+    dag.add_vertex(1, 1, 1);    // 3
 
     dag.add_edge(0, 1, 1);
     dag.add_edge(0, 2, 1);
@@ -348,24 +345,24 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_node_with_multiple_children"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move2_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_node_with_multiple_children_2"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children_2"));
     BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move3_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_node_with_multiple_children_3"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children_3"));
     BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001);
 
     // After: Node 0 has 3 local children
@@ -378,9 +375,9 @@ BOOST_AUTO_TEST_CASE(test_cross_step_moves) {
     graph dag;
 
     // 0 -> 1 -> 2
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 8, 1);  // 1
-    dag.add_vertex(1, 6, 1);  // 2
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 8, 1);     // 1
+    dag.add_vertex(1, 6, 1);     // 2
 
     dag.add_edge(0, 1, 1);
     dag.add_edge(1, 2, 1);
@@ -408,8 +405,8 @@ BOOST_AUTO_TEST_CASE(test_cross_step_moves) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "test_cross_step_moves_1"));
+    BOOST_CHECK(validate_comm_datastructures(
+        kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_cross_step_moves_1"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 }
 
@@ -423,8 +420,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) {
     const auto v4 = dag.add_vertex(5, 6, 2);
     const auto v5 = dag.add_vertex(6, 5, 6);
     const auto v6 = dag.add_vertex(7, 4, 2);
-    dag.add_vertex(8, 3, 4);                 // v7 (index 6)
-    const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7)
+    dag.add_vertex(8, 3, 4);                    // v7 (index 6)
+    const auto v8 = dag.add_vertex(9, 2, 1);    // v8 (index 7)
 
     dag.add_edge(v1, v2, 2);
     dag.add_edge(v1, v3, 2);
@@ -436,7 +433,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) {
     dag.add_edge(v4, v8, 9);
 
     BspArchitecture<graph> arch;
-    arch.setNumberOfProcessors(2); // P0, P1
+    arch.setNumberOfProcessors(2);    // P0, P1
     arch.setCommunicationCosts(1);
     arch.setSynchronisationCosts(1);
 
@@ -458,16 +455,14 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move1"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move2_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move2"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move2"));
     BOOST_CHECK(validate_affinity_tables(kl, instance, "complex_move2"));
     BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001);
 
@@ -475,24 +470,21 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) {
 
     double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move3_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move3"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move3"));
     BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move4_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move4"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move4"));
     BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move5_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move5_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move5"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move5"));
     BOOST_CHECK_CLOSE(after_move5_recomputed, after_move5_tracked, 0.00001);
 }
 
@@ -505,8 +497,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     const auto v4 = dag.add_vertex(5, 6, 2);
     const auto v5 = dag.add_vertex(6, 5, 6);
     const auto v6 = dag.add_vertex(7, 4, 2);
-    const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6)
-    const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7)
+    const auto v7 = dag.add_vertex(8, 3, 4);    // v7 (index 6)
+    const auto v8 = dag.add_vertex(9, 2, 1);    // v8 (index 7)
 
     dag.add_edge(v1, v2, 2);
     dag.add_edge(v1, v3, 2);
@@ -518,7 +510,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     dag.add_edge(v4, v8, 9);
 
     BspArchitecture<graph> arch;
-    arch.setNumberOfProcessors(2); // P0, P1
+    arch.setNumberOfProcessors(2);    // P0, P1
     arch.setCommunicationCosts(1);
     arch.setSynchronisationCosts(1);
 
@@ -538,8 +530,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl.insert_gain_heap_test({v1});
     kl.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move1"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1"));
     BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001);
 
     kl_improver_test kl2;
@@ -548,8 +539,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl2.insert_gain_heap_test({v2});
     kl2.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance,
-                                             "complex_move2"));
+    BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, "complex_move2"));
     BOOST_CHECK_CLOSE(kl2.get_comm_cost_f().compute_schedule_cost_test(), kl2.get_current_cost(), 0.00001);
 
     kl_improver_test kl3;
@@ -558,8 +548,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl3.insert_gain_heap_test({v3});
     kl3.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance,
-                                             "complex_move3"));
+    BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, "complex_move3"));
     BOOST_CHECK_CLOSE(kl3.get_comm_cost_f().compute_schedule_cost_test(), kl3.get_current_cost(), 0.00001);
 
     kl_improver_test kl4;
@@ -568,8 +557,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl4.insert_gain_heap_test({v4});
     kl4.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance,
-                                             "complex_move4"));
+    BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, "complex_move4"));
     BOOST_CHECK_CLOSE(kl4.get_comm_cost_f().compute_schedule_cost_test(), kl4.get_current_cost(), 0.00001);
 
     kl_improver_test kl5;
@@ -578,8 +566,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl5.insert_gain_heap_test({v5});
     kl5.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance,
-                                             "complex_move5"));
+    BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, "complex_move5"));
     BOOST_CHECK_CLOSE(kl5.get_comm_cost_f().compute_schedule_cost_test(), kl5.get_current_cost(), 0.00001);
 
     kl_improver_test kl6;
@@ -588,8 +575,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl6.insert_gain_heap_test({v6});
     kl6.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance,
-                                             "complex_move6"));
+    BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, "complex_move6"));
     BOOST_CHECK_CLOSE(kl6.get_comm_cost_f().compute_schedule_cost_test(), kl6.get_current_cost(), 0.00001);
 
     kl_improver_test kl7;
@@ -598,8 +584,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl7.insert_gain_heap_test({v7});
     kl7.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance,
-                                             "complex_move7"));
+    BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, "complex_move7"));
     BOOST_CHECK_CLOSE(kl7.get_comm_cost_f().compute_schedule_cost_test(), kl7.get_current_cost(), 0.00001);
 
     kl_improver_test kl8;
@@ -608,8 +593,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) {
     kl8.insert_gain_heap_test({v8});
     kl8.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance,
-                                             "complex_move8"));
+    BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, "complex_move8"));
     BOOST_CHECK_CLOSE(kl8.get_comm_cost_f().compute_schedule_cost_test(), kl8.get_current_cost(), 0.00001);
 }
 
@@ -622,8 +606,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     const auto v4 = dag.add_vertex(5, 6, 2);
     const auto v5 = dag.add_vertex(6, 5, 6);
     const auto v6 = dag.add_vertex(7, 4, 2);
-    const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6)
-    const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7)
+    const auto v7 = dag.add_vertex(8, 3, 4);    // v7 (index 6)
+    const auto v8 = dag.add_vertex(9, 2, 1);    // v8 (index 7)
 
     dag.add_edge(v1, v2, 2);
     dag.add_edge(v1, v5, 2);
@@ -645,7 +629,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     dag.add_edge(v7, v8, 2);
 
     BspArchitecture<graph> arch;
-    arch.setNumberOfProcessors(2); // P0, P1
+    arch.setNumberOfProcessors(2);    // P0, P1
     arch.setCommunicationCosts(1);
     arch.setSynchronisationCosts(1);
 
@@ -665,8 +649,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl.insert_gain_heap_test({v1});
     kl.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "complex_move1"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1"));
     BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001);
 
     kl_improver_test kl2;
@@ -675,8 +658,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl2.insert_gain_heap_test({v2});
     kl2.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance,
-                                             "complex_move2"));
+    BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, "complex_move2"));
     BOOST_CHECK_CLOSE(kl2.get_comm_cost_f().compute_schedule_cost_test(), kl2.get_current_cost(), 0.00001);
 
     kl_improver_test kl3;
@@ -685,8 +667,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl3.insert_gain_heap_test({v3});
     kl3.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance,
-                                             "complex_move3"));
+    BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, "complex_move3"));
     BOOST_CHECK_CLOSE(kl3.get_comm_cost_f().compute_schedule_cost_test(), kl3.get_current_cost(), 0.00001);
 
     kl_improver_test kl4;
@@ -695,8 +676,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl4.insert_gain_heap_test({v4});
     kl4.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance,
-                                             "complex_move4"));
+    BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, "complex_move4"));
     BOOST_CHECK_CLOSE(kl4.get_comm_cost_f().compute_schedule_cost_test(), kl4.get_current_cost(), 0.00001);
 
     kl_improver_test kl5;
@@ -705,8 +685,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl5.insert_gain_heap_test({v5});
     kl5.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance,
-                                             "complex_move5"));
+    BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, "complex_move5"));
     BOOST_CHECK_CLOSE(kl5.get_comm_cost_f().compute_schedule_cost_test(), kl5.get_current_cost(), 0.00001);
 
     kl_improver_test kl6;
@@ -715,8 +694,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl6.insert_gain_heap_test({v6});
     kl6.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance,
-                                             "complex_move6"));
+    BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, "complex_move6"));
     BOOST_CHECK_CLOSE(kl6.get_comm_cost_f().compute_schedule_cost_test(), kl6.get_current_cost(), 0.00001);
 
     kl_improver_test kl7;
@@ -725,8 +703,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl7.insert_gain_heap_test({v7});
     kl7.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance,
-                                             "complex_move7"));
+    BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, "complex_move7"));
     BOOST_CHECK_CLOSE(kl7.get_comm_cost_f().compute_schedule_cost_test(), kl7.get_current_cost(), 0.00001);
 
     kl_improver_test kl8;
@@ -735,8 +712,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) {
     kl8.insert_gain_heap_test({v8});
     kl8.run_inner_iteration_test();
 
-    BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance,
-                                             "complex_move8"));
+    BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, "complex_move8"));
     BOOST_CHECK_CLOSE(kl8.get_comm_cost_f().compute_schedule_cost_test(), kl8.get_current_cost(), 0.00001);
 }
 
@@ -745,7 +721,7 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) {
     graph dag = osp::construct_grid_dag<graph>(5, 5);
 
     BspArchitecture<graph> arch;
-    arch.setNumberOfProcessors(4); // P0..P3
+    arch.setNumberOfProcessors(4);    // P0..P3
     arch.setCommunicationCosts(1);
     arch.setSynchronisationCosts(1);
 
@@ -791,32 +767,28 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move1"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move1"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move2_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move2"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move2"));
     BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move3_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move3"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move3"));
     BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move4_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move4"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move4"));
     BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001);
 }
 
@@ -867,32 +839,28 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "butterfly_move1"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move1"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move2_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "butterfly_move2"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move2"));
     BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move3_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "butterfly_move3"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move3"));
     BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move4_tracked = kl.get_current_cost();
-    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance,
-                                             "butterfly_move4"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move4"));
     BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001);
 }
 
@@ -937,31 +905,27 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) {
 
     double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move1_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move1"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move1"));
     BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move2_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move2"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move2"));
     BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move3_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move3"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move3"));
     BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001);
 
     kl.run_inner_iteration_test();
 
     double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test();
     double after_move4_tracked = kl.get_current_cost();
-    BOOST_CHECK(
-        validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move4"));
+    BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move4"));
     BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001);
-}
\ No newline at end of file
+}
diff --git a/tests/kl_bsp_cost.cpp b/tests/kl_bsp_cost.cpp
index 05a5882c..01779f45 100644
--- a/tests/kl_bsp_cost.cpp
+++ b/tests/kl_bsp_cost.cpp
@@ -75,12 +75,12 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) {
     comm_ds.arrange_superstep_comm_data(step);
 
     BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10);
-    BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1);  // Only proc 0 has 10
-    BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); // Next highest is 8 (from recv)
+    BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1);     // Only proc 0 has 10
+    BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8);    // Next highest is 8 (from recv)
 
     // Case 2: Shared Max
     comm_ds.reset_superstep(step);
-    comm_ds.step_proc_send(step, 0) = 10; // Need to re-set this as reset clears it
+    comm_ds.step_proc_send(step, 0) = 10;    // Need to re-set this as reset clears it
     comm_ds.step_proc_send(step, 1) = 10;
     comm_ds.step_proc_send(step, 2) = 2;
     comm_ds.step_proc_send(step, 3) = 1;
@@ -92,8 +92,8 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) {
     comm_ds.arrange_superstep_comm_data(step);
 
     BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10);
-    BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2);  // Proc 0 and 1
-    BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5); // Next highest is 5 (from recv)
+    BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2);     // Proc 0 and 1
+    BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5);    // Next highest is 5 (from recv)
 
     // Case 3: Max in Recv
     comm_ds.reset_superstep(step);
@@ -124,8 +124,8 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) {
     comm_ds.arrange_superstep_comm_data(step);
 
     BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10);
-    BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 8);  // 4 sends + 4 recvs
-    BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 0); // If all removed, 0.
+    BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 8);     // 4 sends + 4 recvs
+    BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 0);    // If all removed, 0.
 
     // Case 5: Max removed, second max is from same type (Send)
     comm_ds.reset_superstep(step);
@@ -134,8 +134,9 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) {
     comm_ds.step_proc_send(step, 2) = 2;
     comm_ds.step_proc_send(step, 3) = 1;
 
-    for (unsigned i = 0; i < 4; ++i)
+    for (unsigned i = 0; i < 4; ++i) {
         comm_ds.step_proc_receive(step, i) = 5;
+    }
 
     comm_ds.arrange_superstep_comm_data(step);
 
@@ -182,7 +183,7 @@ BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) {
 
     // Add edges
     // 0 -> 1
-    dag.add_edge(0, 1, 1); // Edge weight ignored by max_comm_datastructure
+    dag.add_edge(0, 1, 1);    // Edge weight ignored by max_comm_datastructure
     // 2 -> 3
     dag.add_edge(2, 3, 1);
     // 4 -> 5
@@ -258,11 +259,11 @@ BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) {
 /**
  * Helper to validate comm datastructures by comparing with freshly computed ones
  */
-template<typename Graph>
-bool validate_comm_datastructures(
-    const max_comm_datastructure<Graph, double, kl_active_schedule_t> &comm_ds_incremental,
-    kl_active_schedule_t &active_sched, const BspInstance<Graph> &instance, const std::string &context) {
-
+template <typename Graph>
+bool validate_comm_datastructures(const max_comm_datastructure<Graph, double, kl_active_schedule_t> &comm_ds_incremental,
+                                  kl_active_schedule_t &active_sched,
+                                  const BspInstance<Graph> &instance,
+                                  const std::string &context) {
     // 1. Clone Schedule
     BspSchedule<Graph> current_schedule(instance);
     active_sched.write_schedule(current_schedule);
@@ -327,12 +328,12 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) {
     graph dag;
 
     // Create 6 vertices with specific comm weights
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 1, 1);  // 1
-    dag.add_vertex(1, 5, 1);  // 2
-    dag.add_vertex(1, 1, 1);  // 3
-    dag.add_vertex(1, 2, 1);  // 4
-    dag.add_vertex(1, 1, 1);  // 5
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 1, 1);     // 1
+    dag.add_vertex(1, 5, 1);     // 2
+    dag.add_vertex(1, 1, 1);     // 3
+    dag.add_vertex(1, 2, 1);     // 4
+    dag.add_vertex(1, 1, 1);     // 5
 
     // Add edges
     dag.add_edge(0, 1, 1);
@@ -413,10 +414,10 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) {
     graph dag;
 
     // Create a linear chain: 0 -> 1 -> 2 -> 3
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 8, 1);  // 1
-    dag.add_vertex(1, 6, 1);  // 2
-    dag.add_vertex(1, 4, 1);  // 3
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 8, 1);     // 1
+    dag.add_vertex(1, 6, 1);     // 2
+    dag.add_vertex(1, 4, 1);     // 3
 
     dag.add_edge(0, 1, 1);
     dag.add_edge(1, 2, 1);
@@ -460,9 +461,9 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) {
     comm_ds.update_datastructure_after_move(move1, 0, 0);
     BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_1"));
 
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 8);    // Node 1 sends
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0);    // Node was moved away
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receives at P0
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 8);       // Node 1 sends
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0);       // Node was moved away
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0);    // No receives at P0
 
     // Move 2: Move node 2 from P2 to P0 (chain more local)
     kl_move move2(2, 0.0, 2, 0, 0, 0);
@@ -471,9 +472,9 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) {
     BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_2"));
 
     // After move2: Nodes 0,1,2 all at P0, only 3 at P3
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 6);    // Only node 2 sends off-proc
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 2), 0);    // Node moved away
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 6); // P3 receives from node 2
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 6);       // Only node 2 sends off-proc
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 2), 0);       // Node moved away
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 6);    // P3 receives from node 2
 
     // Move 3: Move node 3 to P0 (everything local)
     kl_move move3(3, 0.0, 3, 0, 0, 0);
@@ -482,18 +483,18 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) {
     BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_3"));
 
     // After move3: All nodes at P0, all communication is local
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local
-    BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 0);     // No communication cost
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0);    // All local
+    BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 0);        // No communication cost
 }
 
 BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) {
     graph dag;
 
     // Tree structure: Node 0 has three children (1, 2, 3)
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 1, 1);  // 1
-    dag.add_vertex(1, 1, 1);  // 2
-    dag.add_vertex(1, 1, 1);  // 3
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 1, 1);     // 1
+    dag.add_vertex(1, 1, 1);     // 2
+    dag.add_vertex(1, 1, 1);     // 3
 
     dag.add_edge(0, 1, 1);
     dag.add_edge(0, 2, 1);
@@ -538,7 +539,7 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) {
     // After: Node 0 has 1 local child, 2 off-proc (P2, P3)
     // Send cost = 10 * 2 = 20
     BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 20);
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); // No longer receives
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0);    // No longer receives
     BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 10);
     BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10);
 
@@ -550,7 +551,7 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) {
     // After: Node 0 has 2 local children, 1 off-proc (P3)
     // Send cost = 10 * 1 = 10
     BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10);
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 0); // No longer receives
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 0);    // No longer receives
     BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10);
 
     // Move child 3 to P0 (all local)
@@ -562,16 +563,16 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) {
     // After: Node 0 has 3 local children
     // Send cost = 10 * 0 = 0 (all local)
     BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0);
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 0); // No longer receives
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 0);    // No longer receives
 }
 
 BOOST_AUTO_TEST_CASE(test_cross_step_moves) {
     graph dag;
 
     // 0 -> 1 -> 2
-    dag.add_vertex(1, 10, 1); // 0
-    dag.add_vertex(1, 8, 1);  // 1
-    dag.add_vertex(1, 6, 1);  // 2
+    dag.add_vertex(1, 10, 1);    // 0
+    dag.add_vertex(1, 8, 1);     // 1
+    dag.add_vertex(1, 6, 1);     // 2
 
     dag.add_edge(0, 1, 1);
     dag.add_edge(1, 2, 1);
@@ -610,17 +611,17 @@ BOOST_AUTO_TEST_CASE(test_cross_step_moves) {
     kl_sched.apply_move(move1, active_schedule_data);
     comm_ds.update_datastructure_after_move(move1, 0, 2);
 
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0);    // Local (same processor)
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receive needed
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0);       // Local (same processor)
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0);    // No receive needed
 
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); // Local (same processor)
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); // Node moved away
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0);    // Local (same processor)
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0);    // Node moved away
 
     kl_move move2(1, 0.0, 0, 1, 0, 0);
     kl_sched.apply_move(move2, active_schedule_data);
     comm_ds.update_datastructure_after_move(move2, 0, 2);
 
-    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local at P0
+    BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0);    // All local at P0
 }
 
 BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) {
@@ -634,8 +635,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) {
     const auto v4 = dag.add_vertex(5, 6, 2);
     const auto v5 = dag.add_vertex(6, 5, 6);
     const auto v6 = dag.add_vertex(7, 4, 2);
-    dag.add_vertex(8, 3, 4);                 // v7 (index 6)
-    const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7)
+    dag.add_vertex(8, 3, 4);                    // v7 (index 6)
+    const auto v8 = dag.add_vertex(9, 2, 1);    // v8 (index 7)
 
     // Edges
     dag.add_edge(v1, v2, 2);
@@ -648,7 +649,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) {
     dag.add_edge(v4, v8, 9);
 
     BspArchitecture<graph> arch;
-    arch.setNumberOfProcessors(2); // P0, P1
+    arch.setNumberOfProcessors(2);    // P0, P1
     arch.setCommunicationCosts(1);
     arch.setSynchronisationCosts(1);
 
@@ -756,7 +757,7 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) {
     graph dag = osp::construct_grid_dag<graph>(5, 5);
 
     BspArchitecture<graph> arch;
-    arch.setNumberOfProcessors(4); // P0..P3
+    arch.setNumberOfProcessors(4);    // P0..P3
     arch.setCommunicationCosts(1);
     arch.setSynchronisationCosts(1);
 
@@ -1187,7 +1188,7 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) {
         BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0);
 
         using kl_move = osp::kl_move_struct<double, graph::vertex_idx>;
-        kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1
+        kl_move move(1, 0.0, 1, 2, 1, 3);    // Node 1, Step 2->3, Proc 1->1
         kl_sched.apply_move(move, active_schedule_data);
         comm_ds.update_datastructure_after_move(move, 0, 4);
 
@@ -1254,7 +1255,7 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) {
         BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0);
 
         using kl_move = osp::kl_move_struct<double, graph::vertex_idx>;
-        kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1
+        kl_move move(1, 0.0, 1, 2, 1, 3);    // Node 1, Step 2->3, Proc 1->1
         kl_sched.apply_move(move, active_schedule_data);
         comm_ds.update_datastructure_after_move(move, 0, 4);
 
@@ -1271,4 +1272,4 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) {
         BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 1), 0);
         BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0);
     }
-}
\ No newline at end of file
+}
diff --git a/tests/kl_bsp_improver_test.cpp b/tests/kl_bsp_improver_test.cpp
index 6e1611ec..b0a0a3db 100644
--- a/tests/kl_bsp_improver_test.cpp
+++ b/tests/kl_bsp_improver_test.cpp
@@ -17,14 +17,12 @@
 
 using namespace osp;
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_mem_weights(Graph_t &dag) {
-
     int mem_weight = 1;
     int comm_weight = 7;
 
     for (const auto &v : dag.vertices()) {
-
         dag.set_vertex_work_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 10 + 2));
         dag.set_vertex_mem_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 10 + 2));
         dag.set_vertex_comm_weight(v, static_cast<v_commw_t<Graph_t>>(comm_weight++ % 10 + 2));
@@ -32,7 +30,6 @@ void add_mem_weights(Graph_t &dag) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -247,4 +244,4 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) {
 //         BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         BOOST_CHECK_EQUAL(schedule_cs_2.satisfiesPrecedenceConstraints(), true);
 //     }
-// }
\ No newline at end of file
+// }
diff --git a/tests/kl_lambda.cpp b/tests/kl_lambda.cpp
index 31f86130..8090fa1d 100644
--- a/tests/kl_lambda.cpp
+++ b/tests/kl_lambda.cpp
@@ -20,14 +20,12 @@ limitations under the License.
 #include <boost/test/unit_test.hpp>
 #include <filesystem>
 
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp"
-
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp"
@@ -36,21 +34,19 @@ limitations under the License.
 
 using namespace osp;
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_mem_weights(Graph_t &dag) {
-
     int mem_weight = 1;
     int comm_weight = 7;
 
     for (const auto &v : dag.vertices()) {
-
         dag.set_vertex_work_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 10 + 2));
         dag.set_vertex_mem_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 10 + 2));
         dag.set_vertex_comm_weight(v, static_cast<v_commw_t<Graph_t>>(comm_weight++ % 10 + 2));
     }
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_node_types(Graph_t &dag) {
     unsigned node_type = 0;
 
@@ -59,46 +55,50 @@ void add_node_types(Graph_t &dag) {
     }
 }
 
-template<typename table_t>
+template <typename table_t>
 void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set<size_t> &nodes) {
-
     for (auto i : nodes) {
         BOOST_CHECK_EQUAL(table_1[i].size(), table_2[i].size());
-        if (table_1[i].size() != table_2[i].size())
+        if (table_1[i].size() != table_2[i].size()) {
             continue;
+        }
         for (size_t j = 0; j < table_1[i].size(); ++j) {
             BOOST_CHECK_EQUAL(table_1[i][j].size(), table_2[i][j].size());
-            if (table_1[i][j].size() != table_2[i][j].size())
+            if (table_1[i][j].size() != table_2[i][j].size()) {
                 continue;
+            }
             for (size_t k = 0; k < table_1[i][j].size(); ++k) {
                 BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001);
 
                 if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) {
-                    std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl;
+                    std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k]
+                              << ", table_2=" << table_2[i][j][k] << std::endl;
                 }
             }
         }
     }
 }
 
-void check_equal_lambda_map(const std::vector<std::map<unsigned, unsigned>> &map_1, const std::vector<std::map<unsigned, unsigned>> &map_2) {
+void check_equal_lambda_map(const std::vector<std::map<unsigned, unsigned>> &map_1,
+                            const std::vector<std::map<unsigned, unsigned>> &map_2) {
     BOOST_CHECK_EQUAL(map_1.size(), map_2.size());
-    if (map_1.size() != map_2.size())
+    if (map_1.size() != map_2.size()) {
         return;
+    }
 
     for (size_t i = 0; i < map_1.size(); ++i) {
         for (const auto &[key, value] : map_1[i]) {
             BOOST_CHECK_EQUAL(value, map_2[i].at(key));
 
             if (value != map_2[i].at(key)) {
-                std::cout << "Mismatch at [" << i << "][" << key << "]: map_1=" << value << ", map_2=" << map_2[i].at(key) << std::endl;
+                std::cout << "Mismatch at [" << i << "][" << key << "]: map_1=" << value << ", map_2=" << map_2[i].at(key)
+                          << std::endl;
             }
         }
     }
 }
 
 BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -114,18 +114,16 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                              instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
 
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -158,7 +156,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -174,18 +171,16 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                              instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
 
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -495,7 +490,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) {
 // };
 
 BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -552,7 +546,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) {
 
     auto node_selection = kl.insert_gain_heap_test_penalty({2, 3});
 
-    auto recompute_max_gain = kl.run_inner_iteration_test(); // best move 3
+    auto recompute_max_gain = kl.run_inner_iteration_test();    // best move 3
     std::cout << "------------------------recompute max_gain: { ";
     for (const auto &[key, value] : recompute_max_gain) {
         std::cout << key << " ";
@@ -561,7 +555,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) {
 
     BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001);
 
-    recompute_max_gain = kl.run_inner_iteration_test(); // best move 0
+    recompute_max_gain = kl.run_inner_iteration_test();    // best move 0
     std::cout << "recompute max_gain: { ";
     for (const auto &[key, value] : recompute_max_gain) {
         std::cout << key << " ";
@@ -570,7 +564,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) {
 
     BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001);
 
-    recompute_max_gain = kl.run_inner_iteration_test(); // best move 1
+    recompute_max_gain = kl.run_inner_iteration_test();    // best move 1
     std::cout << "recompute max_gain: { ";
     for (const auto &[key, value] : recompute_max_gain) {
         std::cout << key << " ";
@@ -590,7 +584,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -732,7 +725,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
 
 //         schedule.updateNumberOfSupersteps();
 
-//         std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " <<
+//         schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         BspSchedule<graph> schedule_2(schedule);
 
@@ -746,7 +740,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
 //         auto finish_time = std::chrono::high_resolution_clock::now();
 //         auto duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with "
+//         << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true);
@@ -759,7 +754,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
 
 //         // duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with "
+//         << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true);
@@ -808,7 +804,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
 
 //         schedule.updateNumberOfSupersteps();
 
-//         std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " <<
+//         schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         BspSchedule<graph> schedule_2(schedule);
 
@@ -822,7 +819,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
 //         auto finish_time = std::chrono::high_resolution_clock::now();
 //         auto duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with "
+//         << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true);
@@ -835,10 +833,11 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) {
 
 //         // duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with "
+//         << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true);
 
 //     }
-// }
\ No newline at end of file
+// }
diff --git a/tests/kl_mem_constr.cpp b/tests/kl_mem_constr.cpp
index 7f4c0ef4..1e2de362 100644
--- a/tests/kl_mem_constr.cpp
+++ b/tests/kl_mem_constr.cpp
@@ -20,33 +20,30 @@ limitations under the License.
 #include <boost/test/unit_test.hpp>
 #include <filesystem>
 
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "test_graphs.hpp"
+#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "test_graphs.hpp"
 
 using namespace osp;
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_mem_weights(Graph_t &dag) {
-
     int mem_weight = 1;
     int comm_weight = 1;
 
     for (const auto &v : dag.vertices()) {
-
         dag.set_vertex_mem_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 3 + 1));
         dag.set_vertex_comm_weight(v, static_cast<v_commw_t<Graph_t>>(comm_weight++ % 3 + 1));
     }
 }
 
 BOOST_AUTO_TEST_CASE(kl_local_memconst) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -62,12 +59,11 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) {
     GreedyBspScheduler<graph, local_memory_constraint<graph>> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         std::cout << filename_graph << std::endl;
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
         instance.getArchitecture().setSynchronisationCosts(10);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
@@ -79,13 +75,11 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) {
         add_mem_weights(instance.getComputationalDag());
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
 
         for (const auto &bound : bounds_to_test) {
-
             instance.getArchitecture().setMemoryBound(bound);
 
             BspSchedule<graph> schedule(instance);
@@ -104,4 +98,4 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) {
             BOOST_CHECK(schedule.satisfiesMemoryConstraints());
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/kl_total.cpp b/tests/kl_total.cpp
index 58421144..21b565c0 100644
--- a/tests/kl_total.cpp
+++ b/tests/kl_total.cpp
@@ -20,8 +20,6 @@ limitations under the License.
 #include <boost/test/unit_test.hpp>
 #include <filesystem>
 
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-
 #include "osp/auxiliary/io/arch_file_reader.hpp"
 #include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
@@ -33,21 +31,19 @@ limitations under the License.
 
 using namespace osp;
 
-template<typename Graph_t>
+template <typename Graph_t>
 void add_mem_weights(Graph_t &dag) {
-
     int mem_weight = 1;
     int comm_weight = 7;
 
     for (const auto &v : dag.vertices()) {
-
         dag.set_vertex_work_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 10 + 2));
         dag.set_vertex_mem_weight(v, static_cast<v_memw_t<Graph_t>>(mem_weight++ % 10 + 2));
         dag.set_vertex_comm_weight(v, static_cast<v_commw_t<Graph_t>>(comm_weight++ % 10 + 2));
     }
 }
 
-template<typename table_t>
+template <typename table_t>
 void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set<size_t> &nodes) {
     BOOST_CHECK_EQUAL(table_1.size(), table_2.size());
 
@@ -57,7 +53,8 @@ void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::s
                 BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001);
 
                 if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) {
-                    std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl;
+                    std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k]
+                              << ", table_2=" << table_2[i][j][k] << std::endl;
                 }
             }
         }
@@ -65,7 +62,6 @@ void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::s
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_smoke_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -110,7 +106,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_smoke_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) {
-
     std::vector<std::string> filenames_graph = test_graphs();
 
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -126,18 +121,16 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) {
     GreedyBspScheduler<computational_dag_edge_idx_vector_impl_def_int_t> test_scheduler;
 
     for (auto &filename_graph : filenames_graph) {
-
         BspInstance<graph> instance;
 
-        bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
-                                                                              instance.getComputationalDag());
+        bool status_graph
+            = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
 
         instance.getArchitecture().setSynchronisationCosts(5);
         instance.getArchitecture().setCommunicationCosts(5);
         instance.getArchitecture().setNumberOfProcessors(4);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         }
@@ -161,7 +154,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -209,7 +201,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -320,7 +311,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -405,7 +395,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -435,7 +424,7 @@ BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) {
 
     BspSchedule schedule(instance);
 
-    schedule.setAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); // v1->v2 is on same step, different procs
+    schedule.setAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1});    // v1->v2 is on same step, different procs
     schedule.setAssignedSupersteps({0, 0, 2, 1, 2, 2, 3, 3});
 
     schedule.updateNumberOfSupersteps();
@@ -456,7 +445,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_base_1) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -521,8 +509,8 @@ BOOST_AUTO_TEST_CASE(kl_base_1) {
 
     kl.apply_move_test(move_2);
 
-    BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0);       // 42-3
-    BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 5.0); // 2+3
+    BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0);          // 42-3
+    BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 5.0);    // 2+3
     BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 1);
     BOOST_CHECK_EQUAL(kl.is_feasible(), false);
     BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001);
@@ -551,7 +539,6 @@ BOOST_AUTO_TEST_CASE(kl_base_1) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_base_2) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -713,7 +700,6 @@ BOOST_AUTO_TEST_CASE(kl_base_2) {
 }
 
 BOOST_AUTO_TEST_CASE(kl_base_3) {
-
     using graph = computational_dag_edge_idx_vector_impl_def_int_t;
     using VertexType = graph::vertex_idx;
 
@@ -994,7 +980,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) {
 
 //         schedule.updateNumberOfSupersteps();
 
-//         std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps()
+//         << " number of supersteps"<< std::endl;
 
 //         BspSchedule<graph> schedule_2(schedule);
 
@@ -1010,7 +997,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) {
 
 //         auto duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " <<
+//         schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true);
@@ -1023,7 +1011,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) {
 
 //         // duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with "
+//         << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true);
@@ -1073,7 +1062,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) {
 
 //         schedule.updateNumberOfSupersteps();
 
-//         std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps()
+//         << " number of supersteps"<< std::endl;
 
 //         BspSchedule<graph> schedule_2(schedule);
 
@@ -1089,7 +1079,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) {
 
 //         auto duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " <<
+//         schedule.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true);
@@ -1102,10 +1093,11 @@ BOOST_AUTO_TEST_CASE(kl_base_3) {
 
 //         // duration = std::chrono::duration_cast<std::chrono::seconds>(finish_time - start_time).count();
 
-//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
+//         // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with "
+//         << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl;
 
 //         // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND);
 //         // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true);
 
 //     }
-// }
\ No newline at end of file
+// }
diff --git a/tests/kl_util.cpp b/tests/kl_util.cpp
index a275f425..0057779f 100644
--- a/tests/kl_util.cpp
+++ b/tests/kl_util.cpp
@@ -17,15 +17,15 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE kl_util
+#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp"
+
 #include <boost/test/unit_test.hpp>
 #include <numeric>
 #include <set>
 
-
-#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp"
+#include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/bsp/model/BspSchedule.hpp"
 
 using namespace osp;
 using graph = computational_dag_edge_idx_vector_impl_def_int_t;
@@ -39,7 +39,7 @@ struct ScheduleFixture {
 
     ScheduleFixture() : schedule(instance) {
         // Setup a simple graph and schedule
-        auto& dag = instance.getComputationalDag();
+        auto &dag = instance.getComputationalDag();
         for (int i = 0; i < 20; ++i) {
             dag.add_vertex(i + 1, i + 1, i + 1);
         }
@@ -83,7 +83,7 @@ BOOST_AUTO_TEST_CASE(reward_penalty_strategy_test) {
 }
 
 // Tests for lock managers
-template<typename LockManager>
+template <typename LockManager>
 void test_lock_manager() {
     LockManager lm;
     lm.initialize(10);
@@ -123,7 +123,7 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) {
     BOOST_CHECK_EQUAL(table.size(), 1);
     BOOST_CHECK(table.is_selected(0));
     BOOST_CHECK(!table.is_selected(1));
-    BOOST_CHECK(!table.insert(0)); // already present
+    BOOST_CHECK(!table.insert(0));    // already present
 
     // Remove
     table.remove(0);
@@ -151,13 +151,13 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) {
     // After trim, the gaps should be filled.
     std::set<unsigned> expected_selected = {0, 1, 2, 4, 6, 8, 9};
     std::set<unsigned> actual_selected;
-    const auto& selected_nodes_vec = table.get_selected_nodes();
-    for(size_t i = 0; i < table.size(); ++i) {
+    const auto &selected_nodes_vec = table.get_selected_nodes();
+    for (size_t i = 0; i < table.size(); ++i) {
         actual_selected.insert(static_cast<unsigned>(selected_nodes_vec[i]));
     }
     BOOST_CHECK(expected_selected == actual_selected);
 
-    for(unsigned i = 0; i < 20; ++i) {
+    for (unsigned i = 0; i < 20; ++i) {
         if (expected_selected.count(i)) {
             BOOST_CHECK(table.is_selected(i));
         } else {
@@ -166,7 +166,7 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) {
     }
 
     // Check that indices are correct
-    for(size_t i = 0; i < table.size(); ++i) {
+    for (size_t i = 0; i < table.size(); ++i) {
         BOOST_CHECK_EQUAL(table.get_selected_nodes_idx(selected_nodes_vec[i]), i);
     }
 
@@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) {
 BOOST_AUTO_TEST_CASE(static_affinity_table_test) {
     using affinity_table_t = static_affinity_table<graph, double, kl_active_schedule_t, 1>;
     affinity_table_t table;
-    table.initialize(active_schedule, 0); // size is ignored
+    table.initialize(active_schedule, 0);    // size is ignored
 
     BOOST_CHECK_EQUAL(table.size(), 0);
 
@@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(static_affinity_table_test) {
     BOOST_CHECK_EQUAL(table.size(), 1);
     BOOST_CHECK(table.is_selected(0));
     BOOST_CHECK(!table.is_selected(1));
-    table.insert(0); // should be a no-op on size
+    table.insert(0);    // should be a no-op on size
     BOOST_CHECK_EQUAL(table.size(), 1);
 
     // Remove
@@ -210,12 +210,11 @@ BOOST_AUTO_TEST_CASE(static_affinity_table_test) {
     BOOST_CHECK(!table.is_selected(0));
 }
 
-
 // Tests for vertex_selection_strategy
 BOOST_AUTO_TEST_CASE(vertex_selection_strategy_test) {
     using affinity_table_t = adaptive_affinity_table<graph, double, kl_active_schedule_t, 1>;
     using selection_strategy_t = vertex_selection_strategy<graph, affinity_table_t, kl_active_schedule_t>;
-    
+
     selection_strategy_t strategy;
     std::mt19937 gen(0);
     const unsigned end_step = active_schedule.num_steps() - 1;
@@ -239,7 +238,7 @@ BOOST_AUTO_TEST_CASE(vertex_selection_strategy_test) {
 
     strategy.select_nodes_permutation_threshold(15, table);
     BOOST_CHECK_EQUAL(table.size(), 20);
-    BOOST_CHECK_EQUAL(strategy.permutation_idx, 0); // should wrap around and reshuffle
+    BOOST_CHECK_EQUAL(strategy.permutation_idx, 0);    // should wrap around and reshuffle
 
     table.reset_node_selection();
     strategy.max_work_counter = 0;
@@ -276,7 +275,7 @@ BOOST_AUTO_TEST_CASE(kl_move_struct_test) {
 }
 
 BOOST_AUTO_TEST_CASE(work_datastructures_initialization_test) {
-    auto& wd = active_schedule.work_datastructures;
+    auto &wd = active_schedule.work_datastructures;
 
     // Step 0: node 0 on proc 0, work 1. Other procs have 0 work.
     BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 1);
@@ -298,13 +297,13 @@ BOOST_AUTO_TEST_CASE(work_datastructures_initialization_test) {
 }
 
 BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) {
-    auto& wd = active_schedule.work_datastructures;
+    auto &wd = active_schedule.work_datastructures;
     using kl_move = kl_move_struct<double, VertexType>;
 
     // Move within same superstep
     // Move node 0 (work 1) from proc 0 to proc 3 in step 0
     kl_move move1(0, 0.0, 0, 0, 3, 0);
-    wd.apply_move(move1, 1); // work_weight of node 0 is 1
+    wd.apply_move(move1, 1);    // work_weight of node 0 is 1
 
     // Before: {1,0,0,0}, After: {0,0,0,1}
     BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 0);
@@ -318,7 +317,7 @@ BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) {
     // Move to different superstep
     // Move node 4 (work 5) from proc 0, step 4 to proc 1, step 0
     kl_move move2(4, 0.0, 0, 4, 1, 0);
-    wd.apply_move(move2, 5); // work_weight of node 4 is 5
+    wd.apply_move(move2, 5);    // work_weight of node 4 is 5
 
     // Step 0 state after move1: {0,0,0,1}. max=1
     // After move2: {0,5,0,1}. max=5
@@ -338,7 +337,7 @@ BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) {
     BOOST_CHECK_EQUAL(wd.step_proc_work(4, 3), 0);
     BOOST_CHECK_EQUAL(wd.step_max_work(4), 0);
     BOOST_CHECK_EQUAL(wd.step_second_max_work(4), 0);
-    BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[4], 3); // All 4 procs have work 0, so count is 3.
+    BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[4], 3);    // All 4 procs have work 0, so count is 3.
 }
 
 BOOST_AUTO_TEST_CASE(active_schedule_initialization_test) {
@@ -376,11 +375,11 @@ BOOST_AUTO_TEST_CASE(active_schedule_compute_violations_test) {
     thread_data_t thread_data;
 
     // Manually create a violation
-    schedule.setAssignedSuperstep(1, 0); // node 1 is now in step 0 (was 1)
-    schedule.setAssignedSuperstep(0, 1); // node 0 is now in step 1 (was 0)
+    schedule.setAssignedSuperstep(1, 0);    // node 1 is now in step 0 (was 1)
+    schedule.setAssignedSuperstep(0, 1);    // node 0 is now in step 1 (was 0)
     // Now we have a violation for edge 0 -> 1, since step(0) > step(1)
     active_schedule.initialize(schedule);
-    
+
     active_schedule.compute_violations(thread_data);
 
     BOOST_CHECK(!thread_data.feasible);
@@ -390,7 +389,7 @@ BOOST_AUTO_TEST_CASE(active_schedule_compute_violations_test) {
 BOOST_AUTO_TEST_CASE(active_schedule_revert_moves_test) {
     using kl_move = kl_move_struct<double, VertexType>;
     using thread_data_t = thread_local_active_schedule_data<graph, double>;
-    
+
     kl_active_schedule_t original_schedule;
     original_schedule.initialize(schedule);
 
@@ -406,7 +405,7 @@ BOOST_AUTO_TEST_CASE(active_schedule_revert_moves_test) {
     BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 1);
 
     struct dummy_comm_ds {
-        void update_datastructure_after_move(const kl_move&, unsigned, unsigned) {}
+        void update_datastructure_after_move(const kl_move &, unsigned, unsigned) {}
     } comm_ds;
 
     // Revert both moves
@@ -426,46 +425,46 @@ BOOST_AUTO_TEST_CASE(active_schedule_revert_to_best_schedule_test) {
     thread_data.initialize_cost(100);
 
     // Apply 3 moves
-    kl_move move1(0, 0.0, 0, 0, 1, 0); // node 0 from (p0,s0) to (p1,s0)
+    kl_move move1(0, 0.0, 0, 0, 1, 0);    // node 0 from (p0,s0) to (p1,s0)
     active_schedule.apply_move(move1, thread_data);
-    thread_data.update_cost(-10); // cost 90
+    thread_data.update_cost(-10);    // cost 90
 
-    kl_move move2(1, 0.0, 1, 1, 2, 1); // node 1 from (p1,s1) to (p2,s1)
+    kl_move move2(1, 0.0, 1, 1, 2, 1);    // node 1 from (p1,s1) to (p2,s1)
     active_schedule.apply_move(move2, thread_data);
-    thread_data.update_cost(-10); // cost 80, best is here
+    thread_data.update_cost(-10);    // cost 80, best is here
 
-    kl_move move3(2, 0.0, 2, 2, 3, 2); // node 2 from (p2,s2) to (p3,s2)
+    kl_move move3(2, 0.0, 2, 2, 3, 2);    // node 2 from (p2,s2) to (p3,s2)
     active_schedule.apply_move(move3, thread_data);
-    thread_data.update_cost(+5); // cost 85
+    thread_data.update_cost(+5);    // cost 85
 
     BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 2);
     BOOST_CHECK_EQUAL(thread_data.applied_moves.size(), 3);
 
     struct dummy_comm_ds {
-        void update_datastructure_after_move(const kl_move&, unsigned, unsigned) {}
+        void update_datastructure_after_move(const kl_move &, unsigned, unsigned) {}
     } comm_ds;
-    
+
     unsigned end_step = active_schedule.num_steps() - 1;
     // Revert to best. start_move=0 means no step removal logic is triggered.
     active_schedule.revert_to_best_schedule(0, 0, comm_ds, thread_data, 0, end_step);
 
-    BOOST_CHECK_EQUAL(thread_data.cost, 80.0); // Check cost is reverted to best
+    BOOST_CHECK_EQUAL(thread_data.cost, 80.0);    // Check cost is reverted to best
     BOOST_CHECK_EQUAL(thread_data.applied_moves.size(), 0);
-    BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 0); // Reset for next iteration
+    BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 0);    // Reset for next iteration
 
     // Check schedule state is after move2
-    BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 1); // from move1
+    BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 1);    // from move1
     BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(0), 0);
-    BOOST_CHECK_EQUAL(active_schedule.assigned_processor(1), 2); // from move2
+    BOOST_CHECK_EQUAL(active_schedule.assigned_processor(1), 2);    // from move2
     BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 1);
-    BOOST_CHECK_EQUAL(active_schedule.assigned_processor(2), 2); // Reverted, so original
-    BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(2), 2); // Reverted, so original
+    BOOST_CHECK_EQUAL(active_schedule.assigned_processor(2), 2);    // Reverted, so original
+    BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(2), 2);    // Reverted, so original
 }
 
 BOOST_AUTO_TEST_CASE(active_schedule_swap_empty_step_fwd_test) {
     // Make step 1 empty by moving node 1 to step 0
     active_schedule.getVectorSchedule().setAssignedSuperstep(1, 0);
-    active_schedule.initialize(active_schedule.getVectorSchedule()); // re-init to update set_schedule and work_ds
+    active_schedule.initialize(active_schedule.getVectorSchedule());    // re-init to update set_schedule and work_ds
 
     BOOST_CHECK_EQUAL(active_schedule.get_step_total_work(1), 0);
 
@@ -486,15 +485,15 @@ BOOST_AUTO_TEST_CASE(active_schedule_remove_empty_step_test) {
     active_schedule.initialize(active_schedule.getVectorSchedule());
 
     unsigned original_num_steps = active_schedule.num_steps();
-    unsigned original_step_of_node_8 = active_schedule.assigned_superstep(8); // should be 2
+    unsigned original_step_of_node_8 = active_schedule.assigned_superstep(8);    // should be 2
 
     active_schedule.remove_empty_step(1);
 
     BOOST_CHECK_EQUAL(active_schedule.num_steps(), original_num_steps - 1);
     // Node 8 should be shifted back by one step
-    BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(8), original_step_of_node_8 - 1); // 8 -> 7
+    BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(8), original_step_of_node_8 - 1);    // 8 -> 7
     // Node 3 (in step 3) should be shifted back by one step
     BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(3), 2);
 }
 
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp
index 5d86b6b7..cb13a16d 100644
--- a/tests/max_bsp_schedulers.cpp
+++ b/tests/max_bsp_schedulers.cpp
@@ -18,27 +18,25 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_SCHEDULERS
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
-
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp"
 #include "osp/bsp/scheduler/MaxBspScheduler.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
 #include "test_graphs.hpp"
 
 using namespace osp;
 
 std::vector<std::string> test_architectures() { return {"data/machine_params/p3.arch"}; }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test(Scheduler<Graph_t> *test_scheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
@@ -65,13 +63,11 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
 
             BspInstance<Graph_t> instance;
 
-            bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                                instance.getComputationalDag());
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
-                                                                        instance.getArchitecture());
+            bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag());
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
             if (!status_graph || !status_architecture) {
-
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -85,8 +81,8 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-template<typename Graph_t>
-void run_test_max_bsp(MaxBspScheduler<Graph_t>* test_scheduler) {
+template <typename Graph_t>
+void run_test_max_bsp(MaxBspScheduler<Graph_t> *test_scheduler) {
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
     std::vector<std::string> filenames_architectures = test_architectures();
 
@@ -96,8 +92,8 @@ void run_test_max_bsp(MaxBspScheduler<Graph_t>* test_scheduler) {
         cwd = cwd.parent_path();
     }
 
-    for (auto& filename_graph : filenames_graph) {
-        for (auto& filename_machine : filenames_architectures) {
+    for (auto &filename_graph : filenames_graph) {
+        for (auto &filename_machine : filenames_architectures) {
             std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
             name_graph = name_graph.substr(0, name_graph.find_last_of("."));
             std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1);
@@ -112,8 +108,7 @@ void run_test_max_bsp(MaxBspScheduler<Graph_t>* test_scheduler) {
             BspArchitecture<Graph_t> arch;
 
             bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph);
-            bool status_architecture =
-                file_reader::readBspArchitecture((cwd / filename_machine).string(), arch);
+            bool status_architecture = file_reader::readBspArchitecture((cwd / filename_machine).string(), arch);
 
             BOOST_REQUIRE_MESSAGE(status_graph, "Failed to read graph: " << filename_graph);
             BOOST_REQUIRE_MESSAGE(status_architecture, "Failed to read architecture: " << filename_machine);
diff --git a/tests/maxbsp_converter_and_hc.cpp b/tests/maxbsp_converter_and_hc.cpp
index 1ecb25d8..a36fa7c4 100644
--- a/tests/maxbsp_converter_and_hc.cpp
+++ b/tests/maxbsp_converter_and_hc.cpp
@@ -18,20 +18,18 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE MAXBSP_SCHEDULERS
 #include <boost/test/unit_test.hpp>
+#include <filesystem>
 
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp"
 #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include <filesystem>
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 
 using namespace osp;
 
-
 BOOST_AUTO_TEST_CASE(maxbsp_scheduling) {
-
     using graph = computational_dag_vector_impl_def_t;
 
     BspInstance<graph> instance;
@@ -57,15 +55,14 @@ BOOST_AUTO_TEST_CASE(maxbsp_scheduling) {
     BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial));
     BOOST_CHECK(bsp_initial.satisfiesPrecedenceConstraints());
 
-
     // PART I: from BspSchedule to MaxBspSchedule conversion
 
-    std::cout<<"Original Bsp Cost: "<<bsp_initial.computeCosts()<<std::endl;
+    std::cout << "Original Bsp Cost: " << bsp_initial.computeCosts() << std::endl;
     GreedyBspToMaxBspConverter<graph> converter;
     MaxBspSchedule<graph> maxbsp = converter.Convert(bsp_initial);
     BOOST_CHECK(maxbsp.satisfiesPrecedenceConstraints());
     auto cost_conversion = maxbsp.computeCosts();
-    std::cout<<"Cost after maxBsp conversion: "<<cost_conversion<<std::endl;
+    std::cout << "Cost after maxBsp conversion: " << cost_conversion << std::endl;
 
     // hill climbing
 
@@ -73,21 +70,20 @@ BOOST_AUTO_TEST_CASE(maxbsp_scheduling) {
     HC.improveSchedule(maxbsp);
     BOOST_CHECK(maxbsp.satisfiesPrecedenceConstraints());
     auto cost_hc = maxbsp.computeCosts();
-    std::cout<<"Cost after Hill Climbing: "<<cost_hc<<std::endl;
+    std::cout << "Cost after Hill Climbing: " << cost_hc << std::endl;
     BOOST_CHECK(cost_hc <= cost_conversion);
 
-    
     // PART II: from BspScheduleCS to MaxBspScheduleCS conversion
 
     BspScheduleCS<graph> bsp_initial_cs(bsp_initial);
     BOOST_CHECK(bsp_initial_cs.hasValidCommSchedule());
-    std::cout<<"Original BspCS Cost: "<<bsp_initial_cs.computeCosts()<<std::endl;
+    std::cout << "Original BspCS Cost: " << bsp_initial_cs.computeCosts() << std::endl;
 
     MaxBspScheduleCS<graph> maxbsp_cs = converter.Convert(bsp_initial_cs);
     BOOST_CHECK(maxbsp_cs.satisfiesPrecedenceConstraints());
     BOOST_CHECK(maxbsp_cs.hasValidCommSchedule());
     auto cost_conversion_cs = maxbsp_cs.computeCosts();
-    std::cout<<"Cost after maxBsp(CS) conversion: "<<cost_conversion_cs<<std::endl;
+    std::cout << "Cost after maxBsp(CS) conversion: " << cost_conversion_cs << std::endl;
 
     // hill climbing for comm. schedule
 
@@ -96,14 +92,13 @@ BOOST_AUTO_TEST_CASE(maxbsp_scheduling) {
     BOOST_CHECK(maxbsp_cs.satisfiesPrecedenceConstraints());
     BOOST_CHECK(maxbsp_cs.hasValidCommSchedule());
     auto cost_hccs = maxbsp_cs.computeCosts();
-    std::cout<<"Cost after comm. sched. hill climbing: "<<cost_hccs<<std::endl;
+    std::cout << "Cost after comm. sched. hill climbing: " << cost_hccs << std::endl;
     BOOST_CHECK(cost_hccs <= cost_conversion_cs);
 
-
     // PART III: same for larger DAG
 
-    status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), instance.getComputationalDag());
+    status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(),
+                                                               instance.getComputationalDag());
 
     BOOST_CHECK(status);
     instance.setSynchronisationCosts(7);
@@ -113,18 +108,18 @@ BOOST_AUTO_TEST_CASE(maxbsp_scheduling) {
 
     BspScheduleCS<graph> bsp_initial_large_cs(bsp_initial_large);
     BOOST_CHECK(bsp_initial_large_cs.hasValidCommSchedule());
-    std::cout<<"Original Bsp Cost on large DAG: "<<bsp_initial_large_cs.computeCosts()<<std::endl;
+    std::cout << "Original Bsp Cost on large DAG: " << bsp_initial_large_cs.computeCosts() << std::endl;
 
     MaxBspScheduleCS<graph> maxbsp_cs_large = converter.Convert(bsp_initial_large_cs);
     BOOST_CHECK(maxbsp_cs_large.satisfiesPrecedenceConstraints());
     BOOST_CHECK(maxbsp_cs_large.hasValidCommSchedule());
     auto cost_maxbsp_cs_large = maxbsp_cs_large.computeCosts();
-    std::cout<<"Cost after maxBsp conversion on large DAG: "<<cost_maxbsp_cs_large<<std::endl;
+    std::cout << "Cost after maxBsp conversion on large DAG: " << cost_maxbsp_cs_large << std::endl;
 
     HCcs.improveSchedule(maxbsp_cs_large);
     BOOST_CHECK(maxbsp_cs_large.satisfiesPrecedenceConstraints());
     BOOST_CHECK(maxbsp_cs_large.hasValidCommSchedule());
     auto cost_hccs_large = maxbsp_cs_large.computeCosts();
-    std::cout<<"Cost after comm. sched. hill climbing on large DAG: "<<cost_hccs_large<<std::endl;
+    std::cout << "Cost after comm. sched. hill climbing on large DAG: " << cost_hccs_large << std::endl;
     BOOST_CHECK(cost_hccs_large <= cost_maxbsp_cs_large);
 }
diff --git a/tests/merkle_hash_computer.cpp b/tests/merkle_hash_computer.cpp
index 01fc7915..d8f231a8 100644
--- a/tests/merkle_hash_computer.cpp
+++ b/tests/merkle_hash_computer.cpp
@@ -19,16 +19,15 @@ limitations under the License.
 #define BOOST_TEST_MODULE BSP_SCHEDULE_RECOMP
 #include <boost/test/unit_test.hpp>
 
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "test_utils.hpp"
 
 using namespace osp;
 
-BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test)
-{
+BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) {
     using graph_t = computational_dag_vector_impl_def_t;
     graph_t graph;
 
@@ -38,19 +37,18 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test)
     MerkleHashComputer<graph_t, uniform_node_hash_func<vertex_idx_t<graph_t>>> m_hash(graph);
 
     BOOST_CHECK_EQUAL(m_hash.get_vertex_hashes().size(), graph.num_vertices());
-    
-    for (const auto& v : source_vertices_view(graph)) {
+
+    for (const auto &v : source_vertices_view(graph)) {
         BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(v), 11);
     }
 
     size_t num = 0;
-    for (const auto& pair : m_hash.get_orbits()) {
-
+    for (const auto &pair : m_hash.get_orbits()) {
         num += pair.second.size();
         std::cout << "orbit " << pair.first << ": ";
-        for (const auto& v : pair.second) {
+        for (const auto &v : pair.second) {
             std::cout << v << ", ";
-        } 
+        }
         std::cout << std::endl;
     }
 
@@ -62,43 +60,38 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test)
     BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(29), m_hash.get_vertex_hash(22));
     BOOST_CHECK(m_hash.get_vertex_hash(3) != m_hash.get_vertex_hash(12));
     BOOST_CHECK(m_hash.get_vertex_hash(53) != m_hash.get_vertex_hash(29));
-
 }
 
-BOOST_AUTO_TEST_CASE(MerkleHashComputer_test_fw_bw_precomp)
-{
+BOOST_AUTO_TEST_CASE(MerkleHashComputer_test_fw_bw_precomp) {
     using graph_t = computational_dag_vector_impl_def_t;
     graph_t graph_test;
 
     const auto project_root = get_project_root();
-    file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph_test);
+    file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(),
+                                                      graph_test);
 
     std::vector<size_t> precom_node_hashes(graph_test.num_vertices(), 5);
 
     MerkleHashComputer<graph_t, precom_bwd_merkle_node_hash_func<graph_t>> m_hash(graph_test, graph_test, precom_node_hashes);
 
     BOOST_CHECK_EQUAL(m_hash.get_vertex_hashes().size(), graph_test.num_vertices());
-    
-    size_t num = 0;
-    for (const auto& pair : m_hash.get_orbits()) {
 
+    size_t num = 0;
+    for (const auto &pair : m_hash.get_orbits()) {
         num += pair.second.size();
         std::cout << "orbit " << pair.first << ": ";
-        for (const auto& v : pair.second) {
+        for (const auto &v : pair.second) {
             std::cout << v << ", ";
-        } 
+        }
         std::cout << std::endl;
     }
 
     BOOST_CHECK_EQUAL(num, graph_test.num_vertices());
-
 }
 
-
 using graphType = computational_dag_vector_impl_def_t;
 using VertexType = vertex_idx_t<graphType>;
 
-
 BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IdenticalGraphsAreIsomorphic) {
     graphType dag1;
     const auto v1 = dag1.add_vertex(0, 10, 1);
@@ -134,14 +127,14 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_DifferentVertexCount) {
 
 // Test case 3: Graphs with the same size but different structures should not be isomorphic.
 BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_SameSizeDifferentStructure) {
-    graphType dag1; // A -> B -> C
+    graphType dag1;    // A -> B -> C
     const auto v1_1 = dag1.add_vertex(0, 1, 1);
     const auto v1_2 = dag1.add_vertex(0, 1, 1);
     const auto v1_3 = dag1.add_vertex(0, 1, 1);
     dag1.add_edge(v1_1, v1_2);
     dag1.add_edge(v1_2, v1_3);
 
-    graphType dag2; // A -> B, A -> C
+    graphType dag2;    // A -> B, A -> C
     const auto v2_1 = dag2.add_vertex(0, 1, 1);
     const auto v2_2 = dag2.add_vertex(0, 1, 1);
     const auto v2_3 = dag2.add_vertex(0, 1, 1);
@@ -154,16 +147,16 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_SameSizeDifferentStructure) {
 // Test case 4: Structurally identical graphs with different vertex labeling should be isomorphic.
 BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IsomorphicWithDifferentLabels) {
     graphType dag1;
-    const auto v1_1 = dag1.add_vertex(0, 1, 1); // Source
+    const auto v1_1 = dag1.add_vertex(0, 1, 1);    // Source
     const auto v1_2 = dag1.add_vertex(0, 1, 1);
-    const auto v1_3 = dag1.add_vertex(0, 1, 1); // Sink
+    const auto v1_3 = dag1.add_vertex(0, 1, 1);    // Sink
     dag1.add_edge(v1_1, v1_2);
     dag1.add_edge(v1_2, v1_3);
 
     graphType dag2;
     // Same structure as dag1, but vertices are added in a different order.
-    const auto v2_3 = dag2.add_vertex(0, 1, 1); // Sink
-    const auto v2_1 = dag2.add_vertex(0, 1, 1); // Source
+    const auto v2_3 = dag2.add_vertex(0, 1, 1);    // Sink
+    const auto v2_1 = dag2.add_vertex(0, 1, 1);    // Source
     const auto v2_2 = dag2.add_vertex(0, 1, 1);
     dag2.add_edge(v2_1, v2_2);
     dag2.add_edge(v2_2, v2_3);
@@ -175,14 +168,26 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IsomorphicWithDifferentLabels) {
 BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_ComplexIsomorphicGraphs) {
     graphType dag1;
     {
-        const auto v1 = dag1.add_vertex(2, 9, 2); const auto v2 = dag1.add_vertex(3, 8, 4);
-        const auto v3 = dag1.add_vertex(4, 7, 3); const auto v4 = dag1.add_vertex(5, 6, 2);
-        const auto v5 = dag1.add_vertex(6, 5, 6); const auto v6 = dag1.add_vertex(7, 4, 2);
-        dag1.add_vertex(8, 3, 4); const auto v8 = dag1.add_vertex(9, 2, 1);
-        dag1.add_edge(v1, v2); dag1.add_edge(v1, v3); dag1.add_edge(v1, v4);
-        dag1.add_edge(v1, v5); dag1.add_edge(v1, v8); dag1.add_edge(v2, v5);
-        dag1.add_edge(v2, v6); dag1.add_edge(v2, v8); dag1.add_edge(v3, v5);
-        dag1.add_edge(v3, v6); dag1.add_edge(v5, v8); dag1.add_edge(v4, v8);
+        const auto v1 = dag1.add_vertex(2, 9, 2);
+        const auto v2 = dag1.add_vertex(3, 8, 4);
+        const auto v3 = dag1.add_vertex(4, 7, 3);
+        const auto v4 = dag1.add_vertex(5, 6, 2);
+        const auto v5 = dag1.add_vertex(6, 5, 6);
+        const auto v6 = dag1.add_vertex(7, 4, 2);
+        dag1.add_vertex(8, 3, 4);
+        const auto v8 = dag1.add_vertex(9, 2, 1);
+        dag1.add_edge(v1, v2);
+        dag1.add_edge(v1, v3);
+        dag1.add_edge(v1, v4);
+        dag1.add_edge(v1, v5);
+        dag1.add_edge(v1, v8);
+        dag1.add_edge(v2, v5);
+        dag1.add_edge(v2, v6);
+        dag1.add_edge(v2, v8);
+        dag1.add_edge(v3, v5);
+        dag1.add_edge(v3, v6);
+        dag1.add_edge(v5, v8);
+        dag1.add_edge(v4, v8);
     }
 
     graphType dag2;
@@ -190,14 +195,25 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_ComplexIsomorphicGraphs) {
         // Same structure, different vertex variable names and creation order.
         const auto n8 = dag2.add_vertex(9, 2, 1);
         dag2.add_vertex(8, 3, 4);
-        const auto n6 = dag2.add_vertex(7, 4, 2); const auto n5 = dag2.add_vertex(6, 5, 6);
-        const auto n4 = dag2.add_vertex(5, 6, 2); const auto n3 = dag2.add_vertex(4, 7, 3);
-        const auto n2 = dag2.add_vertex(3, 8, 4); const auto n1 = dag2.add_vertex(2, 9, 2);
-        dag2.add_edge(n1, n2); dag2.add_edge(n1, n3); dag2.add_edge(n1, n4);
-        dag2.add_edge(n1, n5); dag2.add_edge(n1, n8); dag2.add_edge(n2, n5);
-        dag2.add_edge(n2, n6); dag2.add_edge(n2, n8); dag2.add_edge(n3, n5);
-        dag2.add_edge(n3, n6); dag2.add_edge(n5, n8); dag2.add_edge(n4, n8);
+        const auto n6 = dag2.add_vertex(7, 4, 2);
+        const auto n5 = dag2.add_vertex(6, 5, 6);
+        const auto n4 = dag2.add_vertex(5, 6, 2);
+        const auto n3 = dag2.add_vertex(4, 7, 3);
+        const auto n2 = dag2.add_vertex(3, 8, 4);
+        const auto n1 = dag2.add_vertex(2, 9, 2);
+        dag2.add_edge(n1, n2);
+        dag2.add_edge(n1, n3);
+        dag2.add_edge(n1, n4);
+        dag2.add_edge(n1, n5);
+        dag2.add_edge(n1, n8);
+        dag2.add_edge(n2, n5);
+        dag2.add_edge(n2, n6);
+        dag2.add_edge(n2, n8);
+        dag2.add_edge(n3, n5);
+        dag2.add_edge(n3, n6);
+        dag2.add_edge(n5, n8);
+        dag2.add_edge(n4, n8);
     }
-    
+
     BOOST_CHECK(are_isomorphic_by_merkle_hash(dag1, dag2));
-}
\ No newline at end of file
+}
diff --git a/tests/orbit_graph_processor.cpp b/tests/orbit_graph_processor.cpp
index d79a83a9..6ac34228 100644
--- a/tests/orbit_graph_processor.cpp
+++ b/tests/orbit_graph_processor.cpp
@@ -18,28 +18,27 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE OrbitGraphProcessor
 #include <boost/test/unit_test.hpp>
-#include "test_utils.hpp"
-#include "test_graphs.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
+#include <filesystem>
+#include <iostream>
+#include <set>
 
+#include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/auxiliary/io/dot_graph_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp"
-#include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
-
-#include <filesystem>
-#include <iostream>
-#include <set>
+#include "test_graphs.hpp"
+#include "test_utils.hpp"
 
 using namespace osp;
 using graph_t = computational_dag_vector_impl_def_t;
 
 template <typename Graph_t>
-void check_partitioning(const Graph_t& dag, const OrbitGraphProcessor<Graph_t, Graph_t>& processor) {
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
-    const auto& final_groups = processor.get_final_groups();
+void check_partitioning(const Graph_t &dag, const OrbitGraphProcessor<Graph_t, Graph_t> &processor) {
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_groups = processor.get_final_groups();
 
     // Check that the final coarse graph is acyclic
     BOOST_CHECK(is_acyclic(final_coarse_graph));
@@ -47,10 +46,10 @@ void check_partitioning(const Graph_t& dag, const OrbitGraphProcessor<Graph_t, G
     // Check that the final groups form a valid partition of the original DAG's vertices
     std::vector<int> vertex_counts(dag.num_vertices(), 0);
     size_t total_vertices_in_groups = 0;
-    for (const auto& group : final_groups) {
-        for (const auto& subgraph : group.subgraphs) {
+    for (const auto &group : final_groups) {
+        for (const auto &subgraph : group.subgraphs) {
             total_vertices_in_groups += subgraph.size();
-            for (const auto& vertex : subgraph) {
+            for (const auto &vertex : subgraph) {
                 BOOST_REQUIRE_LT(vertex, dag.num_vertices());
                 vertex_counts[vertex]++;
             }
@@ -111,10 +110,10 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SimpleMerge) {
     // Two parallel pipelines that are structurally identical
     // 0 -> 1
     // 2 -> 3
-    dag.add_vertex(10, 1, 1); // 0
-    dag.add_vertex(10, 1, 1); // 1
-    dag.add_vertex(10, 1, 1); // 2
-    dag.add_vertex(10, 1, 1); // 3
+    dag.add_vertex(10, 1, 1);    // 0
+    dag.add_vertex(10, 1, 1);    // 1
+    dag.add_vertex(10, 1, 1);    // 2
+    dag.add_vertex(10, 1, 1);    // 3
     dag.add_edge(0, 1);
     dag.add_edge(2, 3);
 
@@ -124,8 +123,8 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SimpleMerge) {
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
 
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
-    const auto& final_groups = processor.get_final_groups();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_groups = processor.get_final_groups();
 
     // Expect a single node in the final coarse graph
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1);
@@ -146,10 +145,10 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SimpleMerge) {
 BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) {
     graph_t dag;
     // 0 -> {1, 2} -> 3. Nodes 1 and 2 are in the same orbit.
-    dag.add_vertex(10, 1, 1); // 0
-    dag.add_vertex(20, 1, 1); // 1
-    dag.add_vertex(20, 1, 1); // 2
-    dag.add_vertex(30, 1, 1); // 3
+    dag.add_vertex(10, 1, 1);    // 0
+    dag.add_vertex(20, 1, 1);    // 1
+    dag.add_vertex(20, 1, 1);    // 2
+    dag.add_vertex(30, 1, 1);    // 3
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
     dag.add_edge(1, 3);
@@ -162,8 +161,8 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) {
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
 
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
-    const auto& final_groups = processor.get_final_groups();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_groups = processor.get_final_groups();
 
     // Expect no merges, so final graph is same as initial coarse graph.
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 3);
@@ -175,9 +174,13 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) {
     // Group 2: {{3}}
     size_t group_of_1_count = 0;
     size_t group_of_2_count = 0;
-    for(const auto& group : final_groups) {
-        if (group.subgraphs.size() == 1) group_of_1_count++;
-        if (group.subgraphs.size() == 2) group_of_2_count++;
+    for (const auto &group : final_groups) {
+        if (group.subgraphs.size() == 1) {
+            group_of_1_count++;
+        }
+        if (group.subgraphs.size() == 2) {
+            group_of_2_count++;
+        }
     }
     BOOST_CHECK_EQUAL(group_of_1_count, 2);
     BOOST_CHECK_EQUAL(group_of_2_count, 1);
@@ -208,12 +211,12 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_MultiPipelineMerge) {
     const auto dag = construct_multi_pipeline_dag<graph_t>(5, 4);
     BOOST_REQUIRE_EQUAL(dag.num_vertices(), 20);
 
-    OrbitGraphProcessor<graph_t, graph_t> processor; // Set threshold to match pipeline count
+    OrbitGraphProcessor<graph_t, graph_t> processor;    // default-constructed; no threshold is set before discovery
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
 
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
-    const auto& final_groups = processor.get_final_groups();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_groups = processor.get_final_groups();
 
     // Expect a single node in the final coarse graph
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1);
@@ -239,9 +242,9 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_LadderNoMerge) {
     OrbitGraphProcessor<graph_t, graph_t> processor;
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
-    
-    const auto& initial_coarse_graph = processor.get_coarse_graph();
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
+
+    const auto &initial_coarse_graph = processor.get_coarse_graph();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
 
     // Expect no merges, so final graph is the same as the initial coarse graph.
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), initial_coarse_graph.num_vertices());
@@ -260,7 +263,7 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_AsymmetricNoMerge) {
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
 
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
 
     // Expect all nodes to be merged into a single coarse node.
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1);
@@ -282,7 +285,7 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_BinaryTreeNoMerge) {
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
 
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
 
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 3);
 
@@ -297,8 +300,8 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ButterflyMerge) {
     MerkleHashComputer<graph_t, bwd_merkle_node_hash_func<graph_t>, true> hasher(dag, dag);
     processor.discover_isomorphic_groups(dag, hasher);
 
-    const auto& final_coarse_graph = processor.get_final_coarse_graph();
+    const auto &final_coarse_graph = processor.get_final_coarse_graph();
     BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 4);
 
     check_partitioning(dag, processor);
-}
\ No newline at end of file
+}
diff --git a/tests/pebbling_schedule_class.cpp b/tests/pebbling_schedule_class.cpp
index ce180370..097b1b7f 100644
--- a/tests/pebbling_schedule_class.cpp
+++ b/tests/pebbling_schedule_class.cpp
@@ -18,19 +18,17 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE BSP_MEM_SCHEDULERS
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
-#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
-#include "osp/pebbling/PebblingSchedule.hpp"
-#include "osp/bsp/scheduler/Scheduler.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/auxiliary/io/pebbling_schedule_file_writer.hpp"
-
+#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "osp/pebbling/PebblingSchedule.hpp"
 
 using namespace osp;
 
@@ -55,7 +53,7 @@ std::vector<std::string> tiny_spaa_graphs() {
 
 std::vector<std::string> test_architectures() { return {"data/machine_params/p3.arch"}; }
 
-template<typename Graph_t>
+template <typename Graph_t>
 void run_test(Scheduler<Graph_t> *test_scheduler) {
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
     std::vector<std::string> filenames_architectures = test_architectures();
@@ -70,8 +68,8 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
 
     for (auto &filename_graph : filenames_graph) {
         for (auto &filename_machine : filenames_architectures) {
-            std::string name_graph =
-                filename_graph.substr(filename_machine.find_last_of("/\\") + 1, filename_graph.find_last_of("."));
+            std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);    // drop directory part
+            name_graph = name_graph.substr(0, name_graph.rfind("."));    // drop extension (substr takes a count, not an end)
             std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1);
             name_machine = name_machine.substr(0, name_machine.rfind("."));
 
@@ -80,12 +78,13 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
 
             BspInstance<Graph_t> instance;
 
-            bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag());
-            
-            bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
+            bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(),
+                                                                                  instance.getComputationalDag());
 
-            if (!status_graph || !status_architecture) {
+            bool status_architecture
+                = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture());
 
+            if (!status_graph || !status_architecture) {
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
@@ -95,15 +94,18 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
             RETURN_STATUS result = test_scheduler->computeSchedule(bsp_schedule);
             BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result);
 
-            std::vector<v_memw_t<Graph_t> > minimum_memory_required_vector = PebblingSchedule<Graph_t>::minimumMemoryRequiredPerNodeType(instance);
-            v_memw_t<Graph_t> max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end());
+            std::vector<v_memw_t<Graph_t> > minimum_memory_required_vector
+                = PebblingSchedule<Graph_t>::minimumMemoryRequiredPerNodeType(instance);
+            v_memw_t<Graph_t> max_required
+                = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end());
             instance.getArchitecture().setMemoryBound(max_required);
 
             PebblingSchedule<Graph_t> memSchedule1(bsp_schedule, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::LARGEST_ID);
             BOOST_CHECK_EQUAL(&memSchedule1.getInstance(), &instance);
-            BOOST_CHECK(memSchedule1.isValid());            
+            BOOST_CHECK(memSchedule1.isValid());
 
-            PebblingSchedule<Graph_t> memSchedule3(bsp_schedule, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED);
+            PebblingSchedule<Graph_t> memSchedule3(bsp_schedule,
+                                                   PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED);
             BOOST_CHECK(memSchedule3.isValid());
 
             PebblingSchedule<Graph_t> memSchedule5(bsp_schedule, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::FORESIGHT);
@@ -114,7 +116,8 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
             PebblingSchedule<Graph_t> memSchedule2(bsp_schedule, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::LARGEST_ID);
             BOOST_CHECK(memSchedule2.isValid());
 
-            PebblingSchedule<Graph_t> memSchedule4(bsp_schedule, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED);
+            PebblingSchedule<Graph_t> memSchedule4(bsp_schedule,
+                                                   PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED);
             BOOST_CHECK(memSchedule4.isValid());
 
             PebblingSchedule<Graph_t> memSchedule6(bsp_schedule, PebblingSchedule<Graph_t>::CACHE_EVICTION_STRATEGY::FORESIGHT);
@@ -123,14 +126,12 @@ void run_test(Scheduler<Graph_t> *test_scheduler) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) {
     GreedyBspScheduler<computational_dag_vector_impl_def_t> test;
     run_test(&test);
 }
 
 BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) {
-
     using graph = computational_dag_vector_impl_def_int_t;
 
     BspInstance<graph> instance;
@@ -146,8 +147,8 @@ BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(),
+                                                                    instance.getComputationalDag());
 
     BOOST_CHECK(status);
     BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54);
@@ -159,13 +160,14 @@ BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) {
     RETURN_STATUS result = scheduler.computeSchedule(bsp_schedule);
     BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result);
 
-    std::vector<v_memw_t<graph> > minimum_memory_required_vector = PebblingSchedule<graph>::minimumMemoryRequiredPerNodeType(instance);
+    std::vector<v_memw_t<graph> > minimum_memory_required_vector
+        = PebblingSchedule<graph>::minimumMemoryRequiredPerNodeType(instance);
     v_memw_t<graph> max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end());
-    instance.getArchitecture().setMemoryBound(max_required + 3);          
+    instance.getArchitecture().setMemoryBound(max_required + 3);
 
     PebblingSchedule<graph> memSchedule(bsp_schedule, PebblingSchedule<graph>::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED);
     BOOST_CHECK(memSchedule.isValid());
 
     std::cout << "Writing pebbling schedule" << std::endl;
     file_writer::write_txt(std::cout, memSchedule);
-}
\ No newline at end of file
+}
diff --git a/tests/permutations.cpp b/tests/permutations.cpp
index 67eb7e71..05622968 100644
--- a/tests/permutations.cpp
+++ b/tests/permutations.cpp
@@ -17,13 +17,11 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE permutations
-#include <boost/test/unit_test.hpp>
-
 #include <algorithm>
+#include <boost/test/unit_test.hpp>
 #include <numeric>
 #include <random>
 
-
 #include "osp/auxiliary/permute.hpp"
 
 namespace osp {
@@ -50,7 +48,7 @@ BOOST_AUTO_TEST_CASE(In_Place_Permutation_random) {
 
 BOOST_AUTO_TEST_CASE(In_Place_Permutation_char) {
     std::vector<char> vec({'a', 'b', 'c', 'd', 'e', 'f', 'g'});
-    std::vector<std::size_t> perm({4,0,1,2,3,6,5});
+    std::vector<std::size_t> perm({4, 0, 1, 2, 3, 6, 5});
     std::vector<char> sol({'b', 'c', 'd', 'e', 'a', 'g', 'f'});
     std::vector<std::size_t> perm_sol(perm.size());
     std::iota(perm_sol.begin(), perm_sol.end(), 0);
@@ -62,7 +60,6 @@ BOOST_AUTO_TEST_CASE(In_Place_Permutation_char) {
     }
 }
 
-
 BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_random) {
     std::vector<unsigned> vec(20);
     std::iota(vec.begin(), vec.end(), 0);
@@ -89,7 +86,7 @@ BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_random) {
 
 BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_char) {
     std::vector<char> vec({'a', 'b', 'c', 'd', 'e', 'f', 'g'});
-    std::vector<std::size_t> perm({4,0,1,2,3,6,5});
+    std::vector<std::size_t> perm({4, 0, 1, 2, 3, 6, 5});
     std::vector<char> sol({'e', 'a', 'b', 'c', 'd', 'g', 'f'});
     std::vector<std::size_t> perm_sol(perm.size());
     std::iota(perm_sol.begin(), perm_sol.end(), 0);
@@ -101,7 +98,4 @@ BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_char) {
     }
 }
 
-
-
-
-} // namespace osp
\ No newline at end of file
+}    // namespace osp
diff --git a/tests/random_graph_gen.cpp b/tests/random_graph_gen.cpp
index 7f3a1b01..7a7fce52 100644
--- a/tests/random_graph_gen.cpp
+++ b/tests/random_graph_gen.cpp
@@ -18,7 +18,6 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE coarse_refine_scheduler
 #include <boost/test/unit_test.hpp>
-
 #include <iostream>
 #include <iterator>
 #include <map>
@@ -28,13 +27,12 @@ limitations under the License.
 
 #include "osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp"
 #include "osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(Erdos_Renyi_graph_test) {
-
     std::vector<size_t> graph_sizes({100, 500, 500});
     std::vector<double> graph_chances({10, 8, 20});
 
@@ -48,10 +46,9 @@ BOOST_AUTO_TEST_CASE(Erdos_Renyi_graph_test) {
 }
 
 BOOST_AUTO_TEST_CASE(near_diag_random_graph_test) {
-
     std::vector<size_t> graph_sizes({100, 500, 500});
     std::vector<double> graph_bw({10, 20, 30});
-    std::vector<double> graph_prob({0.14, 0.02 , 0.07});
+    std::vector<double> graph_prob({0.14, 0.02, 0.07});
 
     for (size_t i = 0; i < graph_sizes.size(); i++) {
         computational_dag_vector_impl_def_int_t graph;
diff --git a/tests/set_operations.cpp b/tests/set_operations.cpp
index 6252aa5f..73496e6e 100644
--- a/tests/set_operations.cpp
+++ b/tests/set_operations.cpp
@@ -18,13 +18,11 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE Sets
 #include <boost/test/unit_test.hpp>
-
-#include "osp/auxiliary/misc.hpp"
-
 #include <numeric>
 #include <unordered_set>
 #include <vector>
 
+#include "osp/auxiliary/misc.hpp"
 
 using namespace osp;
 
@@ -60,16 +58,16 @@ BOOST_AUTO_TEST_CASE(SetIntersectionLarge) {
 
     std::unordered_set<int> iota_0_to_10k_set(iota_0_to_10k.begin(), iota_0_to_10k.end());
 
-    { // Intersection of [0,10k] and [10k,20k]  -->  []
+    {    // Intersection of [0,10k] and [10k,20k]  -->  []
         std::unordered_set<int> iota_10k_to_20k_set(iota_10k_to_20k.begin(), iota_10k_to_20k.end());
         BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_10k_to_20k_set).empty());
     }
 
-    { // Intersection of [0,10k] and [0k,10k]  -->  [0k,10k]
+    {    // Intersection of [0,10k] and [0k,10k]  -->  [0k,10k]
         BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_0_to_10k_set) == iota_0_to_10k_set);
     }
 
-    { // Intersection of [0,10k] and [5k,10k]  -->  [5k,10k]
+    {    // Intersection of [0,10k] and [5k,10k]  -->  [5k,10k]
         std::vector<int> iota_5k_to_10k(5'000);
         std::iota(iota_5k_to_10k.begin(), iota_5k_to_10k.end(), 5'000);
         std::unordered_set<int> iota_5k_to_10k_set(iota_5k_to_10k.begin(), iota_5k_to_10k.end());
@@ -112,18 +110,18 @@ BOOST_AUTO_TEST_CASE(SetUnionLarge) {
 
     std::unordered_set<int> iota_0_to_10k_set(iota_0_to_10k.begin(), iota_0_to_10k.end());
 
-    { // Union of [0,10k] and [10k,20k]  -->  [0k,20k]
+    {    // Union of [0,10k] and [10k,20k]  -->  [0k,20k]
         std::unordered_set<int> iota_10k_to_20k_set(iota_10k_to_20k.begin(), iota_10k_to_20k.end());
         std::unordered_set<int> expected_union(iota_0_to_10k.begin(), iota_0_to_10k.end());
         expected_union.insert(iota_10k_to_20k.begin(), iota_10k_to_20k.end());
         BOOST_CHECK(get_union(iota_0_to_10k_set, iota_10k_to_20k_set) == expected_union);
     }
 
-    { // Union of [0,10k] and [0k,10k]  -->  [0k,10k]
+    {    // Union of [0,10k] and [0k,10k]  -->  [0k,10k]
         BOOST_CHECK(get_union(iota_0_to_10k_set, iota_0_to_10k_set) == iota_0_to_10k_set);
     }
 
-    { // Union of [0,10k] and [5k,15k]  -->  [0k,15k]
+    {    // Union of [0,10k] and [5k,15k]  -->  [0k,15k]
         std::vector<int> iota_5k_to_15k(10'000);
         std::iota(iota_5k_to_15k.begin(), iota_5k_to_15k.end(), 5'000);
         std::unordered_set<int> iota_5k_to_15k_set(iota_5k_to_15k.begin(), iota_5k_to_15k.end());
diff --git a/tests/sorts_and_arrangements.cpp b/tests/sorts_and_arrangements.cpp
index 5e2d1582..328f228e 100644
--- a/tests/sorts_and_arrangements.cpp
+++ b/tests/sorts_and_arrangements.cpp
@@ -18,7 +18,6 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE Sorts_and_Arrangements
 #include <boost/test/unit_test.hpp>
-
 #include <iostream>
 #include <string>
 #include <vector>
diff --git a/tests/sparse_matrix_impl.cpp b/tests/sparse_matrix_impl.cpp
index fd23aa84..8957fae7 100644
--- a/tests/sparse_matrix_impl.cpp
+++ b/tests/sparse_matrix_impl.cpp
@@ -18,45 +18,45 @@ limitations under the License.
 
 #ifdef EIGEN_FOUND
 
-#define BOOST_TEST_MODULE SparseMatrixImpl
+#    define BOOST_TEST_MODULE SparseMatrixImpl
 
-#include <boost/test/unit_test.hpp>
-#include <iostream>
-#include <vector>
+#    include <boost/test/unit_test.hpp>
+#    include <iostream>
+#    include <vector>
 
-#include "osp/graph_algorithms/directed_graph_util.hpp"
-#include "osp/graph_algorithms/directed_graph_path_util.hpp"
-#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
+#    include "osp/graph_algorithms/directed_graph_path_util.hpp"
+#    include "osp/graph_algorithms/directed_graph_util.hpp"
+#    include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) {
-/*
-            
-       ---0
-     /  / | \
-  --|--1  2--|-\
-  | |  |   \ |  |  
-  | |  4 <-- 3  /   
-  |  \ |       / 
-   \   5      /
-    \  | /----
-      \|/
-       6
-
-
-    j→  0     1     2     3     4     5     6
-i ↓
-   -------------------------------------------
-  0 |   0     0     0     0     0     0     0
-  1 | 2.0     0     0     0     0     0     0
-  2 | 3.0     0     0     0     0     0     0
-  3 | 4.0     0   5.0     0     0     0     0
-  4 | 0.0   6.0     0   7.0     0     0     0
-  5 | 8.0     0     0     0   9.0     0     0
-  6 | 0.0  10.0  11.0     0     0  12.0     0
-
-*/
+    /*
+
+           ---0
+         /  / | \
+      --|--1  2--|-\
+      | |  |   \ |  |
+      | |  4 <-- 3  /
+      |  \ |       /
+       \   5      /
+        \  | /----
+          \|/
+           6
+
+
+        j→  0     1     2     3     4     5     6
+    i ↓
+       -------------------------------------------
+      0 |   0     0     0     0     0     0     0
+      1 | 2.0     0     0     0     0     0     0
+      2 | 3.0     0     0     0     0     0     0
+      3 | 4.0     0   5.0     0     0     0     0
+      4 | 0.0   6.0     0   7.0     0     0     0
+      5 | 8.0     0     0     0   9.0     0     0
+      6 | 0.0  10.0  11.0     0     0  12.0     0
+
+    */
     using SM_csr = Eigen::SparseMatrix<double, Eigen::RowMajor, int32_t>;
     using SM_csc = Eigen::SparseMatrix<double, Eigen::ColMajor, int32_t>;
     using Triplet = Eigen::Triplet<double>;
@@ -64,21 +64,22 @@ i ↓
     std::vector<Triplet> triplets;
 
     // Diagonal entries
-    for (int i = 0; i < size; ++i)
+    for (int i = 0; i < size; ++i) {
         triplets.emplace_back(i, i, 1.0);
+    }
 
     // Dependencies (i depends on j if L(i,j) ≠ 0, j < i)
-    triplets.emplace_back(1, 0, 2.0);   // x1 ← x0
-    triplets.emplace_back(2, 0, 3.0);   // x2 ← x0
-    triplets.emplace_back(3, 0, 4.0);   // x3 ← x0
-    triplets.emplace_back(3, 2, 5.0);   // x3 ← x2
-    triplets.emplace_back(4, 1, 6.0);   // x4 ← x1
-    triplets.emplace_back(4, 3, 7.0);   // x4 ← x3
-    triplets.emplace_back(5, 0, 8.0);   // x5 ← x0
-    triplets.emplace_back(5, 4, 9.0);   // x5 ← x4
-    triplets.emplace_back(6, 1, 10.0);  // x6 ← x1
-    triplets.emplace_back(6, 2, 11.0);  // x6 ← x2
-    triplets.emplace_back(6, 5, 12.0);  // x6 ← x5
+    triplets.emplace_back(1, 0, 2.0);     // x1 ← x0
+    triplets.emplace_back(2, 0, 3.0);     // x2 ← x0
+    triplets.emplace_back(3, 0, 4.0);     // x3 ← x0
+    triplets.emplace_back(3, 2, 5.0);     // x3 ← x2
+    triplets.emplace_back(4, 1, 6.0);     // x4 ← x1
+    triplets.emplace_back(4, 3, 7.0);     // x4 ← x3
+    triplets.emplace_back(5, 0, 8.0);     // x5 ← x0
+    triplets.emplace_back(5, 4, 9.0);     // x5 ← x4
+    triplets.emplace_back(6, 1, 10.0);    // x6 ← x1
+    triplets.emplace_back(6, 2, 11.0);    // x6 ← x2
+    triplets.emplace_back(6, 5, 12.0);    // x6 ← x5
 
     // Construct matrix
     SM_csr L_csr(size, size);
@@ -105,16 +106,31 @@ i ↓
 
     std::vector<vertex_idx> vertices{0, 1, 2, 3, 4, 5, 6};
 
-    std::vector<std::vector<vertex_idx>> out_neighbors{{1, 2, 3, 5}, {4, 6}, {3, 6}, {4}, {5}, {6}, {}};
-
-    std::vector<std::vector<vertex_idx>> in_neighbors{{}, {0}, {0}, {0, 2}, {1, 3}, {0, 4}, {1, 2, 5}};
+    std::vector<std::vector<vertex_idx>> out_neighbors{
+        {1, 2, 3, 5},
+        {4, 6},
+        {3, 6},
+        {4},
+        {5},
+        {6},
+        {}
+    };
+
+    std::vector<std::vector<vertex_idx>> in_neighbors{
+        {},
+        {0},
+        {0},
+        {0, 2},
+        {1, 3},
+        {0, 4},
+        {1, 2, 5}
+    };
 
     size_t idx = 0;
 
     for (const long unsigned int &v : graph.vertices()) {
-
         BOOST_CHECK_EQUAL(v, vertices[idx++]);
-        
+
         size_t i = 0;
         const size_t vi = static_cast<size_t>(v);
 
@@ -137,20 +153,16 @@ i ↓
             BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[vi][i++]);
         }
 
-
         BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[vi].size());
         BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[vi].size());
-        
     }
 
     unsigned count = 0;
-    for (const auto & e: edges(graph)) {
-        
+    for (const auto &e : edges(graph)) {
         std::cout << e.source << " -> " << e.target << std::endl;
         count++;
     }
     BOOST_CHECK_EQUAL(count, 11);
-
 }
 
 #endif
diff --git a/tests/sptrsv.cpp b/tests/sptrsv.cpp
index f6dc8e17..6bc49c93 100644
--- a/tests/sptrsv.cpp
+++ b/tests/sptrsv.cpp
@@ -18,30 +18,27 @@ limitations under the License.
 
 // #define EIGEN_FOUND 1
 
-
 #ifdef EIGEN_FOUND
 
-#define BOOST_TEST_MODULE SPTRSV
+#    define BOOST_TEST_MODULE SPTRSV
 
-#include <boost/test/unit_test.hpp>
-#include <iostream>
-#include <filesystem>
-#include <vector>
-#include <iostream>
-#include <Eigen/Sparse>
-#include <unsupported/Eigen/SparseExtra>
+#    include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp"
 
+#    include <Eigen/Sparse>
+#    include <boost/test/unit_test.hpp>
+#    include <filesystem>
+#    include <iostream>
+#    include <unsupported/Eigen/SparseExtra>
+#    include <vector>
 
-#include "osp/graph_algorithms/directed_graph_util.hpp"
-#include "osp/graph_algorithms/directed_graph_path_util.hpp"
-#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
-#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp"
-#include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp"
-#include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp"
+#    include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp"
+#    include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp"
+#    include "osp/graph_algorithms/directed_graph_path_util.hpp"
+#    include "osp/graph_algorithms/directed_graph_util.hpp"
+#    include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp"
 
 using namespace osp;
 
-
 bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) {
     std::cout << std::fixed;
     std::cout << std::setprecision(15);
@@ -49,9 +46,9 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) {
     assert(v1.size() == v2.size());
     bool same = true;
     const double epsilon = 1e-10;
-    for (long long int i=0; i < v1.size(); ++i){
-        //std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n";  
-        if( std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon ){
+    for (long long int i = 0; i < v1.size(); ++i) {
+        // std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n";
+        if (std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon) {
             std::cout << "We have differences in the matrix in position: " << i << std::endl;
             std::cout << v1[i] << " , " << v2[i] << std::endl;
             same = false;
@@ -72,8 +69,8 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
         cwd = cwd.parent_path();
         std::cout << cwd << std::endl;
     }
-    const std::string filename  = (cwd / "data/mtx_tests/ErdosRenyi_2k_14k_A.mtx").string();
-    
+    const std::string filename = (cwd / "data/mtx_tests/ErdosRenyi_2k_14k_A.mtx").string();
+
     SparseMatrixImp<int32_t> graph;
 
     SM_csr L_csr;
@@ -85,8 +82,8 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
         return;
     }
 
-    std::cout << "Loaded matrix of size " << L_csr.rows() << " x " << L_csr.cols()
-              << " with " << L_csr.nonZeros() << " non-zeros.\n";
+    std::cout << "Loaded matrix of size " << L_csr.rows() << " x " << L_csr.cols() << " with " << L_csr.nonZeros()
+              << " non-zeros.\n";
 
     graph.setCSR(&L_csr);
     SM_csc L_csc{};
@@ -115,20 +112,20 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
         std::cout << std::endl;
     }
     */
-   
+
     BOOST_CHECK_EQUAL(result_cs, RETURN_STATUS::OSP_SUCCESS);
     BOOST_CHECK(schedule_cs.hasValidCommSchedule());
 
-    //std::cout << "Scheduling Costs:" << schedule_cs.computeCosts() << std::endl;
-    //std::cout << "lazy com Costs:" <<schedule_cs.compute_lazy_communication_costs() << std::endl;
+    // std::cout << "Scheduling Costs:" << schedule_cs.computeCosts() << std::endl;
+    // std::cout << "lazy com Costs:" <<schedule_cs.compute_lazy_communication_costs() << std::endl;
 
     // Eigen L solve
-    Eigen::VectorXd L_b_ref, L_x_ref; // Declare vectors
-    auto n = L_csc.cols(); // Get the number of columns (assuming square matrix)
-    L_x_ref.resize(n); // Resize solution vector
-    L_b_ref.resize(n); // Resize RHS vector
+    Eigen::VectorXd L_b_ref, L_x_ref;    // Declare vectors
+    auto n = L_csc.cols();               // Get the number of columns (assuming square matrix)
+    L_x_ref.resize(n);                   // Resize solution vector
+    L_b_ref.resize(n);                   // Resize RHS vector
     auto L_view = L_csc.triangularView<Eigen::Lower>();
-    L_b_ref.setOnes();  // Initialize RHS vector with all ones
+    L_b_ref.setOnes();    // Initialize RHS vector with all ones
     L_x_ref.setZero();
     L_x_ref = L_view.solve(L_b_ref);
 
@@ -136,16 +133,15 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     Sptrsv<int32_t> sim{instance};
     sim.setup_csr_no_permutation(schedule_cs);
 
-
-    //osp no permutation L_solve
+    // osp no permutation L_solve
     auto L_x_osp = L_x_ref;
     auto L_b_osp = L_b_ref;
     L_b_osp.setOnes();
-    //L_x_osp.setZero();
+    // L_x_osp.setZero();
     sim.x = &L_x_osp[0];
     sim.b = &L_b_osp[0];
     sim.lsolve_no_permutation();
-    BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp));
+    BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp));
 
     // Comparisson with osp serial L solve
     // Eigen
@@ -154,10 +150,9 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     L_x_ref = L_view.solve(L_b_ref);
     // OSP
     L_b_osp.setOnes();
-    //L_x_osp.setZero();
+    // L_x_osp.setZero();
     sim.lsolve_serial();
-    BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp));
-
+    BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp));
 
     // INPLACE case eigen L solve vs osp L solve
     // Eigen
@@ -166,9 +161,9 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     L_x_ref = L_view.solve(L_b_ref);
     // OSP
     L_x_osp.setConstant(0.1);
-    L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values
+    L_b_osp.setZero();    // this will not be used as x will take the values that already has instead of the b values
     sim.lsolve_no_permutation_in_place();
-    BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp));
+    BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp));
 
     // Comparisson with osp serial in place L solve
     // Eigen
@@ -177,13 +172,13 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     L_x_ref = L_view.solve(L_b_ref);
     // OSP
     L_x_osp.setConstant(0.1);
-    L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values
+    L_b_osp.setZero();    // this will not be used as x will take the values that already has instead of the b values
     sim.lsolve_serial_in_place();
-    BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp));
+    BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp));
 
     // Upper Solve
     SM_csr U_csr = L_csc.transpose();
-    SM_csc U_csc = U_csr;  // Convert to column-major
+    SM_csc U_csc = U_csr;    // Convert to column-major
     Eigen::VectorXd U_b_ref(n), U_x_ref(n);
     Eigen::VectorXd U_b_osp(n), U_x_osp(n);
     // Eigen reference U solve
@@ -208,8 +203,8 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     U_b_osp.setOnes();
     U_x_osp.setZero();
     sim.usolve_serial();
-    BOOST_CHECK(compare_vectors(U_x_ref,U_x_osp));
-    
+    BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp));
+
     // INPLACE case eigen U solve vs osp U solve
     // Eigen
     U_b_ref.setConstant(0.1);
@@ -217,9 +212,9 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     U_x_ref = U_view.solve(U_b_ref);
     // OSP
     U_x_osp.setConstant(0.1);
-    U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values
+    U_b_osp.setZero();    // this will not be used as x will take the values that already has instead of the b values
     sim.usolve_no_permutation_in_place();
-    BOOST_CHECK(compare_vectors(U_x_ref,U_x_osp));
+    BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp));
 
     // Comparisson with osp serial in place U solve
     // Eigen
@@ -228,14 +223,13 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     U_x_ref = U_view.solve(U_b_ref);
     // OSP
     U_x_osp.setConstant(0.1);
-    U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values
+    U_b_osp.setZero();    // this will not be used as x will take the values that already has instead of the b values
     sim.usolve_serial_in_place();
-    BOOST_CHECK(compare_vectors(U_x_ref,U_x_osp));
-
+    BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp));
 
     // Lsolve in-place With PERMUTATION
     std::vector<size_t> perm = schedule_node_permuter_basic(schedule_cs, LOOP_PROCESSORS);
-    sim.setup_csr_with_permutation (schedule_cs, perm);
+    sim.setup_csr_with_permutation(schedule_cs, perm);
 
     // Comparisson with osp serial in place L solve
     // Eigen
@@ -244,17 +238,14 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) {
     L_x_ref = L_view.solve(L_b_ref);
     // OSP
     L_x_osp.setConstant(0.1);
-    L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values
+    L_b_osp.setZero();    // this will not be used as x will take the values that already has instead of the b values
     sim.x = &L_x_osp[0];
     sim.b = &L_b_osp[0];
-    //sim.permute_x_vector(perm);
+    // sim.permute_x_vector(perm);
     sim.lsolve_with_permutation_in_place();
 
     sim.permute_x_vector(perm);
-    BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp));
-
+    BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp));
 }
 
-
-
 #endif
diff --git a/tests/stepbystep_coarsen_and_multilevel.cpp b/tests/stepbystep_coarsen_and_multilevel.cpp
index bfc4931f..270893e9 100644
--- a/tests/stepbystep_coarsen_and_multilevel.cpp
+++ b/tests/stepbystep_coarsen_and_multilevel.cpp
@@ -19,24 +19,21 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE STEPBYSTEP_AND_MULTILEVEL
 #include <boost/test/unit_test.hpp>
-
 #include <filesystem>
 #include <string>
 #include <vector>
 
-#include "osp/coarser/StepByStep/StepByStepCoarser.hpp"
-#include "osp/bsp/scheduler/Scheduler.hpp"
-#include "osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
+#include "osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/coarser/StepByStep/StepByStepCoarser.hpp"
 #include "osp/coarser/coarser_util.hpp"
-
 #include "osp/graph_implementations/boost_graphs/boost_graph.hpp"
 
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) {
-
     using graph = boost_graph_uint_t;
     StepByStepCoarser<graph> test;
 
@@ -57,7 +54,7 @@ BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) {
 
     StepByStepCoarser<graph> coarser;
 
-    coarser.setTargetNumberOfNodes(static_cast<unsigned>(DAG.num_vertices())/2);
+    coarser.setTargetNumberOfNodes(static_cast<unsigned>(DAG.num_vertices()) / 2);
 
     graph coarsened_dag1, coarsened_dag2;
     std::vector<std::vector<vertex_idx_t<graph>>> old_vertex_ids;
@@ -66,14 +63,12 @@ BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) {
     coarser.coarsenDag(DAG, coarsened_dag1, new_vertex_id);
     old_vertex_ids = coarser_util::invert_vertex_contraction_map<graph, graph>(new_vertex_id);
 
-    coarser.setTargetNumberOfNodes(static_cast<unsigned>(DAG.num_vertices())*2/3);
+    coarser.setTargetNumberOfNodes(static_cast<unsigned>(DAG.num_vertices()) * 2 / 3);
     coarser.coarsenForPebbling(DAG, coarsened_dag2, new_vertex_id);
     old_vertex_ids = coarser_util::invert_vertex_contraction_map<graph, graph>(new_vertex_id);
-
 }
 
 BOOST_AUTO_TEST_CASE(Multilevel_test) {
-
     using graph = boost_graph_uint_t;
     StepByStepCoarser<graph> test;
 
@@ -90,27 +85,25 @@ BOOST_AUTO_TEST_CASE(Multilevel_test) {
         std::cout << cwd << std::endl;
     }
 
-    bool status = file_reader::readComputationalDagHyperdagFormatDB(
-        (cwd / "data/spaa/tiny/instance_pregel.hdag").string(), instance.getComputationalDag());
+    bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(),
+                                                                    instance.getComputationalDag());
 
     BOOST_CHECK(status);
 
-
     MultiLevelHillClimbingScheduler<graph> multi1, multi2;
     BspSchedule<graph> schedule1(instance), schedule2(instance);
 
-    multi1.setContractionRate(0.3); 
+    multi1.setContractionRate(0.3);
     multi1.useLinearRefinementSteps(5);
 
     auto result = multi1.computeSchedule(schedule1);
     BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result);
     BOOST_CHECK(schedule1.satisfiesPrecedenceConstraints());
 
-    multi2.setContractionRate(0.3); 
+    multi2.setContractionRate(0.3);
     multi2.useExponentialRefinementPoints(1.2);
 
     result = multi2.computeSchedule(schedule2);
     BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result);
     BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints());
-
-}
\ No newline at end of file
+}
diff --git a/tests/strongly_connected_components.cpp b/tests/strongly_connected_components.cpp
index 8e85a8ff..65e294e8 100644
--- a/tests/strongly_connected_components.cpp
+++ b/tests/strongly_connected_components.cpp
@@ -17,19 +17,18 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE StronglyConnectedComponentsTest
-#include <boost/test/unit_test.hpp>
-
 #include "osp/graph_algorithms/strongly_connected_components.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
 #include <algorithm>
+#include <boost/test/unit_test.hpp>
 #include <set>
 #include <vector>
 
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+
 // Helper function to compare SCC results.
 template <typename VertexType>
-void check_sccs_equal(const std::vector<std::vector<VertexType>> &result,
-                      const std::vector<std::vector<VertexType>> &expected) {
+void check_sccs_equal(const std::vector<std::vector<VertexType>> &result, const std::vector<std::vector<VertexType>> &expected) {
     auto to_set_of_sets = [](const std::vector<std::vector<VertexType>> &vec_of_vecs) {
         std::set<std::set<VertexType>> set_of_sets;
         for (const auto &inner_vec : vec_of_vecs) {
@@ -57,9 +56,9 @@ BOOST_AUTO_TEST_CASE(EmptyGraphTest) {
 
 BOOST_AUTO_TEST_CASE(NoEdgesTest) {
     graph g;
-    g.add_vertex(1,1,1);
-    g.add_vertex(1,1,1);
-    g.add_vertex(1,1,1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
 
     auto sccs = osp::strongly_connected_components(g);
     std::vector<std::vector<VertexType>> expected = {{0}, {1}, {2}};
@@ -68,10 +67,10 @@ BOOST_AUTO_TEST_CASE(NoEdgesTest) {
 
 BOOST_AUTO_TEST_CASE(LineGraphTest) {
     graph g;
-    g.add_vertex(1,1,1); 
-    g.add_vertex(1,1,1); 
-    g.add_vertex(1,1,1); 
-    g.add_vertex(1,1,1); 
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
     g.add_edge(0, 1);
     g.add_edge(1, 2);
     g.add_edge(2, 3);
@@ -83,23 +82,25 @@ BOOST_AUTO_TEST_CASE(LineGraphTest) {
 
 BOOST_AUTO_TEST_CASE(SimpleCycleTest) {
     graph g;
-    g.add_vertex(1,1,1);
-    g.add_vertex(1,1,1);
-    g.add_vertex(1,1,1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
     g.add_edge(0, 1);
     g.add_edge(1, 2);
     g.add_edge(2, 0);
 
     auto sccs = osp::strongly_connected_components(g);
-    std::vector<std::vector<VertexType>> expected = {{0, 1, 2}};
+    std::vector<std::vector<VertexType>> expected = {
+        {0, 1, 2}
+    };
     check_sccs_equal(sccs, expected);
 }
 
 BOOST_AUTO_TEST_CASE(FullGraphIsSCCTest) {
     graph g;
-    g.add_vertex(1,1,1);
-    g.add_vertex(1,1,1);
-    g.add_vertex(1,1,1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
+    g.add_vertex(1, 1, 1);
     g.add_edge(0, 1);
     g.add_edge(1, 0);
     g.add_edge(1, 2);
@@ -108,40 +109,69 @@ BOOST_AUTO_TEST_CASE(FullGraphIsSCCTest) {
     g.add_edge(2, 0);
 
     auto sccs = osp::strongly_connected_components(g);
-    std::vector<std::vector<VertexType>> expected = {{0, 1, 2}};
+    std::vector<std::vector<VertexType>> expected = {
+        {0, 1, 2}
+    };
     check_sccs_equal(sccs, expected);
 }
 
 BOOST_AUTO_TEST_CASE(MultipleSCCsTest) {
- 
     graph g;
-    for (int i = 0; i < 8; ++i)
-        g.add_vertex(1,1,1); 
+    for (int i = 0; i < 8; ++i) {
+        g.add_vertex(1, 1, 1);
+    }
 
-    g.add_edge(0, 1); g.add_edge(1, 2); g.add_edge(2, 0); // SCC {0,1,2}
-    g.add_edge(3, 4); g.add_edge(4, 3); // SCC {3,4}
-    g.add_edge(5, 6); g.add_edge(6, 5); // SCC {5,6}
+    g.add_edge(0, 1);
+    g.add_edge(1, 2);
+    g.add_edge(2, 0);    // SCC {0,1,2}
+    g.add_edge(3, 4);
+    g.add_edge(4, 3);    // SCC {3,4}
+    g.add_edge(5, 6);
+    g.add_edge(6, 5);    // SCC {5,6}
     // SCC {7}
 
-    g.add_edge(2, 3); g.add_edge(3, 5); g.add_edge(4, 6); g.add_edge(5, 7);
+    g.add_edge(2, 3);
+    g.add_edge(3, 5);
+    g.add_edge(4, 6);
+    g.add_edge(5, 7);
 
     auto sccs = osp::strongly_connected_components(g);
-    std::vector<std::vector<VertexType>> expected = {{0, 1, 2}, {3, 4}, {5, 6}, {7}};
+    std::vector<std::vector<VertexType>> expected = {
+        {0, 1, 2},
+        {3, 4},
+        {5, 6},
+        {7}
+    };
     check_sccs_equal(sccs, expected);
 }
 
 BOOST_AUTO_TEST_CASE(ComplexGraphFromPaperTest) {
-
     graph g;
-    for (int i = 0; i < 8; ++i) g.add_vertex(1,1,1); 
-    g.add_edge(0, 1); g.add_edge(1, 2); g.add_edge(1, 4); g.add_edge(1, 5);
-    g.add_edge(2, 3); g.add_edge(2, 6); g.add_edge(3, 2); g.add_edge(3, 7);
-    g.add_edge(4, 0); g.add_edge(4, 5); g.add_edge(5, 6); g.add_edge(6, 5);
-    g.add_edge(7, 3); g.add_edge(7, 6);
+    for (int i = 0; i < 8; ++i) {
+        g.add_vertex(1, 1, 1);
+    }
+    g.add_edge(0, 1);
+    g.add_edge(1, 2);
+    g.add_edge(1, 4);
+    g.add_edge(1, 5);
+    g.add_edge(2, 3);
+    g.add_edge(2, 6);
+    g.add_edge(3, 2);
+    g.add_edge(3, 7);
+    g.add_edge(4, 0);
+    g.add_edge(4, 5);
+    g.add_edge(5, 6);
+    g.add_edge(6, 5);
+    g.add_edge(7, 3);
+    g.add_edge(7, 6);
 
     auto sccs = osp::strongly_connected_components(g);
-    std::vector<std::vector<VertexType>> expected = {{0, 1, 4}, {2, 3, 7}, {5, 6}};
+    std::vector<std::vector<VertexType>> expected = {
+        {0, 1, 4},
+        {2, 3, 7},
+        {5, 6}
+    };
     check_sccs_equal(sccs, expected);
 }
 
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/test_graphs.hpp b/tests/test_graphs.hpp
index 8db94585..fd31c74a 100644
--- a/tests/test_graphs.hpp
+++ b/tests/test_graphs.hpp
@@ -18,10 +18,11 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/concepts/constructable_computational_dag_concept.hpp"
 #include <cmath>
 #include <numeric>
 
+#include "osp/concepts/constructable_computational_dag_concept.hpp"
+
 namespace osp {
 
 std::vector<std::string> tiny_spaa_graphs() {
@@ -40,25 +41,22 @@ std::vector<std::string> tiny_spaa_graphs() {
             "data/spaa/tiny/instance_pregel.hdag",
             "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag",
             "data/spaa/tiny/instance_spmv_N7_nzP0d35.hdag",
-            "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag"  
-        };
+            "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag"};
 }
 
 std::vector<std::string> large_spaa_graphs() {
     return {"data/spaa/large/instance_exp_N50_K12_nzP0d15.hdag",
             "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag",
             "data/spaa/large/instance_kNN_N45_K15_nzP0d16.hdag",
-            "data/spaa/large/instance_spmv_N120_nzP0d18.hdag"
-};
+            "data/spaa/large/instance_spmv_N120_nzP0d18.hdag"};
 }
 
 std::vector<std::string> test_graphs() {
-    return {"data/spaa/tiny/instance_k-means.hdag", 
-        "data/spaa/tiny/instance_bicgstab.hdag",
+    return {"data/spaa/tiny/instance_k-means.hdag",
+            "data/spaa/tiny/instance_bicgstab.hdag",
             "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag"};
 }
 
-
 /**
  * @brief Constructs a DAG with multiple identical, parallel pipelines.
  *
@@ -73,11 +71,13 @@ std::vector<std::string> test_graphs() {
  * @param pipeline_len The length of each pipeline.
  * @return A Graph_t object representing the DAG.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_multi_pipeline_dag(unsigned num_pipelines, unsigned pipeline_len) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
-    if (num_pipelines == 0 || pipeline_len == 0) return dag;
+    if (num_pipelines == 0 || pipeline_len == 0) {
+        return dag;
+    }
 
     for (unsigned i = 0; i < num_pipelines; ++i) {
         for (unsigned j = 0; j < pipeline_len; ++j) {
@@ -104,15 +104,17 @@ inline Graph_t construct_multi_pipeline_dag(unsigned num_pipelines, unsigned pip
  * @param num_rungs The number of rungs in the ladder.
  * @return A Graph_t object representing the DAG.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_ladder_dag(unsigned num_rungs) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
-    if (num_rungs == 0) return dag;
+    if (num_rungs == 0) {
+        return dag;
+    }
 
     for (unsigned i = 0; i < num_rungs + 1; ++i) {
-        dag.add_vertex(10, 1, 1); // Left side node
-        dag.add_vertex(20, 1, 1); // Right side node
+        dag.add_vertex(10, 1, 1);    // Left side node
+        dag.add_vertex(20, 1, 1);    // Right side node
     }
 
     for (unsigned i = 0; i < num_rungs; ++i) {
@@ -138,7 +140,7 @@ inline Graph_t construct_ladder_dag(unsigned num_rungs) {
  * @param num_nodes The number of nodes in the chain.
  * @return A Graph_t object representing the DAG.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_asymmetric_dag(unsigned num_nodes) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
@@ -157,12 +159,14 @@ inline Graph_t construct_asymmetric_dag(unsigned num_nodes) {
  * @param height The height of the tree. A height of 0 is a single node. Total nodes: 2^(height+1) - 1.
  * @return A Graph_t object representing the out-tree.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_binary_out_tree(unsigned height) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
     unsigned num_nodes = (1U << (height + 1)) - 1;
-    if (num_nodes == 0) return dag;
+    if (num_nodes == 0) {
+        return dag;
+    }
 
     for (unsigned i = 0; i < num_nodes; ++i) {
         dag.add_vertex(10, 1, 1);
@@ -181,12 +185,14 @@ inline Graph_t construct_binary_out_tree(unsigned height) {
  * @param height The height of the tree. A height of 0 is a single node. Total nodes: 2^(height+1) - 1.
  * @return A Graph_t object representing the in-tree.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_binary_in_tree(unsigned height) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
     unsigned num_nodes = (1U << (height + 1)) - 1;
-    if (num_nodes == 0) return dag;
+    if (num_nodes == 0) {
+        return dag;
+    }
 
     for (unsigned i = 0; i < num_nodes; ++i) {
         dag.add_vertex(10, 1, 1);
@@ -206,11 +212,13 @@ inline Graph_t construct_binary_in_tree(unsigned height) {
  * @param cols The number of columns in the grid.
  * @return A Graph_t object representing the grid.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
-    if (rows == 0 || cols == 0) return dag;
+    if (rows == 0 || cols == 0) {
+        return dag;
+    }
 
     for (unsigned i = 0; i < rows * cols; ++i) {
         dag.add_vertex(10, 1, 1);
@@ -218,8 +226,12 @@ inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) {
 
     for (unsigned r = 0; r < rows; ++r) {
         for (unsigned c = 0; c < cols; ++c) {
-            if (r + 1 < rows) dag.add_edge(r * cols + c, (r + 1) * cols + c);
-            if (c + 1 < cols) dag.add_edge(r * cols + c, r * cols + (c + 1));
+            if (r + 1 < rows) {
+                dag.add_edge(r * cols + c, (r + 1) * cols + c);
+            }
+            if (c + 1 < cols) {
+                dag.add_edge(r * cols + c, r * cols + (c + 1));
+            }
         }
     }
     return dag;
@@ -231,11 +243,13 @@ inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) {
  * @param stages The number of stages (log2 of the number of inputs). Total nodes: (stages+1) * 2^stages.
  * @return A Graph_t object representing the butterfly graph.
  */
-template<typename Graph_t>
+template <typename Graph_t>
 inline Graph_t construct_butterfly_dag(unsigned stages) {
     static_assert(is_constructable_cdag_v<Graph_t>, "Graph_t must be a constructable computational DAG");
     Graph_t dag;
-    if (stages == 0) return dag;
+    if (stages == 0) {
+        return dag;
+    }
 
     unsigned N = 1U << stages;
     for (unsigned i = 0; i < (stages + 1) * N; ++i) {
@@ -254,4 +268,4 @@ inline Graph_t construct_butterfly_dag(unsigned stages) {
     return dag;
 }
 
-} // namespace osp::test
\ No newline at end of file
+}    // namespace osp
diff --git a/tests/transitive_reduction.cpp b/tests/transitive_reduction.cpp
index b2758f31..272b3807 100644
--- a/tests/transitive_reduction.cpp
+++ b/tests/transitive_reduction.cpp
@@ -17,12 +17,13 @@ limitations under the License.
 */
 
 #define BOOST_TEST_MODULE TransitiveReduction
+#include "osp/graph_algorithms/transitive_reduction.hpp"
+
 #include <boost/test/unit_test.hpp>
 
-#include "test_graphs.hpp"
-#include "osp/graph_algorithms/transitive_reduction.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/graph_algorithms/subgraph_algorithms.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
+#include "test_graphs.hpp"
 
 using namespace osp;
 using graph_t = computational_dag_vector_impl_def_t;
@@ -32,12 +33,12 @@ BOOST_AUTO_TEST_SUITE(TransitiveReduction)
 // Test with a simple chain graph that has a transitive edge
 BOOST_AUTO_TEST_CASE(SimpleTransitiveEdge) {
     graph_t dag;
-    dag.add_vertex(1, 1, 1); // 0
-    dag.add_vertex(1, 1, 1); // 1
-    dag.add_vertex(1, 1, 1); // 2
+    dag.add_vertex(1, 1, 1);    // 0
+    dag.add_vertex(1, 1, 1);    // 1
+    dag.add_vertex(1, 1, 1);    // 2
     dag.add_edge(0, 1);
     dag.add_edge(1, 2);
-    dag.add_edge(0, 2); // Transitive edge
+    dag.add_edge(0, 2);    // Transitive edge
 
     BOOST_REQUIRE_EQUAL(dag.num_vertices(), 3);
     BOOST_REQUIRE_EQUAL(dag.num_edges(), 3);
@@ -56,7 +57,7 @@ BOOST_AUTO_TEST_CASE(SimpleTransitiveEdge) {
 
 // Test with a graph that has no transitive edges
 BOOST_AUTO_TEST_CASE(NoTransitiveEdges) {
-    const auto dag = construct_ladder_dag<graph_t>(3); // A ladder graph has no transitive edges
+    const auto dag = construct_ladder_dag<graph_t>(3);    // A ladder graph has no transitive edges
     BOOST_REQUIRE_EQUAL(dag.num_vertices(), 8);
     BOOST_REQUIRE_EQUAL(dag.num_edges(), 11);
 
@@ -78,11 +79,11 @@ BOOST_AUTO_TEST_CASE(ComplexGraph) {
     // 2 -> 3
     // 3 -> 4
     // 0 -> 4 (transitive)
-    dag.add_vertex(1, 1, 1); // 0
-    dag.add_vertex(1, 1, 1); // 1
-    dag.add_vertex(1, 1, 1); // 2
-    dag.add_vertex(1, 1, 1); // 3
-    dag.add_vertex(1, 1, 1); // 4
+    dag.add_vertex(1, 1, 1);    // 0
+    dag.add_vertex(1, 1, 1);    // 1
+    dag.add_vertex(1, 1, 1);    // 2
+    dag.add_vertex(1, 1, 1);    // 3
+    dag.add_vertex(1, 1, 1);    // 4
 
     dag.add_edge(0, 1);
     dag.add_edge(0, 2);
@@ -90,8 +91,8 @@ BOOST_AUTO_TEST_CASE(ComplexGraph) {
     dag.add_edge(2, 3);
     dag.add_edge(3, 4);
     // Add transitive edges
-    dag.add_edge(0, 3); // transitive via 0->1->3 or 0->2->3
-    dag.add_edge(0, 4); // transitive via 0->...->3->4
+    dag.add_edge(0, 3);    // transitive via 0->1->3 or 0->2->3
+    dag.add_edge(0, 4);    // transitive via 0->...->3->4
 
     BOOST_REQUIRE_EQUAL(dag.num_vertices(), 5);
     BOOST_REQUIRE_EQUAL(dag.num_edges(), 7);
diff --git a/tests/trimmed_group_scheduler.cpp b/tests/trimmed_group_scheduler.cpp
index ccbfee8a..63ba9ed9 100644
--- a/tests/trimmed_group_scheduler.cpp
+++ b/tests/trimmed_group_scheduler.cpp
@@ -30,7 +30,7 @@ using namespace osp;
 using graph_t = computational_dag_vector_impl_def_t;
 
 // Mock SubScheduler for TrimmedGroupScheduler tests
-template<typename Constr_Graph_t>
+template <typename Constr_Graph_t>
 class MockSubScheduler : public Scheduler<Constr_Graph_t> {
   public:
     // This mock scheduler assigns all nodes to local processor 0 and superstep 0.
@@ -57,7 +57,7 @@ struct TrimmedGroupSchedulerFixture {
         // Default architecture: 1 processor type, 100 memory bound
         arch.setCommunicationCosts(1);
         arch.setSynchronisationCosts(1);
-        instance.setAllOnesCompatibilityMatrix(); // All node types compatible with all processor types
+        instance.setAllOnesCompatibilityMatrix();    // All node types compatible with all processor types
     }
 };
 
@@ -78,9 +78,9 @@ BOOST_AUTO_TEST_CASE(EmptyGraphTest) {
 
 BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) {
     // Graph: 0-1-2 (single component)
-    dag.add_vertex(1, 1, 1, 0); // 0
-    dag.add_vertex(1, 1, 1, 0); // 1
-    dag.add_vertex(1, 1, 1, 0); // 2
+    dag.add_vertex(1, 1, 1, 0);    // 0
+    dag.add_vertex(1, 1, 1, 0);    // 1
+    dag.add_vertex(1, 1, 1, 0);    // 2
     dag.add_edge(0, 1);
     dag.add_edge(1, 2);
     instance.getComputationalDag() = dag;
@@ -109,10 +109,10 @@ BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) {
 
 BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest) {
     // Graph: 0-1 (component 0), 2-3 (component 1)
-    dag.add_vertex(1, 1, 1, 0); // 0
-    dag.add_vertex(1, 1, 1, 0); // 1
-    dag.add_vertex(1, 1, 1, 0); // 2
-    dag.add_vertex(1, 1, 1, 0); // 3
+    dag.add_vertex(1, 1, 1, 0);    // 0
+    dag.add_vertex(1, 1, 1, 0);    // 1
+    dag.add_vertex(1, 1, 1, 0);    // 2
+    dag.add_vertex(1, 1, 1, 0);    // 3
     dag.add_edge(0, 1);
     dag.add_edge(2, 3);
     instance.getComputationalDag() = dag;
@@ -147,9 +147,9 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest)
 
 BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest) {
     // Graph: 0 (component 0), 1 (component 1), 2 (component 2) - all isolated
-    dag.add_vertex(1, 1, 1, 0); // 0
-    dag.add_vertex(1, 1, 1, 0); // 1
-    dag.add_vertex(1, 1, 1, 0); // 2
+    dag.add_vertex(1, 1, 1, 0);    // 0
+    dag.add_vertex(1, 1, 1, 0);    // 1
+    dag.add_vertex(1, 1, 1, 0);    // 2
     instance.getComputationalDag() = dag;
 
     // Architecture: 6 processors of type 0
@@ -184,14 +184,14 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest
 
 BOOST_AUTO_TEST_CASE(MultipleComponentsHeterogeneousArchitectureTest) {
     // Graph: 0 (type 0), 1 (type 1) - isolated nodes
-    dag.add_vertex(1, 1, 1, 0); // 0 (component 0, type 0)
-    dag.add_vertex(1, 1, 1, 1); // 1 (component 1, type 1)
+    dag.add_vertex(1, 1, 1, 0);    // 0 (component 0, type 0)
+    dag.add_vertex(1, 1, 1, 1);    // 1 (component 1, type 1)
     instance.getComputationalDag() = dag;
 
     // Architecture: 2 processors of type 0 (global 0,1), 2 processors of type 1 (global 2,3)
     arch.setProcessorsWithTypes({0, 0, 1, 1});
     instance.getArchitecture() = arch;
-    instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc.
+    instance.setDiagonalCompatibilityMatrix(2);    // Node type 0 compatible with proc type 0, etc.
 
     // min_non_zero_procs_ = 2 (2 components, 2 groups)
     // sub_proc_counts for type 0: 2 / 2 = 1
diff --git a/tests/uf_structures.cpp b/tests/uf_structures.cpp
index 61245443..f19bc9be 100644
--- a/tests/uf_structures.cpp
+++ b/tests/uf_structures.cpp
@@ -16,15 +16,14 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-
 #define BOOST_TEST_MODULE Union_Find
 #include <boost/test/unit_test.hpp>
-
-#include "osp/auxiliary/datastructures/union_find.hpp"
 #include <iostream>
 #include <string>
 #include <vector>
 
+#include "osp/auxiliary/datastructures/union_find.hpp"
+
 using namespace osp;
 
 BOOST_AUTO_TEST_CASE(Union_find_structure1) {
@@ -175,21 +174,20 @@ BOOST_AUTO_TEST_CASE(Union_find_weight_structure) {
     BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("e"), 2);
     BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("b"), 7);
 
-    std::vector<std::pair<std::vector<std::string>, unsigned>> components_n_weights =
-        test_universe.get_connected_components_and_weights();
+    std::vector<std::pair<std::vector<std::string>, unsigned>> components_n_weights
+        = test_universe.get_connected_components_and_weights();
     unsigned total_comp_weights = 0;
     unsigned total_elements = 0;
     for (auto &[comp, wt] : components_n_weights) {
         total_comp_weights += wt;
         total_elements += static_cast<unsigned>(comp.size());
         for (auto &name : comp) {
-            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(),
-                                    [name](std::string other_name) { return name == other_name; }));
+            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; }));
         }
     }
 
-    std::vector<std::tuple<std::vector<std::string>, unsigned, unsigned>> components_n_weights_n_memory =
-        test_universe.get_connected_components_weights_and_memories();
+    std::vector<std::tuple<std::vector<std::string>, unsigned, unsigned>> components_n_weights_n_memory
+        = test_universe.get_connected_components_weights_and_memories();
     unsigned total_comp_weights_2 = 0;
     unsigned total_comp_memory = 0;
     unsigned total_elements_2 = 0;
@@ -198,8 +196,7 @@ BOOST_AUTO_TEST_CASE(Union_find_weight_structure) {
         total_comp_memory += mem;
         total_elements_2 += static_cast<unsigned>(comp.size());
         for (auto &name : comp) {
-            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(),
-                                    [name](std::string other_name) { return name == other_name; }));
+            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; }));
         }
     }
 
@@ -215,9 +212,11 @@ BOOST_AUTO_TEST_CASE(Union_find_weight_structure) {
     BOOST_CHECK_EQUAL(total_weight, total_comp_memory);
 
     for (auto &name : names) {
-        BOOST_CHECK(std::any_of(components_n_weights.cbegin(), components_n_weights.cend(),
+        BOOST_CHECK(std::any_of(components_n_weights.cbegin(),
+                                components_n_weights.cend(),
                                 [name](std::pair<std::vector<std::string>, unsigned> comp_pair) {
-                                    return std::any_of(comp_pair.first.cbegin(), comp_pair.first.cend(),
+                                    return std::any_of(comp_pair.first.cbegin(),
+                                                       comp_pair.first.cend(),
                                                        [name](std::string other_name) { return name == other_name; });
                                 }));
     }
@@ -263,36 +262,35 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_comp_count) {
     BOOST_CHECK_NE(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("e"));
     BOOST_CHECK_NE(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("d"));
 
-    std::vector<std::pair<std::vector<std::string>, unsigned>> comp_n_weights =
-        test_universe.get_connected_components_and_weights();
+    std::vector<std::pair<std::vector<std::string>, unsigned>> comp_n_weights
+        = test_universe.get_connected_components_and_weights();
     BOOST_CHECK(comp_n_weights.size() == 2);
     BOOST_CHECK(comp_n_weights.size() == test_universe.get_number_of_connected_components());
     BOOST_CHECK(comp_n_weights[0].first.size() == 3);
     BOOST_CHECK(comp_n_weights[1].first.size() == 3);
-    BOOST_CHECK((comp_n_weights[0].second == 4 && comp_n_weights[1].second == 5) ||
-                (comp_n_weights[0].second == 5 && comp_n_weights[1].second == 4));
+    BOOST_CHECK((comp_n_weights[0].second == 4 && comp_n_weights[1].second == 5)
+                || (comp_n_weights[0].second == 5 && comp_n_weights[1].second == 4));
 
-    std::vector<std::tuple<std::vector<std::string>, unsigned, unsigned>> comp_n_weight_n_memory =
-        test_universe.get_connected_components_weights_and_memories();
+    std::vector<std::tuple<std::vector<std::string>, unsigned, unsigned>> comp_n_weight_n_memory
+        = test_universe.get_connected_components_weights_and_memories();
     BOOST_CHECK(comp_n_weight_n_memory.size() == 2);
     BOOST_CHECK(comp_n_weight_n_memory.size() == test_universe.get_number_of_connected_components());
     BOOST_CHECK(std::get<0>(comp_n_weight_n_memory[0]).size() == 3);
     BOOST_CHECK(std::get<0>(comp_n_weight_n_memory[1]).size() == 3);
-    BOOST_CHECK((std::get<1>(comp_n_weight_n_memory[0]) == 4 && std::get<1>(comp_n_weight_n_memory[1]) == 5) ||
-                (std::get<1>(comp_n_weight_n_memory[0]) == 5 && std::get<1>(comp_n_weight_n_memory[1]) == 4));
-    BOOST_CHECK((std::get<2>(comp_n_weight_n_memory[0]) == 4 && std::get<2>(comp_n_weight_n_memory[1]) == 5) ||
-                (std::get<2>(comp_n_weight_n_memory[0]) == 5 && std::get<2>(comp_n_weight_n_memory[1]) == 4));
+    BOOST_CHECK((std::get<1>(comp_n_weight_n_memory[0]) == 4 && std::get<1>(comp_n_weight_n_memory[1]) == 5)
+                || (std::get<1>(comp_n_weight_n_memory[0]) == 5 && std::get<1>(comp_n_weight_n_memory[1]) == 4));
+    BOOST_CHECK((std::get<2>(comp_n_weight_n_memory[0]) == 4 && std::get<2>(comp_n_weight_n_memory[1]) == 5)
+                || (std::get<2>(comp_n_weight_n_memory[0]) == 5 && std::get<2>(comp_n_weight_n_memory[1]) == 4));
 
-    std::vector<std::pair<std::vector<std::string>, unsigned>> components_n_weights =
-        test_universe.get_connected_components_and_weights();
+    std::vector<std::pair<std::vector<std::string>, unsigned>> components_n_weights
+        = test_universe.get_connected_components_and_weights();
     unsigned total_comp_weights = 0;
     unsigned total_elements = 0;
     for (auto &[comp, wt] : components_n_weights) {
         total_comp_weights += wt;
         total_elements += static_cast<unsigned>(comp.size());
         for (auto &name : comp) {
-            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(),
-                                    [name](std::string other_name) { return name == other_name; }));
+            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; }));
         }
     }
 
@@ -304,9 +302,11 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_comp_count) {
     BOOST_CHECK_EQUAL(total_elements, names.size());
     BOOST_CHECK_EQUAL(total_weight, total_comp_weights);
     for (auto &name : names) {
-        BOOST_CHECK(std::any_of(components_n_weights.cbegin(), components_n_weights.cend(),
+        BOOST_CHECK(std::any_of(components_n_weights.cbegin(),
+                                components_n_weights.cend(),
                                 [name](std::pair<std::vector<std::string>, unsigned> comp_pair) {
-                                    return std::any_of(comp_pair.first.cbegin(), comp_pair.first.cend(),
+                                    return std::any_of(comp_pair.first.cbegin(),
+                                                       comp_pair.first.cend(),
                                                        [name](std::string other_name) { return name == other_name; });
                                 }));
     }
@@ -366,16 +366,15 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_chains_comp_count) {
     BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("h"));
     BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("i"));
 
-    std::vector<std::pair<std::vector<std::string>, unsigned>> components_n_weights =
-        test_universe.get_connected_components_and_weights();
+    std::vector<std::pair<std::vector<std::string>, unsigned>> components_n_weights
+        = test_universe.get_connected_components_and_weights();
     unsigned total_comp_weights = 0;
     unsigned total_elements = 0;
     for (auto &[comp, wt] : components_n_weights) {
         total_comp_weights += wt;
         total_elements += static_cast<unsigned>(comp.size());
         for (auto &name : comp) {
-            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(),
-                                    [name](std::string other_name) { return name == other_name; }));
+            BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; }));
         }
     }
 
@@ -387,9 +386,11 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_chains_comp_count) {
     BOOST_CHECK_EQUAL(total_elements, names.size());
     BOOST_CHECK_EQUAL(total_weight, total_comp_weights);
     for (auto &name : names) {
-        BOOST_CHECK(std::any_of(components_n_weights.cbegin(), components_n_weights.cend(),
+        BOOST_CHECK(std::any_of(components_n_weights.cbegin(),
+                                components_n_weights.cend(),
                                 [name](std::pair<std::vector<std::string>, unsigned> comp_pair) {
-                                    return std::any_of(comp_pair.first.cbegin(), comp_pair.first.cend(),
+                                    return std::any_of(comp_pair.first.cbegin(),
+                                                       comp_pair.first.cend(),
                                                        [name](std::string other_name) { return name == other_name; });
                                 }));
     }
diff --git a/tests/wavefront_component_divider.cpp b/tests/wavefront_component_divider.cpp
index e2e3be52..37cd28fa 100644
--- a/tests/wavefront_component_divider.cpp
+++ b/tests/wavefront_component_divider.cpp
@@ -16,15 +16,15 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-
 #define BOOST_TEST_MODULE SequenceSplitterTest
 #include <boost/test/unit_test.hpp>
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/dag_divider/wavefront_divider/SequenceSplitter.hpp" 
-#include "osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp"
-#include "osp/dag_divider/wavefront_divider/SequenceGenerator.hpp" 
-#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp"
+
 #include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp"
+#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp"
+#include "osp/dag_divider/wavefront_divider/SequenceGenerator.hpp"
+#include "osp/dag_divider/wavefront_divider/SequenceSplitter.hpp"
+#include "osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
 BOOST_AUTO_TEST_CASE(VarianceSplitterTest) {
     osp::VarianceSplitter splitter(0.8, 0.1);
@@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(LargestStepSplitterTest) {
     std::vector<size_t> splits3 = splitter.split(seq3);
     std::vector<size_t> expected3 = {3};
     BOOST_CHECK_EQUAL_COLLECTIONS(splits3.begin(), splits3.end(), expected3.begin(), expected3.end());
-    
+
     // Test case 4: Sequence too short
     std::vector<double> seq4 = {1, 10};
     std::vector<size_t> splits4 = splitter.split(seq4);
@@ -114,7 +114,7 @@ BOOST_AUTO_TEST_CASE(ThresholdScanSplitterTest) {
     std::vector<double> seq4 = {1, 2, 3, 4, 5};
     std::vector<size_t> splits4 = splitter.split(seq4);
     BOOST_CHECK(splits4.empty());
-    
+
     // Test case 5: Empty sequence
     std::vector<double> seq5 = {};
     std::vector<size_t> splits5 = splitter.split(seq5);
@@ -125,7 +125,6 @@ using graph = osp::computational_dag_edge_idx_vector_impl_def_int_t;
 using VertexType = graph::vertex_idx;
 
 BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) {
-    
     graph dag;
     const auto v1 = dag.add_vertex(2, 1, 9);
     const auto v2 = dag.add_vertex(3, 1, 8);
@@ -133,7 +132,7 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) {
     const auto v4 = dag.add_vertex(5, 1, 6);
     const auto v5 = dag.add_vertex(6, 1, 5);
     const auto v6 = dag.add_vertex(7, 1, 4);
-    const auto v7 = dag.add_vertex(8, 1, 3); // Note: v7 is not connected in the example
+    const auto v7 = dag.add_vertex(8, 1, 3);    // Note: v7 is not connected in the example
     const auto v8 = dag.add_vertex(9, 1, 2);
 
     dag.add_edge(v1, v2);
@@ -148,11 +147,11 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) {
 
     // Manually defined level sets for this DAG
     const std::vector<std::vector<VertexType>> level_sets = {
-        {v1},       // Level 0
+        {v1}, // Level 0
         {v2, v3, v4}, // Level 1
-        {v5, v6},   // Level 2
-        {v8},       // Level 3
-        {v7}        // Level 4 (isolated vertex)
+        {v5, v6}, // Level 2
+        {v8}, // Level 3
+        {v7}  // Level 4 (isolated vertex)
     };
 
     osp::WavefrontStatisticsCollector<graph> collector(dag, level_sets);
@@ -168,43 +167,42 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) {
 
     // Level 1
     BOOST_CHECK_EQUAL(forward_stats[1].connected_components_vertices.size(), 1);
-    BOOST_CHECK_EQUAL(forward_stats[1].connected_components_weights[0], 2 + 3 + 4 + 5); // v1,v2,v3,v4
+    BOOST_CHECK_EQUAL(forward_stats[1].connected_components_weights[0], 2 + 3 + 4 + 5);    // v1,v2,v3,v4
     BOOST_CHECK_EQUAL(forward_stats[1].connected_components_memories[0], 9 + 8 + 7 + 6);
 
     // Level 2
     BOOST_CHECK_EQUAL(forward_stats[2].connected_components_vertices.size(), 1);
-    BOOST_CHECK_EQUAL(forward_stats[2].connected_components_weights[0], 14 + 6 + 7); // v1-v6
+    BOOST_CHECK_EQUAL(forward_stats[2].connected_components_weights[0], 14 + 6 + 7);    // v1-v6
     BOOST_CHECK_EQUAL(forward_stats[2].connected_components_memories[0], 30 + 5 + 4);
 
     // Level 3
     BOOST_CHECK_EQUAL(forward_stats[3].connected_components_vertices.size(), 1);
-    BOOST_CHECK_EQUAL(forward_stats[3].connected_components_weights[0], 27 + 9); // v1-v6, v8
+    BOOST_CHECK_EQUAL(forward_stats[3].connected_components_weights[0], 27 + 9);    // v1-v6, v8
     BOOST_CHECK_EQUAL(forward_stats[3].connected_components_memories[0], 39 + 2);
 
     // Level 4 (isolated vertex shows up as a new component)
     BOOST_CHECK_EQUAL(forward_stats[4].connected_components_vertices.size(), 2);
 
-
     // --- Test Backward Pass ---
     auto backward_stats = collector.compute_backward();
     BOOST_REQUIRE_EQUAL(backward_stats.size(), 5);
 
     // Level 4
     BOOST_CHECK_EQUAL(backward_stats[4].connected_components_vertices.size(), 1);
-    BOOST_CHECK_EQUAL(backward_stats[4].connected_components_weights[0], 8); // v7
+    BOOST_CHECK_EQUAL(backward_stats[4].connected_components_weights[0], 8);    // v7
     BOOST_CHECK_EQUAL(backward_stats[4].connected_components_memories[0], 3);
 
     // Level 3
-    BOOST_CHECK_EQUAL(backward_stats[3].connected_components_vertices.size(), 2); // {v8}, {v7}
+    BOOST_CHECK_EQUAL(backward_stats[3].connected_components_vertices.size(), 2);    // {v8}, {v7}
 
     // Level 2
-    BOOST_CHECK_EQUAL(backward_stats[2].connected_components_vertices.size(), 3); // {v5,v8}, {v6}, {v7}
+    BOOST_CHECK_EQUAL(backward_stats[2].connected_components_vertices.size(), 3);    // {v5,v8}, {v6}, {v7}
 
     // Level 1
-    BOOST_CHECK_EQUAL(backward_stats[1].connected_components_vertices.size(), 2); // {v2,v3,v4,v5,v6,v8}, {v7}
+    BOOST_CHECK_EQUAL(backward_stats[1].connected_components_vertices.size(), 2);    // {v2,v3,v4,v5,v6,v8}, {v7}
 
     // Level 0
-    BOOST_CHECK_EQUAL(backward_stats[0].connected_components_vertices.size(), 2); // {v1-v6,v8}, {v7}
+    BOOST_CHECK_EQUAL(backward_stats[0].connected_components_vertices.size(), 2);    // {v1-v6,v8}, {v7}
 }
 
 BOOST_AUTO_TEST_CASE(SequenceGenerationTest) {
@@ -216,7 +214,7 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) {
     const auto v4 = dag.add_vertex(5, 1, 6);
     const auto v5 = dag.add_vertex(6, 1, 5);
     const auto v6 = dag.add_vertex(7, 1, 4);
-    const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex
+    const auto v7 = dag.add_vertex(8, 1, 3);    // Isolated vertex
     const auto v8 = dag.add_vertex(9, 1, 2);
 
     dag.add_edge(v1, v2);
@@ -230,7 +228,11 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) {
     dag.add_edge(v4, v8);
 
     const std::vector<std::vector<VertexType>> level_sets = {
-        {v1}, {v2, v3, v4}, {v5, v6}, {v8}, {v7}
+        {v1},
+        {v2, v3, v4},
+        {v5, v6},
+        {v8},
+        {v7}
     };
 
     osp::SequenceGenerator<graph> generator(dag, level_sets);
@@ -238,12 +240,12 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) {
     // --- Test Component Count ---
     auto component_seq = generator.generate(osp::SequenceMetric::COMPONENT_COUNT);
     std::vector<double> expected_components = {1.0, 1.0, 1.0, 1.0, 2.0};
-    BOOST_CHECK_EQUAL_COLLECTIONS(component_seq.begin(), component_seq.end(),
-                                  expected_components.begin(), expected_components.end());
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        component_seq.begin(), component_seq.end(), expected_components.begin(), expected_components.end());
 
     // --- Test Available Parallelism ---
     auto parallelism_seq = generator.generate(osp::SequenceMetric::AVAILABLE_PARALLELISM);
-    
+
     // Manual calculation for expected values:
     // L0: 2 / 1 = 2
     // L1: (2 + 3+4+5) / 2 = 14 / 2 = 7
@@ -276,9 +278,9 @@ struct TestFixture {
         const auto v4 = dag.add_vertex(5, 1, 6);
         const auto v5 = dag.add_vertex(6, 1, 5);
         const auto v6 = dag.add_vertex(7, 1, 4);
-        const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex
+        const auto v7 = dag.add_vertex(8, 1, 3);    // Isolated vertex
         const auto v8 = dag.add_vertex(9, 1, 2);
-        
+
         vertices = {v1, v2, v3, v4, v5, v6, v7, v8};
 
         dag.add_edge(v1, v2);
@@ -298,7 +300,7 @@ BOOST_FIXTURE_TEST_SUITE(ScanWavefrontDividerTestSuite, TestFixture)
 BOOST_AUTO_TEST_CASE(LargestStepDivisionTest) {
     osp::ScanWavefrontDivider<graph> divider;
     divider.set_metric(osp::SequenceMetric::AVAILABLE_PARALLELISM);
-    divider.use_largest_step_splitter(0.9,1);
+    divider.use_largest_step_splitter(0.9, 1);
 
     auto sections = divider.divide(dag);
 
@@ -310,14 +312,14 @@ BOOST_AUTO_TEST_CASE(LargestStepDivisionTest) {
 
     // Section 2: levels 1, 2, 3. The rest of the main component.
     BOOST_REQUIRE_EQUAL(sections[1].size(), 1);
-    BOOST_CHECK_EQUAL(sections[1][0].size(), 6); // v2,v3,v4,v5,v6,v8
+    BOOST_CHECK_EQUAL(sections[1][0].size(), 6);    // v2,v3,v4,v5,v6,v8
 }
 
 BOOST_AUTO_TEST_CASE(ThresholdScanDivisionTest) {
     osp::ScanWavefrontDivider<graph> divider;
     divider.set_metric(osp::SequenceMetric::AVAILABLE_PARALLELISM);
     divider.use_threshold_scan_splitter(2.0, 11.5);
-   
+
     auto sections = divider.divide(dag);
 
     // A cut is expected when the sequence crosses 11.5 (at level 2) and crosses back (at level 3)
@@ -333,17 +335,16 @@ BOOST_AUTO_TEST_CASE(ThresholdScanDivisionTest) {
     BOOST_REQUIRE_EQUAL(sections[2].size(), 1);
 }
 
-
 BOOST_AUTO_TEST_CASE(NoCutDivisionTest) {
     osp::ScanWavefrontDivider<graph> divider;
     divider.set_metric(osp::SequenceMetric::COMPONENT_COUNT);
-    divider.use_largest_step_splitter(2.0, 2); 
+    divider.use_largest_step_splitter(2.0, 2);
 
     auto sections = divider.divide(dag);
 
     // Expecting a single section containing all components
     BOOST_REQUIRE_EQUAL(sections.size(), 1);
-    BOOST_REQUIRE_EQUAL(sections[0].size(), 2); // Two final components
+    BOOST_REQUIRE_EQUAL(sections[0].size(), 2);    // Two final components
 }
 
 BOOST_AUTO_TEST_CASE(EmptyGraphTest) {
@@ -371,7 +372,7 @@ struct TestFixture_2 {
         const auto v5 = dag.add_vertex(1, 1, 1);
         const auto v6 = dag.add_vertex(1, 1, 1);
         const auto v7 = dag.add_vertex(1, 1, 1);
-        
+
         vertices = {v1, v2, v3, v4, v5, v6, v7};
 
         dag.add_edge(v1, v3);
@@ -399,7 +400,7 @@ struct TestFixture_SimpleMerge {
         const auto v4 = dag.add_vertex(1, 1, 1);
         const auto v5 = dag.add_vertex(1, 1, 1);
         const auto v6 = dag.add_vertex(1, 1, 1);
-        
+
         dag.add_edge(v0, v2);
         dag.add_edge(v1, v3);
         dag.add_edge(v2, v4);
@@ -412,9 +413,8 @@ struct TestFixture_SimpleMerge {
 BOOST_FIXTURE_TEST_SUITE(SimpleMergeTests, TestFixture_SimpleMerge)
 
 BOOST_AUTO_TEST_CASE(BasicRecursionTest) {
-
     osp::RecursiveWavefrontDivider<graph> divider;
-    divider.use_largest_step_splitter(0.5, 1); 
+    divider.use_largest_step_splitter(0.5, 1);
     auto sections = divider.divide(dag);
 
     // Expecting a cut after level 2, where component count drops from 2 to 1.
@@ -453,8 +453,7 @@ BOOST_AUTO_TEST_CASE(MinSubsequenceLengthTest) {
 BOOST_AUTO_TEST_CASE(MaxDepthTest) {
     // Setting max_depth to 0 should prevent any recursion.
     osp::RecursiveWavefrontDivider<graph> divider;
-    divider.use_largest_step_splitter(0.5, 2)
-           .set_max_depth(0);
+    divider.use_largest_step_splitter(0.5, 2).set_max_depth(0);
     auto sections = divider.divide(dag);
 
     BOOST_REQUIRE_EQUAL(sections.size(), 1);
@@ -468,8 +467,7 @@ BOOST_AUTO_TEST_CASE(EmptyGraphTest) {
     BOOST_CHECK(sections.empty());
 }
 
-BOOST_AUTO_TEST_SUITE_END() 
-
+BOOST_AUTO_TEST_SUITE_END()
 
 // --- Test Fixture 2: A DAG with multiple merge points for deeper recursion ---
 struct TestFixture_MultiMerge {
@@ -479,18 +477,27 @@ struct TestFixture_MultiMerge {
         // Sequence: {4, 4, 2, 2, 1, 1}. Two significant drops.
         // L0: 4 comp -> L2: 2 comp (drop of 2)
         // L2: 2 comp -> L4: 1 comp (drop of 1)
-        const auto v_l0_1 = dag.add_vertex(1,1,1), v_l0_2 = dag.add_vertex(1,1,1), v_l0_3 = dag.add_vertex(1,1,1), v_l0_4 = dag.add_vertex(1,1,1);
-        const auto v_l1_1 = dag.add_vertex(1,1,1), v_l1_2 = dag.add_vertex(1,1,1), v_l1_3 = dag.add_vertex(1,1,1), v_l1_4 = dag.add_vertex(1,1,1);
-        const auto v_l2_1 = dag.add_vertex(1,1,1), v_l2_2 = dag.add_vertex(1,1,1);
-        const auto v_l3_1 = dag.add_vertex(1,1,1), v_l3_2 = dag.add_vertex(1,1,1);
-        const auto v_l4_1 = dag.add_vertex(1,1,1);
-        const auto v_l5_1 = dag.add_vertex(1,1,1);
-
-        dag.add_edge(v_l0_1, v_l1_1); dag.add_edge(v_l0_2, v_l1_2); dag.add_edge(v_l0_3, v_l1_3); dag.add_edge(v_l0_4, v_l1_4);
-        dag.add_edge(v_l1_1, v_l2_1); dag.add_edge(v_l1_2, v_l2_1);
-        dag.add_edge(v_l1_3, v_l2_2); dag.add_edge(v_l1_4, v_l2_2);
-        dag.add_edge(v_l2_1, v_l3_1); dag.add_edge(v_l2_2, v_l3_2);
-        dag.add_edge(v_l3_1, v_l4_1); dag.add_edge(v_l3_2, v_l4_1);
+        const auto v_l0_1 = dag.add_vertex(1, 1, 1), v_l0_2 = dag.add_vertex(1, 1, 1), v_l0_3 = dag.add_vertex(1, 1, 1),
+                   v_l0_4 = dag.add_vertex(1, 1, 1);
+        const auto v_l1_1 = dag.add_vertex(1, 1, 1), v_l1_2 = dag.add_vertex(1, 1, 1), v_l1_3 = dag.add_vertex(1, 1, 1),
+                   v_l1_4 = dag.add_vertex(1, 1, 1);
+        const auto v_l2_1 = dag.add_vertex(1, 1, 1), v_l2_2 = dag.add_vertex(1, 1, 1);
+        const auto v_l3_1 = dag.add_vertex(1, 1, 1), v_l3_2 = dag.add_vertex(1, 1, 1);
+        const auto v_l4_1 = dag.add_vertex(1, 1, 1);
+        const auto v_l5_1 = dag.add_vertex(1, 1, 1);
+
+        dag.add_edge(v_l0_1, v_l1_1);
+        dag.add_edge(v_l0_2, v_l1_2);
+        dag.add_edge(v_l0_3, v_l1_3);
+        dag.add_edge(v_l0_4, v_l1_4);
+        dag.add_edge(v_l1_1, v_l2_1);
+        dag.add_edge(v_l1_2, v_l2_1);
+        dag.add_edge(v_l1_3, v_l2_2);
+        dag.add_edge(v_l1_4, v_l2_2);
+        dag.add_edge(v_l2_1, v_l3_1);
+        dag.add_edge(v_l2_2, v_l3_2);
+        dag.add_edge(v_l3_1, v_l4_1);
+        dag.add_edge(v_l3_2, v_l4_1);
         dag.add_edge(v_l4_1, v_l5_1);
     }
 };
@@ -530,7 +537,6 @@ BOOST_AUTO_TEST_CASE(VarianceSplitterTest) {
     BOOST_REQUIRE_EQUAL(sections.size(), 3);
 }
 
+BOOST_AUTO_TEST_SUITE_END()    // End of MultiMergeTests
 
-BOOST_AUTO_TEST_SUITE_END() // End of MultiMergeTests
-
-BOOST_AUTO_TEST_SUITE_END() // End of DagDividerTestSuite
\ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()    // End of DagDividerTestSuite
diff --git a/tests/wavefront_divider.cpp b/tests/wavefront_divider.cpp
index 09caf82b..40b48783 100644
--- a/tests/wavefront_divider.cpp
+++ b/tests/wavefront_divider.cpp
@@ -19,40 +19,36 @@ limitations under the License.
 #define BOOST_TEST_MODULE wavefront_divider
 #include <boost/test/unit_test.hpp>
 
+#include "osp/auxiliary/io/dot_graph_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp"
 #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp"
-#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp"
-#include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp"
 #include "osp/dag_divider/WavefrontComponentScheduler.hpp"
-#include "osp/auxiliary/io/dot_graph_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "test_utils.hpp"
-
+#include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp"
+#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-
+#include "test_utils.hpp"
 
 using namespace osp;
 
 std::vector<std::string> test_graphs_dot() { return {"data/dot/smpl_dot_graph_1.dot"}; }
 
 std::vector<std::string> tiny_spaa_graphs() {
-    return {
-        "data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag",
-                 "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag",
-                 "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag"
-    };
+    return {"data/spaa/tiny/instance_bicgstab.hdag",
+            "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag",
+            "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag",
+            "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag"};
 }
 
-template<typename Graph_t>
+template <typename Graph_t>
 bool check_vertex_maps(const std::vector<std::vector<std::vector<vertex_idx_t<Graph_t>>>> &maps, const Graph_t &dag) {
-
     std::unordered_set<vertex_idx_t<Graph_t>> all_vertices;
     for (const auto &step : maps) {
         for (const auto &subgraph : step) {
-
-            for (const auto &vertex : subgraph)
+            for (const auto &vertex : subgraph) {
                 all_vertices.insert(vertex);
+            }
         }
     }
 
@@ -60,7 +56,6 @@ bool check_vertex_maps(const std::vector<std::vector<std::vector<vertex_idx_t<Gr
 }
 
 BOOST_AUTO_TEST_CASE(wavefront_component_divider) {
-
     std::vector<std::string> filenames_graph = test_graphs_dot();
 
     const auto project_root = get_project_root();
@@ -74,25 +69,22 @@ BOOST_AUTO_TEST_CASE(wavefront_component_divider) {
         auto status_graph = file_reader::readComputationalDagDotFormat((project_root / filename_graph).string(), graph);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         } else {
             std::cout << "File read:" << filename_graph << std::endl;
         }
 
-        ScanWavefrontDivider<graph_t> wavefront;       
+        ScanWavefrontDivider<graph_t> wavefront;
         auto maps = wavefront.divide(graph);
 
         if (!maps.empty()) {
-
             BOOST_CHECK(check_vertex_maps(maps, graph));
         }
     }
 }
 
 BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) {
-
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
 
     const auto project_root = get_project_root();
@@ -106,7 +98,6 @@ BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) {
         auto status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), graph);
 
         if (!status_graph) {
-
             std::cout << "Reading files failed." << std::endl;
             BOOST_CHECK(false);
         } else {
@@ -115,12 +106,11 @@ BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) {
 
         ScanWavefrontDivider<graph_t> wavefront;
         wavefront.set_metric(SequenceMetric::AVAILABLE_PARALLELISM);
-        wavefront.use_variance_splitter(1.0,1.0,1);
+        wavefront.use_variance_splitter(1.0, 1.0, 1);
 
         auto maps = wavefront.divide(graph);
 
         if (!maps.empty()) {
-
             BOOST_CHECK(check_vertex_maps(maps, graph));
         }
     }
diff --git a/tests/wavefront_scheduler.cpp b/tests/wavefront_scheduler.cpp
index 3e1e2a29..3fb2b203 100644
--- a/tests/wavefront_scheduler.cpp
+++ b/tests/wavefront_scheduler.cpp
@@ -18,52 +18,51 @@ limitations under the License.
 
 #define BOOST_TEST_MODULE AbstractWavefrontSchedulerTest
 #include <boost/test/unit_test.hpp>
+
 #include "osp/dag_divider/AbstractWavefrontScheduler.hpp"
+#include "osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp"
 #include "osp/dag_divider/WavefrontComponentScheduler.hpp"
-#include "osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp" 
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 
 using graph_t = osp::computational_dag_edge_idx_vector_impl_def_t;
 
-
-template<typename Graph_t, typename constr_graph_t>
+template <typename Graph_t, typename constr_graph_t>
 class ConcreteWavefrontScheduler : public osp::AbstractWavefrontScheduler<Graph_t, constr_graph_t> {
-public:
-    ConcreteWavefrontScheduler(osp::IDagDivider<Graph_t>& div, osp::Scheduler<constr_graph_t>& sched)
+  public:
+    ConcreteWavefrontScheduler(osp::IDagDivider<Graph_t> &div, osp::Scheduler<constr_graph_t> &sched)
         : osp::AbstractWavefrontScheduler<Graph_t, constr_graph_t>(div, sched) {}
-    
+
     // Expose the protected method for testing with the new signature
-    bool test_distributeProcessors(
-        unsigned total_processors, 
-        const std::vector<double>& work_weights,
-        std::vector<unsigned>& allocation) const {
+    bool test_distributeProcessors(unsigned total_processors,
+                                   const std::vector<double> &work_weights,
+                                   std::vector<unsigned> &allocation) const {
         return this->distributeProcessors(total_processors, work_weights, allocation);
     }
 
     // Dummy implementation for the pure virtual method
-    osp::RETURN_STATUS computeSchedule(osp::BspSchedule<Graph_t>&) override {
-        return osp::RETURN_STATUS::OSP_SUCCESS;
-    }
+    osp::RETURN_STATUS computeSchedule(osp::BspSchedule<Graph_t> &) override { return osp::RETURN_STATUS::OSP_SUCCESS; }
+
     std::string getScheduleName() const override { return "ConcreteScheduler"; }
 };
 
 // Mock dependencies for the test
 struct MockDivider : public osp::IDagDivider<graph_t> {
-    std::vector<std::vector<std::vector<graph_t::vertex_idx>>> divide(const graph_t&) override { return {}; }
+    std::vector<std::vector<std::vector<graph_t::vertex_idx>>> divide(const graph_t &) override { return {}; }
 };
+
 struct MockScheduler : public osp::Scheduler<graph_t> {
-    osp::RETURN_STATUS computeSchedule(osp::BspSchedule<graph_t>&) override { return osp::RETURN_STATUS::OSP_SUCCESS; }
+    osp::RETURN_STATUS computeSchedule(osp::BspSchedule<graph_t> &) override { return osp::RETURN_STATUS::OSP_SUCCESS; }
+
     std::string getScheduleName() const override { return "Mock"; }
 };
 
-
 BOOST_AUTO_TEST_SUITE(AbstractWavefrontSchedulerTestSuite)
 
 BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) {
     MockDivider mock_divider;
     MockScheduler mock_scheduler;
     ConcreteWavefrontScheduler<graph_t, graph_t> scheduler(mock_divider, mock_scheduler);
-    
+
     std::vector<unsigned> allocation;
     bool starvation_hit;
 
@@ -108,7 +107,7 @@ BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) {
     std::vector<unsigned> expected6 = {0, 0, 0};
     BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected6.begin(), expected6.end());
     BOOST_CHECK(!starvation_hit);
-    
+
     // Test 7: Inactive components (work is zero)
     std::vector<double> work7 = {100.0, 0.0, 300.0, 0.0};
     starvation_hit = scheduler.test_distributeProcessors(8, work7, allocation);
@@ -139,12 +138,10 @@ BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) {
     std::vector<unsigned> expected10 = {0, 1, 0};
     BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected10.begin(), expected10.end());
     BOOST_CHECK(starvation_hit);
-
 }
 
 BOOST_AUTO_TEST_SUITE_END()
 
-
 // Mock implementations for dependencies
 using graph_t = osp::computational_dag_edge_idx_vector_impl_def_t;
 using VertexType = graph_t::vertex_idx;
@@ -152,14 +149,13 @@ using VertexType = graph_t::vertex_idx;
 // A mock divider that returns a predictable set of sections.
 struct MockDivider_2 : public osp::IDagDivider<graph_t> {
     std::vector<std::vector<std::vector<VertexType>>> sections_to_return;
-    std::vector<std::vector<std::vector<VertexType>>> divide(const graph_t&) override { 
-        return sections_to_return; 
-    }
+
+    std::vector<std::vector<std::vector<VertexType>>> divide(const graph_t &) override { return sections_to_return; }
 };
 
 // A mock sub-scheduler that returns a simple, predictable schedule.
 struct MockSubScheduler : public osp::Scheduler<graph_t> {
-    osp::RETURN_STATUS computeSchedule(osp::BspSchedule<graph_t>& schedule) override {
+    osp::RETURN_STATUS computeSchedule(osp::BspSchedule<graph_t> &schedule) override {
         // Assign all tasks to the first processor in a single superstep
         for (VertexType v = 0; v < schedule.getInstance().getComputationalDag().num_vertices(); ++v) {
             schedule.setAssignedProcessor(v, 0);
@@ -168,6 +164,7 @@ struct MockSubScheduler : public osp::Scheduler<graph_t> {
         schedule.setNumberOfSupersteps(1);
         return osp::RETURN_STATUS::OSP_SUCCESS;
     }
+
     std::string getScheduleName() const override { return "MockSubScheduler"; }
 };
 
@@ -180,10 +177,10 @@ struct TestFixture {
     TestFixture() {
         // A simple DAG: v0 -> v1, v2 -> v3
         // Two components that will be in the same wavefront set.
-        dag.add_vertex(10, 1, 1); // v0
-        dag.add_vertex(20, 1, 1); // v1
-        dag.add_vertex(30, 1, 1); // v2
-        dag.add_vertex(40, 1, 1); // v3
+        dag.add_vertex(10, 1, 1);    // v0
+        dag.add_vertex(20, 1, 1);    // v1
+        dag.add_vertex(30, 1, 1);    // v2
+        dag.add_vertex(40, 1, 1);    // v3
         dag.add_edge(0, 1);
         dag.add_edge(2, 3);
 
@@ -214,13 +211,16 @@ BOOST_AUTO_TEST_CASE(BasicSchedulingTest) {
     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0);
     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1);
     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(3), 1);
-    
+
     BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2);
 }
 
 BOOST_AUTO_TEST_CASE(MultipleSectionsTest) {
     // Setup the mock divider to return two separate sections
-    mock_divider.sections_to_return = { {{0},{1}}, {{2}, {3}} };
+    mock_divider.sections_to_return = {
+        {{0}, {1}},
+        {{2}, {3}}
+    };
 
     osp::WavefrontComponentScheduler<graph_t, graph_t> scheduler(mock_divider, mock_sub_scheduler);
     osp::BspInstance<graph_t> instance(dag, arch);
@@ -228,13 +228,12 @@ BOOST_AUTO_TEST_CASE(MultipleSectionsTest) {
 
     auto status = scheduler.computeSchedule(schedule);
     BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS);
- 
+
     BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0);
     BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 3);
     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0);
     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0);
 
-
     BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 0);
     BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 4);
     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1);
@@ -249,7 +248,10 @@ BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) {
     scarce_arch.setNumberOfProcessors(1);
 
     // Setup the mock divider to return one section with two components
-    mock_divider.sections_to_return = {{{0}, {1}}, {{2, 3}}};
+    mock_divider.sections_to_return = {
+        {{0}, {1}},
+        {{2, 3}}
+    };
 
     osp::WavefrontComponentScheduler<graph_t, graph_t> scheduler(mock_divider, mock_sub_scheduler);
     osp::BspInstance<graph_t> instance(dag, scarce_arch);
@@ -262,7 +264,6 @@ BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) {
 
 BOOST_AUTO_TEST_SUITE_END()
 
-
 // struct TestFixture_2 {
 //     graph_t dag;
 //     osp::BspArchitecture<graph_t> arch;
@@ -291,12 +292,12 @@ BOOST_AUTO_TEST_SUITE_END()
 
 //     auto status = scheduler.computeSchedule(schedule);
 //     BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS);
-    
+
 //     // Member 1 of iso group {0,1} gets 1 proc (global proc 0)
 //     BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0);
 //     BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0);
 //     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0);
-    
+
 //     // Member 2 of iso group {2,3} gets 1 proc (global proc 1)
 //     BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 1);
 //     BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 1);
@@ -310,7 +311,6 @@ BOOST_AUTO_TEST_SUITE_END()
 //     BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1);
 // }
 
-
 // BOOST_AUTO_TEST_CASE(IndivisibleScarcitySchedulingTest) {
 //     // 2 isomorphic components, 1 unique. 3 processors available.
 //     arch.setNumberOfProcessors(3);
@@ -329,7 +329,7 @@ BOOST_AUTO_TEST_SUITE_END()
 //     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); // Sequential
 
 //     // Unique group scheduled on its 2 processors (global procs 1, 2)
-//     BOOST_CHECK_EQUAL(schedule.assignedProcessor(4), 1); 
+//     BOOST_CHECK_EQUAL(schedule.assignedProcessor(4), 1);
 //     BOOST_CHECK_EQUAL(schedule.assignedSuperstep(4), 0);
 
 //     BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2);
@@ -338,7 +338,7 @@ BOOST_AUTO_TEST_SUITE_END()
 // BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) {
 //     // IsomorphismGroups will find 2 groups: {{0,1}, {2,3}} and {{4,5}}.
 //     // With only 1 processor, this is a starvation scenario.
-//     arch.setNumberOfProcessors(1); 
+//     arch.setNumberOfProcessors(1);
 //     mock_divider.sections_to_return = {{{0, 1}, {2, 3}, {4, 5}}};
 
 //     osp::IsomorphicWavefrontComponentScheduler<graph_t, graph_t> scheduler(mock_divider, mock_sub_scheduler);
@@ -350,5 +350,4 @@ BOOST_AUTO_TEST_SUITE_END()
 //     BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::ERROR);
 // }
 
-
 // BOOST_AUTO_TEST_SUITE_END()