diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp index 86a9f1ea..f023f937 100644 --- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp +++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp @@ -28,15 +28,16 @@ limitations under the License. #include #include -#include -#include #include "ConfigParser.hpp" #include "StatsModules/IStatsModule.hpp" -#include "osp/bsp/model/BspInstance.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/return_status.hpp" +#include "osp/bsp/model/BspInstance.hpp" +#include +#include -//#define EIGEN_FOUND 1 +// #define EIGEN_FOUND 1 #ifdef EIGEN_FOUND #include @@ -83,7 +84,7 @@ class AbstractTestSuiteRunner { if (write_target_object_to_file) { output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory") - .get_value(); + .get_value(); if (output_target_object_dir_path.substr(0, 1) != "/") output_target_object_dir_path = executable_dir + output_target_object_dir_path; if (!output_target_object_dir_path.empty() && !std::filesystem::exists(output_target_object_dir_path)) { @@ -167,13 +168,13 @@ class AbstractTestSuiteRunner { } } - virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr& target_object, - const pt::ptree &algo_config, - long long &computation_time_ms) = 0; + virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr &target_object, + const pt::ptree &algo_config, + long long &computation_time_ms) = 0; virtual void create_and_register_statistic_modules(const std::string &module_name) = 0; - virtual void write_target_object_hook(const TargetObjectType&, const std::string &, const std::string &, + virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &, const std::string &) { } // default in case TargetObjectType cannot be written to 
file @@ -250,7 +251,7 @@ class AbstractTestSuiteRunner { log_stream << "Start Graph: " + filename_graph + "\n"; BspInstance bsp_instance; - bsp_instance.setArchitecture(arch); + bsp_instance.getArchitecture() = arch; bool graph_status = false; std::string ext; if (filename_graph.rfind('.') != std::string::npos) @@ -268,12 +269,12 @@ class AbstractTestSuiteRunner { SM_csc_int64 L_csc_int64{}; if constexpr (std::is_same_v || std::is_same_v) { - if (ext != "mtx"){ + if (ext != "mtx") { log_stream << "Error: Only .mtx file is accepted for SpTRSV" << std::endl; return 0; } - - if constexpr (std::is_same_v){ + + if constexpr (std::is_same_v) { graph_status = Eigen::loadMarket(L_csr_int32, filename_graph); if (!graph_status) { std::cerr << "Failed to read matrix from " << filename_graph << std::endl; @@ -297,7 +298,7 @@ class AbstractTestSuiteRunner { } } else { #endif - graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); + graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); #ifdef EIGEN_FOUND } @@ -309,22 +310,20 @@ class AbstractTestSuiteRunner { for (auto &algorithm_config_pair : parser.scheduler) { const pt::ptree &algo_config = algorithm_config_pair.second; - - std::string current_algo_name = algo_config.get_child("name").get_value(); log_stream << "Start Algorithm " + current_algo_name + "\n"; long long computation_time_ms; - std::unique_ptr target_object; - + std::unique_ptr target_object; + RETURN_STATUS exec_status = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms); if (exec_status != RETURN_STATUS::OSP_SUCCESS && exec_status != RETURN_STATUS::BEST_FOUND) { if (exec_status == RETURN_STATUS::ERROR) log_stream << "Error computing with " << current_algo_name << "." << std::endl; else if (exec_status == RETURN_STATUS::TIMEOUT) - log_stream << "Scheduler " << current_algo_name << " timed out." 
<< std::endl; + log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl; continue; } diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp index 97e7e473..08209efd 100644 --- a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp @@ -57,8 +57,8 @@ limitations under the License. namespace osp { const std::set get_available_bsp_scheduler_names() { - return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom", - "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"}; + return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom", + "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"}; } template @@ -247,7 +247,7 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert if (!status) return RETURN_STATUS::ERROR; - instance_coarse.setArchitecture(instance.getArchitecture()); + instance_coarse.getArchitecture() = instance.getArchitecture(); instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); BspSchedule schedule_coarse(instance_coarse); diff --git a/include/osp/auxiliary/io/arch_file_reader.hpp b/include/osp/auxiliary/io/arch_file_reader.hpp index 4e100ba8..71b0f006 100644 --- a/include/osp/auxiliary/io/arch_file_reader.hpp +++ b/include/osp/auxiliary/io/arch_file_reader.hpp @@ -18,10 +18,10 @@ limitations under the License. 
#pragma once +#include "osp/bsp/model/BspArchitecture.hpp" #include #include #include -#include "osp/bsp/model/BspArchitecture.hpp" namespace osp { namespace file_reader { @@ -31,7 +31,8 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit // Skip comment lines while (std::getline(infile, line)) { - if (!line.empty() && line[0] != '%') break; + if (!line.empty() && line[0] != '%') + break; } // Parse architecture parameters @@ -58,24 +59,24 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit if (0 <= mem_type && mem_type <= 3) { using memw_t = v_memw_t; switch (mem_type) { - case 0: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE); - break; - case 1: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); - architecture.setMemoryBound(static_cast(M)); - break; - case 2: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL); - architecture.setMemoryBound(static_cast(M)); - break; - case 3: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); - architecture.setMemoryBound(static_cast(M)); - break; - default: - std::cerr << "Invalid memory type.\n"; - return false; + case 0: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE); + break; + case 1: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); + architecture.setMemoryBound(static_cast(M)); + break; + case 2: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL); + architecture.setMemoryBound(static_cast(M)); + break; + case 3: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); + architecture.setMemoryBound(static_cast(M)); + break; + default: + std::cerr << "Invalid memory type.\n"; + return false; } } else if (mem_type == -1) { std::cout << "No memory type specified. 
Assuming \"NONE\".\n"; @@ -116,7 +117,7 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit return false; } - architecture.setSendCosts(fromProc, toProc, static_cast>(value)); + architecture.SetSendCosts(fromProc, toProc, static_cast>(value)); } // Ensure there are no remaining non-comment lines @@ -127,7 +128,6 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit } } - architecture.computeCommAverage(); return true; } diff --git a/include/osp/auxiliary/return_status.hpp b/include/osp/auxiliary/return_status.hpp new file mode 100644 index 00000000..e5f0b870 --- /dev/null +++ b/include/osp/auxiliary/return_status.hpp @@ -0,0 +1,56 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include + +namespace osp { + +enum class RETURN_STATUS { OSP_SUCCESS, + BEST_FOUND, + TIMEOUT, + ERROR }; + +/** + * @brief Converts the enum to a string literal. + * Returns const char* to avoid std::string allocation overhead. 
+ */ +inline const char *to_string(const RETURN_STATUS status) { + switch (status) { + case RETURN_STATUS::OSP_SUCCESS: + return "SUCCESS"; + case RETURN_STATUS::BEST_FOUND: + return "BEST FOUND"; + case RETURN_STATUS::TIMEOUT: + return "TIMEOUT"; + case RETURN_STATUS::ERROR: + return "ERROR"; + default: + return "UNKNOWN"; + } +} + +/** + * @brief Stream operator overload using the helper function. + */ +inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { + return os << to_string(status); +} + +} // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 8ac1c0a8..5575fad2 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -34,46 +34,51 @@ limitations under the License. namespace osp { -static constexpr unsigned CacheLineSize = 64; - +/** + * @enum MEMORY_CONSTRAINT_TYPE + * @brief Enumerates the different types of memory constraints. + * Memory bounds are set per processor and apply to aggregated memory weights of nodes according to the different types of memory constraints. + */ enum class MEMORY_CONSTRAINT_TYPE { - NONE, - LOCAL, - GLOBAL, - PERSISTENT_AND_TRANSIENT, - LOCAL_IN_OUT, - LOCAL_INC_EDGES, - LOCAL_SOURCES_INC_EDGES + NONE, /** No memory constraints. */ + LOCAL, /** The memory bounds apply to the sum of memory weights of nodes assigned to the same processor and superstep. */ + GLOBAL, /** The memory bounds apply to the sum of memory weights of the nodes assigned to the same processor. */ + PERSISTENT_AND_TRANSIENT, /** Memory bounds apply to the sum of memory weights of nodes assigned to the same processor plus the maximum communication weight of a node assigned to a processor. */ + LOCAL_IN_OUT, /** Memory constraints are local in-out. Experimental. */ + LOCAL_INC_EDGES, /** Memory constraints are local incident edges. Experimental. 
*/ + LOCAL_SOURCES_INC_EDGES /** Memory constraints are local source incident edges. Experimental. */ }; -inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { +/** + * @brief Converts the enum to a string literal. + * Returns const char* to avoid std::string allocation overhead. + */ +inline const char *to_string(MEMORY_CONSTRAINT_TYPE type) { switch (type) { case MEMORY_CONSTRAINT_TYPE::NONE: - os << "NONE"; - break; + return "NONE"; case MEMORY_CONSTRAINT_TYPE::LOCAL: - os << "LOCAL"; - break; + return "LOCAL"; case MEMORY_CONSTRAINT_TYPE::GLOBAL: - os << "GLOBAL"; - break; + return "GLOBAL"; case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: - os << "PERSISTENT_AND_TRANSIENT"; - break; + return "PERSISTENT_AND_TRANSIENT"; case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: - os << "LOCAL_IN_OUT"; - break; + return "LOCAL_IN_OUT"; case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: - os << "LOCAL_INC_EDGES"; - break; + return "LOCAL_INC_EDGES"; case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: - os << "LOCAL_SOURCES_INC_EDGES"; - break; + return "LOCAL_SOURCES_INC_EDGES"; default: - os << "UNKNOWN"; - break; + return "UNKNOWN"; } - return os; +} + +/** + * @brief Stream operator overload using the helper function. + */ +inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { + return os << to_string(type); } /** @@ -81,8 +86,29 @@ inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { * @brief Represents the architecture of a BSP (Bulk Synchronous Parallel) system. * * The BspArchitecture class stores information about the number of processors, communication costs, - * synchronization costs, and send costs between processors in a BSP system. It provides methods to - * set and retrieve these values. + * synchronization costs, the send costs between processors, the types of processors, and the memory + * bounds. It provides methods to set and retrieve these values. 
+ * + * **Processors:** + * The architecture consists of p processors, indexed from 0 to p-1. Note that processor indices are represented using `unsigned`. + * + * **Processor Types:** + * Processors can have different types, which are represented by non-negative integers. + * Processor types are assumed to be consecutive integers starting from 0. Note that processor types are represented using `unsigned`. + * Processor types are used to express compatabilities, which can be specified in the BspInstance, regarding node types. + * + * **Communication and Synchronization Costs:** + * - Communication Cost (g): The cost of communicating a unit of data between processors, i.e., the bandwidth. + * - Synchronization Cost (L): The cost of synchronizing all processors at the end of a superstep. + * + * **Send Costs (NUMA):** + * The architecture supports Non-Uniform Memory Access (NUMA) effects via a send cost matrix. + * The cost to send data from processor i to processor j is given by g * sendCosts[i][j]. + * By default, send costs are uniform (1 for distinct processors, 0 for self). + * + * **Memory Constraints:** + * Each processor has a memory bound. The `MEMORY_CONSTRAINT_TYPE` determines how these bounds are applied + * (e.g., local per superstep, global per processor). */ template class BspArchitecture { @@ -90,84 +116,135 @@ class BspArchitecture { static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); private: - unsigned number_processors; - unsigned number_of_processor_types; + /** @brief The number of processors in the architecture. Must be at least 1. */ + unsigned numberOfProcessors_; + + /** @brief The number of processor types in the architecture. See processorTypes_ for more details. */ + unsigned numberOfProcessorTypes_; + + /** @brief The communication costs, typically denoted 'g' for the BSP model. 
*/ + v_commw_t communicationCosts_; - v_commw_t communication_costs; - v_commw_t synchronisation_costs; + /** @brief The synchronisation costs, typically denoted 'L' for the BSP model. */ + v_commw_t synchronisationCosts_; - std::vector> memory_bound; + /** @brief The architecture allows to specify memory bounds per processor. */ + std::vector> memoryBound_; - bool isNuma; + /** @brief Flag to indicate whether the architecture is NUMA , i.e., whether the send costs are different for different pairs of processors. */ + bool isNuma_; - std::vector processor_type; + /** @brief The architecture allows to specify processor types. Processor types are used to express compatabilities, which can be specified in the BspInstance, regarding node types. */ + std::vector processorTypes_; - std::vector>> send_costs; + /** @brief A flattened p x p matrix of send costs. Access via index [i * numberOfProcessors_ + j]. */ + std::vector> sendCosts_; - MEMORY_CONSTRAINT_TYPE memory_const_type = MEMORY_CONSTRAINT_TYPE::NONE; + /** @brief The memory constraint type. */ + MEMORY_CONSTRAINT_TYPE memoryConstraintType_ = MEMORY_CONSTRAINT_TYPE::NONE; + + /** @brief Helper function to calculate the index of a flattened p x p matrix. 
*/ + std::size_t FlatIndex(const unsigned row, const unsigned col) const { + return static_cast(row) * numberOfProcessors_ + col; + } - bool are_send_cost_numa() { - if (number_processors == 1) + bool AreSendCostsNuma() { + if (numberOfProcessors_ == 1U) return false; - v_commw_t val = send_costs[0][1]; - for (unsigned p1 = 0; p1 < number_processors; p1++) { - for (unsigned p2 = 0; p2 < number_processors; p2++) { + const v_commw_t val = sendCosts_[1U]; + for (unsigned p1 = 0U; p1 < numberOfProcessors_; p1++) { + for (unsigned p2 = 0U; p2 < numberOfProcessors_; p2++) { if (p1 == p2) continue; - if (send_costs[p1][p2] != val) + if (sendCosts_[FlatIndex(p1, p2)] != val) return true; } } return false; } - public: - BspArchitecture() - : number_processors(2), number_of_processor_types(1), communication_costs(1), synchronisation_costs(2), - memory_bound(std::vector>(number_processors, 100)), isNuma(false), - processor_type(std::vector(number_processors, 0)), - send_costs(std::vector>>( - number_processors, std::vector>(number_processors, 1))) { - for (unsigned i = 0; i < number_processors; i++) { - send_costs[i][i] = 0; + void UpdateNumberOfProcessorTypes() { + numberOfProcessorTypes_ = 0U; + for (unsigned p = 0U; p < numberOfProcessors_; p++) { + if (processorTypes_[p] >= numberOfProcessorTypes_) { + numberOfProcessorTypes_ = processorTypes_[p] + 1U; + } } } - BspArchitecture(const BspArchitecture &other) = default; - BspArchitecture(BspArchitecture &&other) = default; - BspArchitecture &operator=(const BspArchitecture &other) = default; - BspArchitecture &operator=(BspArchitecture &&other) = default; - ~BspArchitecture() = default; + void SetSendCostDiagonalToZero() { + for (unsigned i = 0U; i < numberOfProcessors_; i++) { + sendCosts_[FlatIndex(i, i)] = 0U; + } + } + + void InitializeUniformSendCosts() { + sendCosts_.assign(numberOfProcessors_ * numberOfProcessors_, 1U); + SetSendCostDiagonalToZero(); + isNuma_ = false; + } + public: /** * @brief Constructs a 
BspArchitecture object with the specified number of processors, communication cost, and * synchronization cost. * - * @param processors The number of processors in the architecture. - * @param comm_cost The communication cost between processors. - * @param synch_cost The synchronization cost between processors. - */ - BspArchitecture(unsigned processors, v_commw_t comm_cost, v_commw_t synch_cost, - v_memw_t memory_bound_ = 100) - : number_processors(processors), number_of_processor_types(1), communication_costs(comm_cost), - synchronisation_costs(synch_cost), - memory_bound(std::vector>(number_processors, memory_bound_)), isNuma(false), - processor_type(std::vector(number_processors, 0)), - send_costs(std::vector>>( - number_processors, std::vector>(number_processors, 1))) { - - for (unsigned i = 0; i < number_processors; i++) { - send_costs[i][i] = 0; + * @param NumberOfProcessors The number of processors in the architecture. Must be greater than 0. Default: 2. + * @param CommunicationCost The communication cost between processors. Default: 1. + * @param SynchronisationCost The synchronization cost between processors. Default: 2. + * @param MemoryBound The memory bound for each processor (default: 100). + * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. Default: empty (uniform costs). 
+ */ + BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t CommunicationCost = 1U, const v_commw_t SynchronisationCost = 2U, + const v_memw_t MemoryBound = 100U, const std::vector>> &SendCosts = {}) + : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost), + synchronisationCosts_(SynchronisationCost), + memoryBound_(NumberOfProcessors, MemoryBound), isNuma_(false), + processorTypes_(NumberOfProcessors, 0U) { + if (NumberOfProcessors == 0U) { + throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0."); + } + + if (SendCosts.empty()) { + InitializeUniformSendCosts(); + } else { + if (NumberOfProcessors != SendCosts.size()) { + throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); + } + if (std::any_of(SendCosts.begin(), SendCosts.end(), + [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) { + throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); + } + + sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors); + for (const auto &row : SendCosts) { + sendCosts_.insert(sendCosts_.end(), row.begin(), row.end()); + } + + SetSendCostDiagonalToZero(); + isNuma_ = AreSendCostsNuma(); } } + BspArchitecture(const BspArchitecture &other) = default; + BspArchitecture(BspArchitecture &&other) noexcept = default; + BspArchitecture &operator=(const BspArchitecture &other) = default; + BspArchitecture &operator=(BspArchitecture &&other) noexcept = default; + virtual ~BspArchitecture() = default; + + /** + * @brief Copy constructor from a BspArchitecture with a different graph type. + * + * @tparam Graph_t_other The graph type of the other BspArchitecture. + * @param other The other BspArchitecture object. 
+ */ template BspArchitecture(const BspArchitecture &other) - : number_processors(other.numberOfProcessors()), number_of_processor_types(other.getNumberOfProcessorTypes()), - communication_costs(other.communicationCosts()), synchronisation_costs(other.synchronisationCosts()), - memory_bound(other.memoryBound()), isNuma(other.isNumaArchitecture()), processor_type(other.processorTypes()), - send_costs(other.sendCosts()) { + : numberOfProcessors_(other.numberOfProcessors()), numberOfProcessorTypes_(other.getNumberOfProcessorTypes()), + communicationCosts_(other.communicationCosts()), synchronisationCosts_(other.synchronisationCosts()), + memoryBound_(other.memoryBound()), isNuma_(other.isNumaArchitecture()), processorTypes_(other.processorTypes()), + sendCosts_(other.sendCostsVector()) { static_assert(std::is_same_v, v_memw_t>, "BspArchitecture: Graph_t and Graph_t_other have the same memory weight type."); @@ -180,81 +257,32 @@ class BspArchitecture { } /** - * @brief Constructs a BspArchitecture object with the specified number of processors, communication cost, and - * synchronization cost. - * - * @param processors The number of processors in the architecture. - * @param comm_cost The communication cost between processors. - * @param synch_cost The synchronization cost between processors. 
- */ - BspArchitecture(unsigned int processors, v_commw_t comm_cost, v_commw_t synch_cost, - std::vector>> send_costs_) - : number_processors(processors), number_of_processor_types(1), communication_costs(comm_cost), - synchronisation_costs(synch_cost), memory_bound(std::vector>(number_processors, 100)), - processor_type(std::vector(number_processors, 0)), send_costs(send_costs_) { - - if (number_processors != send_costs.size()) { - throw std::invalid_argument("send_costs_ needs to be a processors x processors matrix.\n"); - } - if (std::any_of(send_costs.begin(), send_costs.end(), - [processors](const auto &thing) { return thing.size() != processors; })) { - throw std::invalid_argument("send_costs_ needs to be a processors x processors matrix.\n"); - } - - for (unsigned i = 0; i < number_processors; i++) { - send_costs[i][i] = 0; - } - - isNuma = are_send_cost_numa(); - } - - /** - * @brief Constructs a BspArchitecture object with the specified number of processors, communication cost, and - * synchronization cost. + * @brief Constructs a BspArchitecture object with custom send costs. * - * @param processors The number of processors in the architecture. - * @param comm_cost The communication cost between processors. - * @param synch_cost The synchronization cost between processors. 
- */ - BspArchitecture(unsigned int processors, v_commw_t comm_cost, v_commw_t synch_cost, - v_memw_t memory_bound_, std::vector>> send_costs_) - : number_processors(processors), number_of_processor_types(1), communication_costs(comm_cost), - synchronisation_costs(synch_cost), - memory_bound(std::vector>(number_processors, memory_bound_)), - processor_type(std::vector(number_processors, 0)), send_costs(send_costs_) { - - if (number_processors != send_costs.size()) { - throw std::invalid_argument("send_costs_ needs to be a processors x processors matrix.\n"); - } - if (std::any_of(send_costs.begin(), send_costs.end(), - [processors](const auto &thing) { return thing.size() != processors; })) { - throw std::invalid_argument("send_costs_ needs to be a processors x processors matrix.\n"); - } - - for (unsigned i = 0u; i < number_processors; i++) { - send_costs[i][i] = 0u; - } - - isNuma = are_send_cost_numa(); - } + * @param NumberOfProcessors The number of processors. Must be greater than 0. + * @param CommunicationCost The communication cost. + * @param SynchronisationCost The synchronization cost. + * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. + */ + BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t CommunicationCost, const v_commw_t SynchronisationCost, + const std::vector>> &SendCosts) + : BspArchitecture(NumberOfProcessors, CommunicationCost, SynchronisationCost, 100U, SendCosts) {} /** - * Sets the uniform send cost for each pair of processors in the BSP architecture. + * @brief Sets the uniform send cost for each pair of processors. * The send cost is set to 0 if the processors are the same, and 1 otherwise. - * This function assumes that the number of processors has already been set. 
*/ void SetUniformSendCost() { - - for (unsigned i = 0; i < number_processors; i++) { - for (unsigned j = 0; j < number_processors; j++) { + for (unsigned i = 0U; i < numberOfProcessors_; i++) { + for (unsigned j = 0U; j < numberOfProcessors_; j++) { if (i == j) { - send_costs[i][j] = 0; + sendCosts_[FlatIndex(i, j)] = 0U; } else { - send_costs[i][j] = 1; + sendCosts_[FlatIndex(i, j)] = 1U; } } } - isNuma = false; + isNuma_ = false; } /** @@ -265,80 +293,59 @@ class BspArchitecture { * * @param base The base value used to calculate the send cost. */ - void SetExpSendCost(v_commw_t base) { - - isNuma = true; + void SetExpSendCost(const v_commw_t base) { + isNuma_ = true; unsigned maxPos = 1; constexpr unsigned two = 2; - for (; intpow(two, maxPos + 1) <= number_processors - 1; ++maxPos) { + for (; intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) { } - for (unsigned i = 0; i < number_processors; ++i) - for (unsigned j = i + 1; j < number_processors; ++j) - for (unsigned pos = maxPos; pos <= maxPos; --pos) - if (((1 << pos) & i) != ((1 << pos) & j)) { - send_costs[i][j] = send_costs[j][i] = intpow(base, pos); + + for (unsigned i = 0U; i < numberOfProcessors_; ++i) { + for (unsigned j = i + 1U; j < numberOfProcessors_; ++j) { + // Corrected loop to avoid underflow issues with unsigned + for (int pos = static_cast(maxPos); pos >= 0; --pos) { + if (((1U << pos) & i) != ((1U << pos) & j)) { + sendCosts_[FlatIndex(i, j)] = sendCosts_[FlatIndex(j, i)] = intpow(base, static_cast(pos)); break; } + } + } + } } - inline auto processors() const { return integral_range(number_processors); } - /** - * @brief Computes the average communication cost of the BspArchitecture. - * - * This function computes the average communication cost of the BspArchitecture object. - * The average communication cost is calculated as the sum of the send costs between processors divided by the - * number of processors. - * - * @return The average communication cost as an unsigned integer. 
+ * @brief Returns a view of processor indices from 0 to numberOfProcessors_ - 1. + * @return An integral view of processor indices. */ - v_commw_t computeCommAverage() const { - - double avg = 0; - for (unsigned i = 0; i < number_processors; ++i) - for (unsigned j = 0; j < number_processors; ++j) - avg += static_cast(send_costs[i][j]); - avg = avg * static_cast(communication_costs) / static_cast(number_processors) / static_cast(number_processors); - - if (avg > static_cast(std::numeric_limits::max())) { - throw std::invalid_argument("avg comm exceeds the limit (something is very wrong)"); - } - - return static_cast>(std::round(avg)); - } + [[nodiscard]] auto processors() const { return integral_range(numberOfProcessors_); } /** - * Sets the send costs for the BspArchitecture. + * @brief Sets the send costs for the BspArchitecture. * * @param vec A 2D vector representing the send costs between processors. - * The size of the vector must be equal to the number of processors. - * Each inner vector must also have a size equal to the number of processors. - * @throws std::invalid_argument if the size of the vector or inner vectors is invalid. + * @throws std::invalid_argument if the size of the vector is invalid or diagonal elements are not 0. 
*/ - void setSendCosts(const std::vector>> &vec) { - - if (vec.size() != number_processors) { - throw std::invalid_argument("Invalid Argument"); + void SetSendCosts(const std::vector>> &vec) { + if (vec.size() != numberOfProcessors_) { + throw std::invalid_argument("Invalid Argument: Vector size mismatch."); } - isNuma = false; - for (unsigned i = 0; i < number_processors; i++) { - - if (vec[i].size() != number_processors) { - throw std::invalid_argument("Invalid Argument"); + isNuma_ = false; + for (unsigned i = 0U; i < numberOfProcessors_; i++) { + if (vec.at(i).size() != numberOfProcessors_) { + throw std::invalid_argument("Invalid Argument: Inner vector size mismatch."); } - for (unsigned j = 0; j < number_processors; j++) { - + for (unsigned j = 0U; j < numberOfProcessors_; j++) { if (i == j) { - if (vec[i][j] != 0) - throw std::invalid_argument("Invalid Argument, Diagonal elements should be 0"); + if (vec.at(i).at(j) != 0U) + throw std::invalid_argument("Invalid Argument: Diagonal elements should be 0."); } else { - send_costs[i][j] = vec[i][j]; + sendCosts_.at(FlatIndex(i, j)) = vec.at(i).at(j); - if (number_processors > 1 && vec[i][j] != vec[0][1]) { - isNuma = true; + if (numberOfProcessors_ > 1U && vec.at(i).at(j) != vec.at(0U).at(1U)) { + isNuma_ = true; } } } @@ -346,324 +353,310 @@ class BspArchitecture { } /** - * Sets the send costs between two processors. + * @brief Sets the send costs between two processors. * - * @param p1 The index of the first processor. - * @param p2 The index of the second processor. + * @param p1 The index of the first processor. Must be less than numberOfProcessors_. + * @param p2 The index of the second processor. Must be less than numberOfProcessors_. * @param cost The cost of sending data between the processors. - * - * @remarks If the two processors are the same, the send cost is not set. - * If the cost is not equal to 1, the architecture is considered NUMA. 
+ * @throws std::invalid_argument if the processor indices are out of bounds. */ - void setSendCosts(unsigned p1, unsigned p2, v_commw_t cost) { - - if (p1 >= number_processors || p2 > number_processors) - throw std::invalid_argument("Invalid Argument"); + void SetSendCosts(const unsigned p1, const unsigned p2, const v_commw_t cost) { + if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) // Fixed condition: p2 >= number_processors + throw std::invalid_argument("Invalid Argument: Processor index out of bounds."); if (p1 != p2) { - send_costs[p1][p2] = cost; - - isNuma = are_send_cost_numa(); + sendCosts_.at(FlatIndex(p1, p2)) = cost; + isNuma_ = AreSendCostsNuma(); } } /** - * Sets the memory bound for all processors of the BspArchitecture. - * - * @param memory_bound_ The new memory bound for all processors. + * @brief Sets the memory bound for all processors. + * @param MemoryBound The new memory bound for all processors. */ - inline void setMemoryBound(v_memw_t memory_bound_) { - memory_bound = std::vector>(number_processors, memory_bound_); + void setMemoryBound(const v_memw_t MemoryBound) { + memoryBound_.assign(numberOfProcessors_, MemoryBound); } - inline void setMemoryBound(const std::vector> &memory_bound_) { memory_bound = memory_bound_; } - - inline void setMemoryBound(v_memw_t memory_bound_, unsigned proc) { - - if (proc >= number_processors) { - throw std::invalid_argument("Invalid Argument setMemoryBound"); + /** + * @brief Sets the memory bound for all processors using a vector. + * @param MemoryBound The vector of memory bounds. + * @throws std::invalid_argument if the size of the vector is invalid. 
+ */ + void setMemoryBound(const std::vector> &MemoryBound) { + if (MemoryBound.size() != numberOfProcessors_) { + throw std::invalid_argument("Invalid Argument: Memory bound vector size does not match number of processors."); } + memoryBound_ = MemoryBound; + } - memory_bound[proc] = memory_bound_; + /** + * @brief Sets the memory bound for a specific processor. + * @param MemoryBound The new memory bound for the processor. + * @param processorIndex The processor index. Must be less than numberOfProcessors_. + */ + void setMemoryBound(const v_memw_t MemoryBound, const unsigned processorIndex) { + memoryBound_.at(processorIndex) = MemoryBound; } /** - * @brief Sets the synchronization costs for the BspArchitecture. - * - * This function sets the synchronization costs for the BspArchitecture object. - * The synchronization costs represent the costs of establishing communication between processors. - * - * @param synch_cost The synchronization costs to be set. + * @brief Sets the synchronization costs. + * @param SynchCost The new synchronization costs. */ - inline void setSynchronisationCosts(v_commw_t synch_cost) { synchronisation_costs = synch_cost; } + void setSynchronisationCosts(const v_commw_t SynchCost) { synchronisationCosts_ = SynchCost; } /** - * @brief Sets the communication costs for the BspArchitecture. - * - * This function sets the communication costs for the BspArchitecture object. - * The communication costs represent the costs of sending messages between processors. - * - * @param comm_cost The communication costs to be set. + * @brief Sets the communication costs. + * @param CommCost The new communication costs. */ - inline void setCommunicationCosts(v_commw_t comm_cost) { communication_costs = comm_cost; } + void setCommunicationCosts(const v_commw_t CommCost) { communicationCosts_ = CommCost; } /** - * @brief Sets the number of processors in the BSP architecture. 
- * - * This function sets the number of processors in the BSP architecture and sets the send costs between processors - * to 1. The send_costs matrix represents the costs of sending messages between processors. The diagonal elements of - * the matrix are set to 0, indicating that there is no cost to send a message from a processor to itself. - * - * @param num_proc The number of processors in the BSP architecture. + * @brief Checks if the architecture is NUMA. + * @return True if NUMA, false otherwise. */ - void setNumberOfProcessors(unsigned num_proc) { + [[nodiscard]] bool isNumaArchitecture() const { return isNuma_; } - number_processors = num_proc; - number_of_processor_types = 1; - processor_type = std::vector(number_processors, 0); - send_costs = std::vector>>( - number_processors, std::vector>(number_processors, 1)); - for (unsigned i = 0; i < number_processors; i++) { - send_costs[i][i] = 0; + /** + * @brief Sets the number of processors. Processor type is set to 0 for all processors. + * Resets send costs to uniform (1) and diagonal to 0. The memory bound is set to 100 for all processors. + * @param numberOfProcessors The number of processors. Must be greater than 0. + * @throws std::invalid_argument if the number of processors is 0. + */ + void setNumberOfProcessors(const unsigned numberOfProcessors) { + if (numberOfProcessors == 0) { + throw std::invalid_argument("Invalid Argument: Number of processors must be greater than 0."); } - memory_bound.resize(num_proc, memory_bound.back()); + numberOfProcessors_ = numberOfProcessors; + numberOfProcessorTypes_ = 1U; + processorTypes_.assign(numberOfProcessors_, 0U); + + InitializeUniformSendCosts(); - isNuma = false; + // initialize memory bound to 100 for all processors + memoryBound_.assign(numberOfProcessors_, 100U); } /** - * @brief Sets the number of processors and their types in the BSP architecture. 
- * - * This function sets the number of processors in the BSP architecture and sets the send costs between processors - * to 1. The send_costs matrix represents the costs of sending messages between processors. The diagonal elements of - * the matrix are set to 0, indicating that there is no cost to send a message from a processor to itself. - * - * @param processor_types_ The type of the respective processors. + * @brief Sets the number of processors and their types. Number of processors is set to the size of the processor types vector. + * Resets send costs to uniform (1). Resets memory bound to 100 for all processors. + * @param processorTypes The types of the respective processors. */ - void setProcessorsWithTypes(const std::vector> &processor_types_) { - - if (processor_types_.size() > std::numeric_limits::max()) { - throw std::invalid_argument("Invalid Argument, number of processors exceeds the limit"); + void setProcessorsWithTypes(const std::vector> &processorTypes) { + if (processorTypes.empty()) { + throw std::invalid_argument("Invalid Argument: Processor types vector is empty."); } - - number_processors = static_cast(processor_types_.size()); - - number_of_processor_types = 0; - processor_type = processor_types_; - send_costs = std::vector>>( - number_processors, std::vector>(number_processors, 1)); - for (unsigned i = 0; i < number_processors; i++) { - send_costs[i][i] = 0; + if (processorTypes.size() > std::numeric_limits::max()) { + throw std::invalid_argument("Invalid Argument: Number of processors exceeds the limit."); } - memory_bound.resize(number_processors, memory_bound.back()); + numberOfProcessors_ = static_cast(processorTypes.size()); + processorTypes_ = processorTypes; - isNuma = false; - updateNumberOfProcessorTypes(); + InitializeUniformSendCosts(); + + // initialize memory bound to 100 for all processors + memoryBound_.assign(numberOfProcessors_, 100U); + UpdateNumberOfProcessorTypes(); } /** - * Returns whether the architecture is NUMA. 
- * - * @return True if the architecture is NUMA, false otherwise. + * @brief Sets processors based on counts of consecutive types. + * The architecture will have processorTypeCount[0] processors of type 0, processorTypeCount[1] processors of type 1, etc. + * The memory bound for each processor of type i is set to processorTypeMemory[i]. + * The send costs are set to uniform (1). + * @param processorTypeCount Vector where index is type and value is count of processors of that type. + * @param processorTypeMemory Vector where index is type and value is memory bound for that type. */ - inline bool isNumaArchitecture() const { return isNuma; } - - void set_processors_consequ_types(const std::vector> &processor_type_count_, - const std::vector> &processor_type_memory_) { - - if (processor_type_count_.size() != processor_type_memory_.size()) { - throw std::invalid_argument( - "Invalid Argument, processor_type_count_ and processor_type_memory_ must have the same size"); + void SetProcessorsConsequTypes(const std::vector> &processorTypeCount, + const std::vector> &processorTypeMemory) { + if (processorTypeCount.size() != processorTypeMemory.size()) { + throw std::invalid_argument("Invalid Argument: processorTypeCount and processorTypeMemory must have the same size."); } - if (processor_type_count_.size() > std::numeric_limits::max()) { - throw std::invalid_argument("Invalid Argument, number of processors exceeds the limit"); + if (processorTypeCount.size() > std::numeric_limits::max()) { + throw std::invalid_argument("Invalid Argument: Number of processors exceeds the limit."); } - number_of_processor_types = static_cast(processor_type_count_.size()); - number_processors = std::accumulate(processor_type_count_.begin(), processor_type_count_.end(), 0u); - - processor_type = std::vector>(number_processors, 0); - memory_bound = std::vector>(number_processors, 0); + numberOfProcessorTypes_ = static_cast(processorTypeCount.size()); + numberOfProcessors_ = 
std::accumulate(processorTypeCount.begin(), processorTypeCount.end(), 0U); - unsigned offset = 0; - for (unsigned i = 0; i < processor_type_count_.size(); i++) { + // initialize processor types and memory bound + processorTypes_.assign(numberOfProcessors_, 0U); + memoryBound_.assign(numberOfProcessors_, 0U); - for (unsigned j = 0; j < processor_type_count_[i]; j++) { - processor_type[offset + j] = i; - memory_bound[offset + j] = processor_type_memory_[i]; + unsigned offset = 0U; + for (unsigned i = 0U; i < processorTypeCount.size(); i++) { + for (unsigned j = 0U; j < processorTypeCount.at(i); j++) { + processorTypes_.at(offset + j) = i; + memoryBound_.at(offset + j) = processorTypeMemory.at(i); } - offset += processor_type_count_[i]; + offset += processorTypeCount.at(i); } - send_costs = std::vector>>( - number_processors, std::vector>(number_processors, 1)); - for (unsigned i = 0; i < number_processors; i++) { - send_costs[i][i] = 0; - } - isNuma = false; + InitializeUniformSendCosts(); } /** - * Returns the memory bound of the BspArchitecture. - * - * @return The memory bound as an unsigned integer. + * @brief Returns the memory bounds of all processors. + * @return Vector of memory bounds. */ - inline const std::vector> &memoryBound() const { return memory_bound; } + [[nodiscard]] const std::vector> &memoryBound() const { return memoryBound_; } - inline v_memw_t memoryBound(unsigned proc) const { return memory_bound[proc]; } + /** + * @brief Returns the memory bound of a specific processor. + * @param proc The processor index. + * @return The memory bound. 
+ */ + [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return memoryBound_[proc]; } - v_memw_t minMemoryBound() const { return *(std::min_element(memory_bound.begin(), memory_bound.end())); } - v_memw_t maxMemoryBound() const { return *(std::max_element(memory_bound.begin(), memory_bound.end())); } - v_memw_t sumMemoryBound() const { return std::accumulate(memory_bound.begin(), memory_bound.end(), 0); } + /** + * @brief Returns the maximum memory bound over all processors. + * @return The maximum memory bound. + */ + [[nodiscard]] v_memw_t maxMemoryBound() const { return *(std::max_element(memoryBound_.begin(), memoryBound_.end())); } - v_memw_t maxMemoryBoundProcType(v_type_t procType) const { - v_memw_t max_mem = 0; - for (unsigned proc = 0; proc < number_processors; proc++) { - if (processor_type[proc] == procType) { - max_mem = std::max(max_mem, memory_bound[proc]); + /** + * @brief Returns the maximum memory bound over all processors of a specific type. + * + * @param procType The processor type. + * @return The maximum memory bound. + */ + [[nodiscard]] v_memw_t maxMemoryBoundProcType(const v_type_t procType) const { + v_memw_t max_mem = 0U; + for (unsigned proc = 0U; proc < numberOfProcessors_; proc++) { + if (processorTypes_[proc] == procType) { + max_mem = std::max(max_mem, memoryBound_[proc]); } } return max_mem; } /** - * Returns the number of processors in the architecture. - * + * @brief Returns the number of processors. * @return The number of processors. */ - inline unsigned numberOfProcessors() const { return number_processors; } + [[nodiscard]] unsigned numberOfProcessors() const { return numberOfProcessors_; } /** - * Returns the communication costs of the BSP architecture. - * - * @return The communication costs as an unsigned integer. + * @brief Returns the communication costs. + * @return The communication costs. 
*/ - inline v_commw_t communicationCosts() const { return communication_costs; } + [[nodiscard]] v_commw_t communicationCosts() const { return communicationCosts_; } /** - * Returns the synchronization costs of the BspArchitecture. - * - * @return The synchronization costs as an unsigned integer. + * @brief Returns the synchronization costs. + * @return The synchronization costs. */ - inline v_commw_t synchronisationCosts() const { return synchronisation_costs; } + [[nodiscard]] v_commw_t synchronisationCosts() const { return synchronisationCosts_; } /** - * Returns a copy of the send costs matrix. - * - * @return A copy of the send costs matrix. + * @brief Returns a the send costs matrix. Internally the matrix is stored as a flattened matrix. The allocates, computes and returns the matrix on the fly. + * @return The send costs matrix. */ - inline std::vector>> sendCostMatrixCopy() const { return send_costs; } + [[nodiscard]] std::vector>> sendCost() const { + std::vector>> matrix(numberOfProcessors_, std::vector>(numberOfProcessors_)); + for (unsigned i = 0; i < numberOfProcessors_; ++i) { + for (unsigned j = 0; j < numberOfProcessors_; ++j) { + matrix[i][j] = sendCosts_[FlatIndex(i, j)]; + } + } + return matrix; + } /** - * Returns a reference to the send costs matrix. - * - * @return A reference to the send costs matrix. + * @brief Returns the flattened send costs vector. + * @return The send costs vector. */ - inline const std::vector>> &sendCostMatrix() const { return send_costs; } + [[nodiscard]] const std::vector> &sendCostsVector() const { return sendCosts_; } - // the type indeces of the processor (e.g. CPU, vector/tensor core) - inline const std::vector &processorTypes() const { return processor_type; } + /** + * @brief Returns the processor types. + * @return Vector of processor types. + */ + [[nodiscard]] const std::vector &processorTypes() const { return processorTypes_; } /** - * Returns the communication costs between two processors. 
The communication costs are the send costs multiplied by - * the communication costs. + * @brief Returns the communication costs between two processors. Does not perform bounds checking. + * The communication costs are the send costs multiplied by the communication costs factor. * * @param p1 The index of the first processor. * @param p2 The index of the second processor. - * - * @return The send costs between the two processors. + * @return The communication costs between the two processors. */ - inline v_commw_t communicationCosts(unsigned p1, unsigned p2) const { - return communication_costs * send_costs[p1][p2]; + [[nodiscard]] v_commw_t communicationCosts(const unsigned p1, const unsigned p2) const { + return communicationCosts_ * sendCosts_[FlatIndex(p1, p2)]; } /** - * Returns the send costs between two processors. + * @brief Returns the send costs between two processors. Does not perform bounds checking. + * Does not the communication costs into account. * * @param p1 The index of the first processor. * @param p2 The index of the second processor. - * * @return The send costs between the two processors. */ - inline v_commw_t sendCosts(unsigned p1, unsigned p2) const { return send_costs[p1][p2]; } - - inline auto sendCosts() const { return send_costs; } - - // the type index of the processor (e.g. CPU, vector/tensor core) - inline v_type_t processorType(unsigned p1) const { return processor_type[p1]; } + [[nodiscard]] v_commw_t sendCosts(const unsigned p1, const unsigned p2) const { return sendCosts_[FlatIndex(p1, p2)]; } - void setProcessorType(unsigned p1, v_type_t type) { - - if (p1 >= number_processors) - throw std::invalid_argument("Invalid Argument"); + /** + * @brief Returns the type of a specific processor. Does not perform bounds checking. + * @param p1 The processor index. + * @return The processor type. 
+ */ + [[nodiscard]] v_type_t processorType(const unsigned p1) const { return processorTypes_[p1]; } - processor_type[p1] = type; - number_of_processor_types = std::max(number_of_processor_types, type + 1u); + /** + * @brief Sets the type of a specific processor. Performs bounds checking. + * @param p1 The processor index. + * @param type The new processor type. + */ + void setProcessorType(const unsigned p1, const v_type_t type) { + processorTypes_.at(p1) = type; + numberOfProcessorTypes_ = std::max(numberOfProcessorTypes_, type + 1U); } - std::vector getProcessorTypeCount() const { - - std::vector type_count(number_of_processor_types, 0u); - for (unsigned p = 0u; p < number_processors; p++) { - type_count[processor_type[p]]++; + /** + * @brief Returns the count of processors for each type. + * @return Vector where index is type and value is count. + */ + [[nodiscard]] std::vector getProcessorTypeCount() const { + std::vector type_count(numberOfProcessorTypes_, 0U); + for (unsigned p = 0U; p < numberOfProcessors_; p++) { + type_count[processorTypes_[p]]++; } return type_count; } - unsigned getMinProcessorTypeCount() const { - const auto &type_count = getProcessorTypeCount(); - if (type_count.empty()) { - return 0; - } - return *std::min_element(type_count.begin(), type_count.end()); - } - - void print_architecture(std::ostream &os) const { - - os << "Architectur info: number of processors: " << number_processors - << ", Number of processor types: " << number_of_processor_types - << ", Communication costs: " << communication_costs << ", Synchronization costs: " << synchronisation_costs - << std::endl; + /** + * @brief Prints the architecture details to the output stream. + * @param os The output stream. 
+ */ + void print(std::ostream &os) const { + os << "Architecture info: number of processors: " << numberOfProcessors_ + << ", Number of processor types: " << numberOfProcessorTypes_ + << ", Communication costs: " << communicationCosts_ << ", Synchronization costs: " << synchronisationCosts_ + << "\n"; os << std::setw(17) << " Processor: "; - for (unsigned i = 0; i < number_processors; i++) { + for (unsigned i = 0U; i < numberOfProcessors_; i++) { os << std::right << std::setw(5) << i << " "; } - os << std::endl; + os << "\n"; os << std::setw(17) << "Processor type: "; - for (unsigned i = 0; i < number_processors; i++) { - os << std::right << std::setw(5) << processor_type[i] << " "; + for (unsigned i = 0U; i < numberOfProcessors_; i++) { + os << std::right << std::setw(5) << processorTypes_.at(i) << " "; } - os << std::endl; + os << "\n"; os << std::setw(17) << "Memory bound: "; - for (unsigned i = 0; i < number_processors; i++) { - os << std::right << std::setw(5) << memory_bound[i] << " "; - } - os << std::endl; - } - - void updateNumberOfProcessorTypes() { - number_of_processor_types = 0; - for (unsigned p = 0; p < number_processors; p++) { - if (processor_type[p] >= number_of_processor_types) { - number_of_processor_types = processor_type[p] + 1; - } - } - } - - std::vector> getProcessorIdsByType() const { - std::vector> processor_ids_by_type(number_of_processor_types); - for (unsigned i = 0; i < numberOfProcessors(); ++i) { - processor_ids_by_type[processorType(i)].push_back(i); + for (unsigned i = 0U; i < numberOfProcessors_; i++) { + os << std::right << std::setw(5) << memoryBound_.at(i) << " "; } - return processor_ids_by_type; + os << "\n"; } - inline unsigned getNumberOfProcessorTypes() const { return number_of_processor_types; }; + [[nodiscard]] unsigned getNumberOfProcessorTypes() const { return numberOfProcessorTypes_; }; - inline MEMORY_CONSTRAINT_TYPE getMemoryConstraintType() const { return memory_const_type; } - inline void 
setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE memory_const_type_) { - memory_const_type = memory_const_type_; + [[nodiscard]] MEMORY_CONSTRAINT_TYPE getMemoryConstraintType() const { return memoryConstraintType_; } + void setMemoryConstraintType(const MEMORY_CONSTRAINT_TYPE memoryConstraintType) { + memoryConstraintType_ = memoryConstraintType; } }; diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 4e31d145..bed4fd40 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -27,54 +27,73 @@ limitations under the License. namespace osp { -enum class RETURN_STATUS { OSP_SUCCESS, BEST_FOUND, TIMEOUT, ERROR }; - -inline std::string to_string(const RETURN_STATUS status) { - switch (status) { - case RETURN_STATUS::OSP_SUCCESS: - return "SUCCESS"; - case RETURN_STATUS::BEST_FOUND: - return "BEST FOUND"; - case RETURN_STATUS::TIMEOUT: - return "TIMEOUT"; - case RETURN_STATUS::ERROR: - return "ERROR"; - default: - return "UNKNOWN"; - } -} - -inline std::ostream& operator<<(std::ostream& os, RETURN_STATUS status) { - switch (status) { - case RETURN_STATUS::OSP_SUCCESS: os << "SUCCESS"; break; - case RETURN_STATUS::BEST_FOUND: os << "BEST_FOUND"; break; - case RETURN_STATUS::TIMEOUT: os << "TIMEOUT"; break; - case RETURN_STATUS::ERROR: os << "ERROR"; break; - default: os << "UNKNOWN"; break; - } - return os; -} - /** * @class BspInstance - * @brief Represents an instance of the BSP (Bulk Synchronous Parallel) model. + * @brief Represents a scheduling problem instance for the Bulk Synchronous Parallel (BSP) model. + * + * The BspInstance class serves as a container for all the necessary information to define a + * BSP scheduling problem. It acts as the "ground" object that holds the actual implementation + * of the graph and architecture. + * + * It aggregates three main components: + * + * 1. **Computational DAG**: The directed acyclic graph representing the program to be executed. 
+ * It defines the tasks (nodes), their dependencies (directed edges), and associated weights (work, memory, communication). * - * The BspInstance class encapsulates the computational DAG (Directed Acyclic Graph) and the BSP architecture - * for a specific instance of the BSP model. It provides methods to access and modify the architecture and DAG, - * as well as retrieve information about the instance such as the number of vertices and processors. + * 2. **BSP Architecture**: The hardware model description, including the number of processors, + * their types, memory bounds, and communication/synchronization costs. + * Note that processor indices are represented using `unsigned`. + * + * 3. **Node-Processor Compatibility**: A matrix defining which node types can be executed on which + * processor types. This enables the modeling of heterogeneous systems (e.g., CPU + GPU) where + * certain nodes are restricted to specific hardware accelerators. + * + * @warning Be careful when assigning an existing graph to a BspInstance. Depending on the + * constructor or assignment operator used, this may result in a deep copy of the graph structure, + * which can be expensive for large graphs. + * + * This class provides a unified interface to access and modify these components, facilitating + * the development of scheduling algorithms that need to query problem constraints and properties. + * + * @tparam Graph_t The type of the computational DAG, which must satisfy the `is_computational_dag` concept. */ template class BspInstance { - - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "BspInstance can only be used with computational DAGs."); private: + /** + * @brief The computational DAG representing the program structure. 
+ * + * It contains the graph topology (nodes and directed edges) as well as attributes such as node types, + * work weights, memory weights, and edge communication weights. + */ Graph_t cdag; + /** + * @brief The BSP architecture model. + * + * It defines the hardware characteristics including processor types, memory limits, + * communication bandwidth/latency (send costs), and global synchronization costs. + */ BspArchitecture architecture; - // for problem instances with heterogeneity + /** + * @brief Stores the compatibility between node types and processor types. + * + * The architecture defines a type for each processor, and the DAG defines a type for each node. + * This matrix stores for each node type and processor type whether they are compatible, i.e., + * if a node of that type can be assigned to a processor of the given type in a schedule. + * @note The outer vector is indexed by node type, the inner vector is indexed by processor type. + */ std::vector> nodeProcessorCompatibility = std::vector>({{true}}); + /** + * @brief The type of the vectex types in the computational DAG. + * If the DAG does not support vertex types, this is `unsigned`. + */ + using vertex_type_t_or_default = std::conditional_t, v_type_t, unsigned>; + using processor_type_t = unsigned; + public: /** * @brief Default constructor for the BspInstance class. @@ -83,6 +102,7 @@ class BspInstance { /** * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture. + * Computational DAG and BSP architecture are copied! * * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. @@ -93,6 +113,7 @@ class BspInstance { /** * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture. + * Computational DAG and BSP architecture are moved! * * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. 
@@ -110,191 +131,198 @@ class BspInstance { } BspInstance(const BspInstance &other) = default; - BspInstance(BspInstance &&other) = default; + BspInstance(BspInstance &&other) noexcept = default; BspInstance &operator=(const BspInstance &other) = default; - BspInstance &operator=(BspInstance &&other) = default; + BspInstance &operator=(BspInstance &&other) noexcept = default; /** - * @brief Returns a reference to the BSP architecture for the instance. - * - * @return A reference to the BSP architecture for the instance. + * @brief Returns a reference to the BSP architecture of the instance. + * Assigning the BSP architecture via the reference creates a copy of the architecture. + * The move operator may be used to transfer ownership of the architecture. */ - inline const BspArchitecture &getArchitecture() const { return architecture; } + [[nodiscard]] const BspArchitecture &getArchitecture() const { return architecture; } + [[nodiscard]] BspArchitecture &getArchitecture() { return architecture; } /** - * @brief Returns a reference to the BSP architecture for the instance. - * - * @return A reference to the BSP architecture for the instance. + * @brief Returns a reference to the computational DAG of the instance. + * Assigning the computational DAG via the reference creates a copy of the DAG. + * The move operator may be used to transfer ownership of the DAG. */ - inline BspArchitecture &getArchitecture() { return architecture; } + [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; } + [[nodiscard]] Graph_t &getComputationalDag() { return cdag; } /** - * @brief Sets the BSP architecture for the instance. - * - * @param architecture_ The BSP architecture for the instance. + * @brief Returns the number of vertices in the computational DAG. 
*/ - inline void setArchitecture(const BspArchitecture &architechture_) { architecture = architechture_; } + [[nodiscard]] vertex_idx_t numberOfVertices() const { return cdag.num_vertices(); } /** - * @brief Returns a reference to the computational DAG for the instance. - * - * @return A reference to the computational DAG for the instance. + * @brief Returns a view over the vertex indices of the computational DAG. */ - inline const Graph_t &getComputationalDag() const { return cdag; } + [[nodiscard]] auto vertices() const { return cdag.vertices(); } /** - * @brief Returns a reference to the computational DAG for the instance. - * - * @return A reference to the computational DAG for the instance. + * @brief Returns a view over the processor indices of the BSP architecture. */ - inline Graph_t &getComputationalDag() { return cdag; } - - inline vertex_idx_t numberOfVertices() const { return cdag.num_vertices(); } - - inline auto vertices() const { return cdag.vertices(); } - - inline auto processors() const { return architecture.processors(); } + [[nodiscard]] auto processors() const { return architecture.processors(); } /** * @brief Returns the number of processors in the BSP architecture. - * - * @return The number of processors in the BSP architecture. */ - inline unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); } + [[nodiscard]] unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); } /** - * @brief Returns the communication costs between two processors. - * + * @brief Returns the communication costs between two processors. Does not perform bounds checking. * The communication costs are the send costs multiplied by the communication costs. * - * @param p1 The index of the first processor. - * @param p2 The index of the second processor. - * - * @return The communication costs between the two processors. + * @param p_send The index of the sending processor. 
+ * @param p_receive The index of the receiving processor. */ - inline v_commw_t communicationCosts(unsigned int p1, unsigned int p2) const { - return architecture.communicationCosts(p1, p2); + [[nodiscard]] v_commw_t communicationCosts(const unsigned p_send, const unsigned p_receive) const { + return architecture.communicationCosts(p_send, p_receive); } /** - * @brief Returns the send costs between two processors. - * - * - * @param p1 The index of the first processor. - * @param p2 The index of the second processor. + * @brief Returns the send costs between two processors. Does not perform bounds checking. + * Does not the communication costs into account. * - * @return The send costs between the two processors. + * @param p_send The index of the sending processor. + * @param p_receive The index of the receiving processor. */ - inline v_commw_t sendCosts(unsigned int p1, unsigned int p2) const { - return architecture.sendCosts(p1, p2); + [[nodiscard]] v_commw_t sendCosts(const unsigned p_send, const unsigned p_receive) const { + return architecture.sendCosts(p_send, p_receive); } /** * @brief Returns a copy of the send costs matrix. - * - * @return A copy of the send costs matrix. */ - inline const std::vector>> &sendCostMatrix() const { - return architecture.sendCostMatrix(); + [[nodiscard]] std::vector>> sendCosts() const { return architecture.sendCosts(); } + + /** + * @brief Returns the flattened send costs vector. + */ + [[nodiscard]] const std::vector> &sendCostsVector() const { + return architecture.sendCostsVector(); } /** * @brief Returns the communication costs of the BSP architecture. - * - * @return The communication costs as an unsigned integer. */ - inline v_commw_t communicationCosts() const { return architecture.communicationCosts(); } + [[nodiscard]] v_commw_t communicationCosts() const { return architecture.communicationCosts(); } /** * @brief Returns the synchronization costs of the BSP architecture. 
- * - * @return The synchronization costs as an unsigned integer. */ - inline v_commw_t synchronisationCosts() const { return architecture.synchronisationCosts(); } + [[nodiscard]] v_commw_t synchronisationCosts() const { return architecture.synchronisationCosts(); } /** - * @brief Returns whether the architecture is NUMA. - * - * @return True if the architecture is NUMA, false otherwise. + * @brief Returns the memory bound for a specific processor. + * @param proc The processor index. */ - inline bool isNumaInstance() const { return architecture.isNumaArchitecture(); } - - inline v_memw_t memoryBound(unsigned proc) const { return architecture.memoryBound(proc); } - - v_memw_t maxMemoryBoundProcType(unsigned procType) const { - return architecture.maxMemoryBoundProcType(procType); - } - - v_memw_t maxMemoryBoundNodeType(unsigned nodeType) const { - int max_mem = 0; - for (unsigned proc = 0; proc < architecture.getNumberOfProcessorTypes(); proc++) { - if (isCompatibleType(nodeType, architecture.processorType(proc))) { - max_mem = std::max(max_mem, architecture.memoryBound(proc)); - } - } - return max_mem; - } + [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); } /** * @brief Sets the communication costs of the BSP architecture. - * * @param cost The communication costs to set. */ - inline void setCommunicationCosts(const v_commw_t cost) { architecture.setCommunicationCosts(cost); } + void setCommunicationCosts(const v_commw_t cost) { architecture.setCommunicationCosts(cost); } /** * @brief Sets the synchronisation costs of the BSP architecture. - * * @param cost The synchronisation costs to set. */ - inline void setSynchronisationCosts(const v_commw_t cost) { architecture.setSynchronisationCosts(cost); } + void setSynchronisationCosts(const v_commw_t cost) { architecture.setSynchronisationCosts(cost); } + + /** + * @brief Sets the number of processors. Processor type is set to 0 for all processors. 
+ * Resets send costs to uniform (1) and diagonal to 0. The memory bound is set to 100 for all processors. + * @param num The number of processors. Must be greater than 0. + * @throws std::invalid_argument if the number of processors is 0. + */ + void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } + + /** + * @brief Returns the processor type for a given processor index. Does not perform bounds checking. + * @param proc The processor index. + */ + [[nodiscard]] vertex_type_t_or_default processorType(const unsigned proc) const { return architecture.processorType(proc); } /** - * @brief Sets the number of processors in the BSP architecture. + * @brief Checks if a node is compatible with a processor. Does not perform bounds checking. * - * @param num The number of processors to set. + * @param node The node index. + * @param processor_id The processor index. + * @return True if the node is compatible with the processor, false otherwise. */ - inline void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } + [[nodiscard]] bool isCompatible(const vertex_idx_t &node, const unsigned processor_id) const { + return isCompatibleType(cdag.vertex_type(node), architecture.processorType(processor_id)); + } - bool check_memory_constraints_feasibility() const { + /** + * @brief Checks if a node type is compatible with a processor type. Does not perform bounds checking. + * + * @param nodeType The node type. + * @param processorType The processor type. + * @return True if the node type is compatible with the processor type, false otherwise. 
+ */ + [[nodiscard]] bool isCompatibleType(const vertex_type_t_or_default nodeType, const processor_type_t processorType) const { + return nodeProcessorCompatibility[nodeType][processorType]; + } - std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] = - std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); - } - for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { - v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); - bool fits = false; + /** + * @brief Sets the node-processor compatibility matrix. The matrix is copied. Dimensions are not checked. + * @param compatibility_ The compatibility matrix. + */ + void setNodeProcessorCompatibility(const std::vector> &compatibility_) { + nodeProcessorCompatibility = compatibility_; + } - for (unsigned proc_type = 0; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { - if (isCompatibleType(vertType, proc_type)) { - fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); - if (fits) - break; - } - } + /** + * @brief Returns the node-processor compatibility matrix. + */ + [[nodiscard]] const std::vector> &getNodeProcessorCompatibilityMatrix() const { + return nodeProcessorCompatibility; + } - if (!fits) - return false; - } + /** + * @brief Returns the node type - processor type compatibility matrix. + */ + [[nodiscard]] const std::vector> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } - return true; + /** + * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`. + * @param number_of_types The number of types. 
+ */ + void setDiagonalCompatibilityMatrix(const vertex_type_t_or_default number_of_types) { + nodeProcessorCompatibility.assign(number_of_types, std::vector(number_of_types, false)); + for (vertex_type_t_or_default i = 0; i < number_of_types; ++i) + nodeProcessorCompatibility[i][i] = true; } - void adjust_memory_constraints() { + /** + * @brief Sets the compatibility matrix to all ones. This implies that all node types are compatible with all processor types. + */ + void setAllOnesCompatibilityMatrix() { + nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector(architecture.getNumberOfProcessorTypes(), true)); + } + /** + * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors. + * @return True if the memory constraints are feasible, false otherwise. + */ + [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { + for (unsigned proc = 0U; proc < architecture.numberOfProcessors(); proc++) { max_memory_per_proc_type[architecture.processorType(proc)] = std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); } - for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { + + for (vertex_type_t_or_default vertType = 0U; vertType < cdag.num_vertex_types(); vertType++) { v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); bool fits = false; - for (unsigned proc_type = 0; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { + for (processor_type_t proc_type = 0U; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { if (isCompatibleType(vertType, proc_type)) { fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); if (fits) @@ -302,140 +330,29 @@ class BspInstance { } } - if (!fits) { - std::cout << "Warning: 
Computational DAG memory weight exceeds architecture memory bound." << std::endl; - std::cout << "VertexType " << vertType << " has memory " - << " and exceeds compatible processor types memory limit." << std::endl; - - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - if (isCompatibleType(vertType, architecture.processorType(proc))) { - std::cout << "Increasing memory of processor " << proc << " of type " - << architecture.processorType(proc) << " to " << max_memory_of_type << "." - << std::endl; - architecture.setMemoryBound(max_memory_of_type, proc); - } - } - } + if (!fits) + return false; } - } - - inline v_type_t processorType(unsigned p1) const { return architecture.processorType(p1); } - - inline bool isCompatible(const vertex_idx_t &node, unsigned processor_id) const { - return isCompatibleType(cdag.vertex_type(node), architecture.processorType(processor_id)); - } - - inline bool isCompatibleType(v_type_t nodeType, v_type_t processorType) const { - - return nodeProcessorCompatibility[nodeType][processorType]; - } - - void setNodeProcessorCompatibility(const std::vector> &compatibility_) { - - nodeProcessorCompatibility = compatibility_; - } - - const std::vector> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } - - void setDiagonalCompatibilityMatrix(unsigned number_of_types) { - nodeProcessorCompatibility = - std::vector>(number_of_types, std::vector(number_of_types, false)); - for (unsigned i = 0; i < number_of_types; ++i) - nodeProcessorCompatibility[i][i] = true; - } - - void setAllOnesCompatibilityMatrix() { - - unsigned number_of_node_types = cdag.num_vertex_types(); - unsigned number_of_proc_types = architecture.getNumberOfProcessorTypes(); - - nodeProcessorCompatibility = - std::vector>(number_of_node_types, std::vector(number_of_proc_types, true)); + return true; } - std::vector> getProcTypesCompatibleWithNodeType() const { - unsigned numberOfNodeTypes = cdag.num_vertex_types(); - unsigned 
numberOfProcTypes = architecture.getNumberOfProcessorTypes(); - std::vector> compatibleProcTypes(numberOfNodeTypes); + /** + * @brief Returns a list of compatible processor types for each node type. + * @return A vector where the index is the node type and the value is a vector of compatible processor types. + */ + [[nodiscard]] std::vector> getProcTypesCompatibleWithNodeType() const { + vertex_type_t_or_default numberOfNodeTypes = cdag.num_vertex_types(); + processor_type_t numberOfProcTypes = architecture.getNumberOfProcessorTypes(); + std::vector> compatibleProcTypes(numberOfNodeTypes); - for (unsigned nodeType = 0; nodeType < numberOfNodeTypes; ++nodeType) - for (unsigned processorType = 0; processorType < numberOfProcTypes; ++processorType) + for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) + for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType) if (isCompatibleType(nodeType, processorType)) compatibleProcTypes[nodeType].push_back(processorType); return compatibleProcTypes; } - - std::vector> getNodeNodeCompatabilityMatrix() const { - std::vector> compMat(cdag.num_vertex_types(), - std::vector(cdag.num_vertex_types(), false)); - for (unsigned nodeType1 = 0; nodeType1 < cdag.num_vertex_types(); nodeType1++) { - for (unsigned nodeType2 = 0; nodeType2 < cdag.num_vertex_types(); nodeType2++) { - for (unsigned procType = 0; procType < architecture.getNumberOfProcessorTypes(); procType++) { - if (isCompatibleType(nodeType1, procType) && isCompatibleType(nodeType2, procType)) { - compMat[nodeType1][nodeType2] = true; - break; - } - } - } - } - return compMat; - } - - inline const std::vector> &getNodeProcessorCompatibilityMatrix() const { - return nodeProcessorCompatibility; - } -}; - -template -class compatible_processor_range { - - std::vector> type_processor_idx; - const BspInstance *instance = nullptr; - - public: - - compatible_processor_range() = default; - - 
compatible_processor_range(const BspInstance &inst) { - initialize(inst); - } - - inline void initialize(const BspInstance &inst) { - - instance = &inst; - - if constexpr (has_typed_vertices_v) { - - type_processor_idx = std::vector>(inst.getComputationalDag().num_vertex_types()); - - for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { - for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) - if (inst.isCompatibleType(v_type, inst.processorType(proc))) - type_processor_idx[v_type].push_back(proc); - - } - } - } - - inline const auto & compatible_processors_type(v_type_t type) const { - - assert(instance != nullptr); - - if constexpr (has_typed_vertices_v) { - return type_processor_idx[type]; - } else { - return instance->processors(); - } - } - - inline const auto & compatible_processors_vertex(vertex_idx_t vertex) const { - return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex)); - } - - }; - } // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index eeeaeec3..9e5a5d52 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -25,8 +25,8 @@ limitations under the License. 
#include "IBspSchedule.hpp" #include "IBspScheduleEval.hpp" -#include "SetSchedule.hpp" #include "osp/bsp/model/cost/LazyCommunicationCost.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" namespace osp { @@ -105,9 +105,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval, public IBspScheduleEval, public IBspScheduleEvalnumberOfVertices()) { node_to_superstep_assignment[node] = superstep; @@ -275,7 +273,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval, public IBspScheduleEval> getAssignedNodeVector(unsigned int processor) const { + [[nodiscard]] std::vector> getAssignedNodeVector(const unsigned processor) const { std::vector> vec; for (const auto &node : instance->vertices()) { @@ -498,7 +496,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> getAssignedNodeVector(unsigned int processor, unsigned int superstep) const { + [[nodiscard]] std::vector> getAssignedNodeVector(const unsigned processor, const unsigned superstep) const { std::vector> vec; for (const auto &node : instance->vertices()) { @@ -515,7 +513,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEvalvertices()) { @@ -572,11 +570,14 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval comm_phase_empty(number_of_supersteps, true); - for (const auto &node : instance->vertices()) - for (const auto &child : instance->getComputationalDag().children(node)) - if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) + for (const auto &node : instance->vertices()) { + for (const auto &child : instance->getComputationalDag().children(node)) { + if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) { for (unsigned offset = 1; offset <= getStaleness(); ++offset) comm_phase_empty[node_to_superstep_assignment[child] - offset] = false; + } + } + } std::vector 
new_step_index(number_of_supersteps); unsigned current_index = 0; @@ -585,9 +586,9 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalvertices()) + for (const auto &node : instance->vertices()) { node_to_superstep_assignment[node] = new_step_index[node_to_superstep_assignment[node]]; - + } setNumberOfSupersteps(current_index); } @@ -633,7 +634,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> current_proc_transient_memory(instance->numberOfProcessors(), 0); for (const auto &node : instance->vertices()) { - const unsigned proc = node_to_processor_assignment[node]; current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node); current_proc_transient_memory[proc] = std::max( @@ -659,7 +659,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> current_proc_memory(instance->numberOfProcessors(), 0); for (const auto &node : instance->vertices()) { - const unsigned proc = node_to_processor_assignment[node]; current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node); @@ -671,12 +670,10 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - v_memw_t memory = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { memory += instance->getComputationalDag().vertex_mem_weight(node) + @@ -701,12 +698,10 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - std::unordered_set> nodes_with_incoming_edges; v_memw_t memory = 0; @@ -714,7 +709,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalgetComputationalDag().vertex_comm_weight(node); for (const auto &parent : instance->getComputationalDag().parents(node)) { - if (node_to_superstep_assignment[parent] != step) { nodes_with_incoming_edges.insert(parent); } @@ -734,23 +728,19 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - 
std::unordered_set> nodes_with_incoming_edges; v_memw_t memory = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - if (is_source(node, instance->getComputationalDag())) { memory += instance->getComputationalDag().vertex_mem_weight(node); } for (const auto &parent : instance->getComputationalDag().parents(node)) { - if (node_to_superstep_assignment[parent] != step) { nodes_with_incoming_edges.insert(parent); } diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp new file mode 100644 index 00000000..c4d8df30 --- /dev/null +++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp @@ -0,0 +1,101 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include "osp/bsp/model/BspInstance.hpp" +#include + +namespace osp { + +/** + * @class CompatibleProcessorRange + * @brief Helper class to efficiently iterate over compatible processors for a given node or node type. + * + * This class precomputes and stores the list of compatible processors for each node type. + * + * @tparam Graph_t The type of the computational DAG. + */ +template +class CompatibleProcessorRange { + + std::vector> typeProcessorIdx; + const BspInstance *instance = nullptr; + + public: + /** + * @brief Default constructor. 
+ */ + CompatibleProcessorRange() = default; + + /** + * @brief Constructs a CompatibleProcessorRange for the given BspInstance. + * + * @param inst The BspInstance. + */ + CompatibleProcessorRange(const BspInstance &inst) { + initialize(inst); + } + + /** + * @brief Initializes the CompatibleProcessorRange with a BspInstance. + * + * @param inst The BspInstance. + */ + void initialize(const BspInstance &inst) { + instance = &inst; + + if constexpr (has_typed_vertices_v) { + typeProcessorIdx.resize(inst.getComputationalDag().num_vertex_types()); + + for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { + for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) + if (inst.isCompatibleType(v_type, inst.processorType(proc))) + typeProcessorIdx[v_type].push_back(proc); + } + } + } + + /** + * @brief Returns a range of compatible processors for a given node type. + * + * @param type The node type. + * @return A const reference to a vector of compatible processor indices. + */ + [[nodiscard]] const auto &compatible_processors_type(const v_type_t type) const { + assert(instance != nullptr); + if constexpr (has_typed_vertices_v) { + return typeProcessorIdx[type]; + } else { + return instance->processors(); + } + } + + /** + * @brief Returns a range of compatible processors for a given vertex. + * + * @param vertex The vertex index. + * @return A const reference to a vector of compatible processor indices. 
+ */ + [[nodiscard]] const auto &compatible_processors_vertex(const vertex_idx_t vertex) const { + assert(instance != nullptr); + return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex)); + } +}; + +} // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/model/SetSchedule.hpp b/include/osp/bsp/model/util/SetSchedule.hpp similarity index 99% rename from include/osp/bsp/model/SetSchedule.hpp rename to include/osp/bsp/model/util/SetSchedule.hpp index da851f98..61946fae 100644 --- a/include/osp/bsp/model/SetSchedule.hpp +++ b/include/osp/bsp/model/util/SetSchedule.hpp @@ -18,7 +18,7 @@ limitations under the License. #pragma once -#include "IBspSchedule.hpp" +#include "osp/bsp/model/IBspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" namespace osp { diff --git a/include/osp/bsp/model/VectorSchedule.hpp b/include/osp/bsp/model/util/VectorSchedule.hpp similarity index 99% rename from include/osp/bsp/model/VectorSchedule.hpp rename to include/osp/bsp/model/util/VectorSchedule.hpp index a81cc3e5..ea856c1b 100644 --- a/include/osp/bsp/model/VectorSchedule.hpp +++ b/include/osp/bsp/model/util/VectorSchedule.hpp @@ -18,7 +18,7 @@ limitations under the License. 
#pragma once -#include "IBspSchedule.hpp" +#include "osp/bsp/model/IBspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include diff --git a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp index 0e9df967..2e23c22e 100644 --- a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp +++ b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp @@ -42,17 +42,17 @@ class CoarseAndSchedule : public Scheduler { const auto &instance = schedule.getInstance(); BspInstance instance_coarse; - + std::vector> reverse_vertex_map; bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), - reverse_vertex_map); + reverse_vertex_map); if (!status) { return RETURN_STATUS::ERROR; - } + } - instance_coarse.setArchitecture(instance.getArchitecture()); + instance_coarse.getArchitecture() = instance.getArchitecture(); instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); BspSchedule schedule_coarse(instance_coarse); diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 38fae9ff..b5b4ea95 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -39,6 +39,8 @@ limitations under the License. namespace osp { +static constexpr unsigned CacheLineSize = 64; + template struct GrowLocalAutoCoresParallel_Params { vert_t minSuperstepSize = 20; diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp index aa199c45..45b58ca3 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp @@ -21,14 +21,14 @@ limitations under the License. 
#include #include +#include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" #include "osp/bsp/model/BspScheduleRecomp.hpp" #include "osp/bsp/model/MaxBspSchedule.hpp" #include "osp/bsp/model/MaxBspScheduleCS.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/auxiliary/io/DotFileWriter.hpp" namespace osp { @@ -111,17 +111,19 @@ class CoptFullScheduler : public Scheduler { if (allow_recomputation_cb) { - auto sched = constructBspScheduleRecompFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + - std::to_string(counter) + "_schedule.dot", sched); + auto sched = constructBspScheduleRecompFromCallback(); + DotFileWriter sched_writer; + sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + + std::to_string(counter) + "_schedule.dot", + sched); } else { - BspSchedule sched = constructBspScheduleFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + - std::to_string(counter) + "_schedule.dot", sched); + BspSchedule sched = constructBspScheduleFromCallback(); + DotFileWriter sched_writer; + sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + + std::to_string(counter) + "_schedule.dot", + sched); } counter++; } @@ -259,7 +261,7 @@ class CoptFullScheduler : public Scheduler { } } - if(is_max_bsp && number_of_supersteps>0) // can ignore last 2 comm phases in this case + if (is_max_bsp && number_of_supersteps > 0) // can ignore last 2 comm phases in this case --number_of_supersteps; schedule.getCommunicationSchedule().clear(); @@ -268,7 +270,7 @@ class CoptFullScheduler : public Scheduler { for 
(unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { if (p_from != p_to) { - for (unsigned int step = 0; step < number_of_supersteps-1; step++) { + for (unsigned int step = 0; step < number_of_supersteps - 1; step++) { if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step] [static_cast(node)] .Get(COPT_DBLINFO_VALUE) >= .99) { @@ -302,7 +304,7 @@ class CoptFullScheduler : public Scheduler { for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) { - for (unsigned step = 0; step < number_of_supersteps-1; step++) { + for (unsigned step = 0; step < number_of_supersteps - 1; step++) { if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { schedule.assignments(node).emplace_back(processor, step); @@ -334,46 +336,35 @@ class CoptFullScheduler : public Scheduler { } } - void loadInitialSchedule(Model &model, const BspInstance &instance) { if (use_initial_schedule_recomp && (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() || - instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { + instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() || + instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } if (!use_initial_schedule_recomp & use_initial_schedule && (max_number_supersteps < initial_schedule->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() || - instance.numberOfVertices() != 
initial_schedule->getInstance().numberOfVertices())) { + instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() || + instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } - const auto& DAG = use_initial_schedule_recomp ? - initial_schedule_recomp->getInstance().getComputationalDag() : - initial_schedule->getInstance().getComputationalDag(); + const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() : initial_schedule->getInstance().getComputationalDag(); - const auto& arch = use_initial_schedule_recomp ? - initial_schedule_recomp->getInstance().getArchitecture() : - initial_schedule->getInstance().getArchitecture(); + const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture() : initial_schedule->getInstance().getArchitecture(); - const unsigned& num_processors = use_initial_schedule_recomp ? - initial_schedule_recomp->getInstance().numberOfProcessors() : - initial_schedule->getInstance().numberOfProcessors(); + const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() : initial_schedule->getInstance().numberOfProcessors(); - const unsigned& num_supersteps = use_initial_schedule_recomp ? - initial_schedule_recomp->numberOfSupersteps() : - initial_schedule->numberOfSupersteps(); + const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() : initial_schedule->numberOfSupersteps(); - const auto &cs = use_initial_schedule_recomp ? - initial_schedule_recomp->getCommunicationSchedule() : - initial_schedule->getCommunicationSchedule(); + const auto &cs = use_initial_schedule_recomp ? 
initial_schedule_recomp->getCommunicationSchedule() : initial_schedule->getCommunicationSchedule(); - assert(max_number_supersteps <= static_cast( std::numeric_limits::max()) ); + assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); for (unsigned step = 0; step < max_number_supersteps; step++) { if (step < num_supersteps) { @@ -387,28 +378,23 @@ class CoptFullScheduler : public Scheduler { // model.SetMipStart(max_comm_superstep_var[step], COPT_INFINITY); } - std::vector > > computed(DAG.num_vertices()); - for (const auto &node : DAG.vertices()) - { - if(use_initial_schedule_recomp) - for (const std::pair& assignment : initial_schedule_recomp->assignments(node)) + std::vector>> computed(DAG.num_vertices()); + for (const auto &node : DAG.vertices()) { + if (use_initial_schedule_recomp) + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) computed[node].emplace(assignment); else - computed[node].emplace(initial_schedule->assignedProcessor(node),initial_schedule->assignedSuperstep(node)); + computed[node].emplace(initial_schedule->assignedProcessor(node), initial_schedule->assignedSuperstep(node)); } - std::vector > first_at(DAG.num_vertices(), std::vector(num_processors, std::numeric_limits::max())); - for (const auto &node : DAG.vertices()) - { - if(use_initial_schedule_recomp) - { - for (const std::pair& assignment : initial_schedule_recomp->assignments(node)) + std::vector> first_at(DAG.num_vertices(), std::vector(num_processors, std::numeric_limits::max())); + for (const auto &node : DAG.vertices()) { + if (use_initial_schedule_recomp) { + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second); - } - else - { + } else { first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)], - initial_schedule->assignedSuperstep(node) ); + 
initial_schedule->assignedSuperstep(node)); } } @@ -431,7 +417,7 @@ class CoptFullScheduler : public Scheduler { comm_processor_to_processor_superstep_node_var[p1][p2][step] [static_cast(node)], 1); - first_at[node][p2] = std::min(first_at[node][p2], step+staleness); + first_at[node][p2] = std::min(first_at[node][p2], step + staleness); } else { model.SetMipStart( comm_processor_to_processor_superstep_node_var[p1][p2][step] @@ -447,14 +433,15 @@ class CoptFullScheduler : public Scheduler { for (const auto &node : DAG.vertices()) for (unsigned proc = 0; proc < num_processors; proc++) - for(unsigned step = 0; step < max_number_supersteps; step++) - { - if(step >= first_at[node][proc]) + for (unsigned step = 0; step < max_number_supersteps; step++) { + if (step >= first_at[node][proc]) model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], 1); + [static_cast(node)], + 1); else model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], 0); + [static_cast(node)], + 0); } for (const auto &node : DAG.vertices()) { @@ -478,16 +465,13 @@ class CoptFullScheduler : public Scheduler { max_number_supersteps, std::vector>(num_processors, 0)); - if(use_initial_schedule_recomp) - { + if (use_initial_schedule_recomp) { for (const auto &node : initial_schedule_recomp->getInstance().vertices()) { - for (const std::pair& assignment : initial_schedule_recomp->assignments(node)) { + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { work[assignment.second][assignment.first] += DAG.vertex_work_weight(node); } } - } - else - { + } else { for (const auto &node : initial_schedule->getInstance().vertices()) work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] += DAG.vertex_work_weight(node); @@ -544,15 +528,14 @@ class CoptFullScheduler : public Scheduler { Variables */ - assert(max_number_supersteps <= static_cast( 
std::numeric_limits::max() )); - assert(instance.numberOfProcessors() <= static_cast( std::numeric_limits::max()) ); + assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); + assert(instance.numberOfProcessors() <= static_cast(std::numeric_limits::max())); // variables indicating if superstep is used at all superstep_used_var = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_used"); VarArray superstep_has_comm, mergeable_superstep_penalty; - if(is_max_bsp) - { + if (is_max_bsp) { // variables indicating if there is any communication in superstep superstep_has_comm = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_has_comm"); // variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp @@ -676,13 +659,12 @@ class CoptFullScheduler : public Scheduler { if (step > 0) { for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { - if(!is_max_bsp || p_from == processor){ + if (!is_max_bsp || p_from == processor) { expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1] - [static_cast(node)]; - } - else if(step > 1){ + [static_cast(node)]; + } else if (step > 1) { expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 2] - [static_cast(node)]; + [static_cast(node)]; } } } @@ -700,26 +682,25 @@ class CoptFullScheduler : public Scheduler { } // synchronization cost calculation & forcing continuous schedule in maxBsp - if(is_max_bsp) - { + if (is_max_bsp) { for (unsigned int step = 0; step < max_number_supersteps; step++) { Expr expr; for (const auto &node : instance.vertices()) { for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - if(p_from != p_to) + if (p_from != p_to) expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)]; } } } 
model.AddConstr(static_cast(instance.numberOfProcessors() * instance.numberOfProcessors() * instance.numberOfVertices()) * - superstep_has_comm[static_cast(step)] >= expr); + superstep_has_comm[static_cast(step)] >= + expr); } // if step i and (i+1) has no comm, and (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize for (unsigned int step = 0; step < max_number_supersteps - 2; step++) - model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] - - superstep_has_comm[static_cast(step + 1)] <= mergeable_superstep_penalty[static_cast(step)]); + model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] - superstep_has_comm[static_cast(step + 1)] <= mergeable_superstep_penalty[static_cast(step)]); } max_comm_superstep_var = @@ -784,7 +765,7 @@ class CoptFullScheduler : public Scheduler { // vertex type restrictions for (const vertex_idx_t &node : instance.vertices()) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - if(!instance.isCompatible(node, processor)) { + if (!instance.isCompatible(node, processor)) { for (unsigned int step = 0; step < max_number_supersteps; step++) { model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast(step)] == 0); } @@ -797,20 +778,17 @@ class CoptFullScheduler : public Scheduler { */ Expr expr; - if(is_max_bsp) - { + if (is_max_bsp) { VarArray max_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_superstep"); for (unsigned int step = 0; step < max_number_supersteps; step++) { model.AddConstr(max_superstep_var[static_cast(step)] >= max_work_superstep_var[static_cast(step)]); - if(step > 0) - model.AddConstr(max_superstep_var[static_cast(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast(step-1)]); + if (step > 0) + model.AddConstr(max_superstep_var[static_cast(step)] >= instance.communicationCosts() * 
max_comm_superstep_var[static_cast(step - 1)]); expr += max_superstep_var[static_cast(step)]; expr += instance.synchronisationCosts() * superstep_has_comm[static_cast(step)]; expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast(step)]; } - } - else - { + } else { for (unsigned int step = 0; step < max_number_supersteps; step++) { expr += max_work_superstep_var[static_cast(step)] + instance.communicationCosts() * max_comm_superstep_var[static_cast(step)] + @@ -877,7 +855,7 @@ class CoptFullScheduler : public Scheduler { // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; } - CoptFullScheduler(const BspScheduleRecomp &schedule) + CoptFullScheduler(const BspScheduleRecomp &schedule) : allow_recomputation(true), use_memory_constraint(false), use_initial_schedule_recomp(true), write_solutions_found(false), initial_schedule_recomp(&schedule), max_number_supersteps(schedule.numberOfSupersteps()) { @@ -931,7 +909,6 @@ class CoptFullScheduler : public Scheduler { return run_scheduler(schedule); } - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) override { allow_recomputation = false; is_max_bsp = false; @@ -1010,7 +987,6 @@ class CoptFullScheduler : public Scheduler { model.Solve(); } - /** * @brief Sets the provided schedule as the initial solution for the ILP. 
* diff --git a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp index 5d759687..c051c8dc 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp @@ -240,7 +240,7 @@ class TotalCommunicationScheduler : public Scheduler { SetSolution((*max_work_superstep_var_ptr)[static_cast(step)], max_work); } - if (instance_ptr->isNumaInstance()) { + if (instance_ptr->getArchitecture().isNumaArchitecture()) { for (unsigned p1 = 0; p1 < instance_ptr->numberOfProcessors(); p1++) { for (unsigned p2 = 0; p2 < instance_ptr->numberOfProcessors(); p2++) { @@ -670,7 +670,6 @@ class TotalCommunicationScheduler : public Scheduler { loadInitialSchedule(); } - model.SetIntParam(COPT_INTPARAM_THREADS, 128); model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp index af5bfd19..1c544fd1 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp @@ -16,12 +16,12 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner */ -//#define KL_DEBUG +// #define KL_DEBUG #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/IBspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" @@ -73,17 +73,15 @@ class kl_current_schedule { using EdgeType = edge_desc_t; public: - kl_current_schedule(Ikl_cost_function *cost_f_) : cost_f(cost_f_) { -#ifdef KL_DEBUG +#ifdef KL_DEBUG if constexpr (use_memory_constraint) { std::cout << "KLCurrentSchedule constructor with memory constraint" << std::endl; } else { std::cout << "KLCurrentSchedule constructor without memory constraint" << std::endl; } #endif - } virtual ~kl_current_schedule() = default; @@ -358,7 +356,7 @@ class kl_current_schedule { if constexpr (use_memory_constraint) { memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } + } } virtual void initialize_current_schedule(const IBspSchedule &schedule) { diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp index f6c425bd..2cf0c631 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp @@ -97,7 +97,7 @@ struct kl_bsp_comm_cost_function { constexpr static bool is_max_comm_cost_function = true; kl_active_schedule *active_schedule; - compatible_processor_range *proc_range; + CompatibleProcessorRange *proc_range; const Graph_t *graph; const BspInstance *instance; @@ -119,7 +119,7 @@ struct 
kl_bsp_comm_cost_function { } void initialize(kl_active_schedule &sched, - compatible_processor_range &p_range) { + CompatibleProcessorRange &p_range) { active_schedule = &sched; proc_range = &p_range; instance = &sched.getInstance(); diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp index 50384c72..caaad9ca 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp @@ -24,24 +24,24 @@ limitations under the License. namespace osp { -template +template struct kl_hyper_total_comm_cost_function { - + using VertexType = vertex_idx_t; using kl_move = kl_move_struct; using kl_gain_update_info = kl_update_info; - + constexpr static unsigned window_range = 2 * window_size + 1; constexpr static bool is_max_comm_cost_function = false; kl_active_schedule *active_schedule; - compatible_processor_range *proc_range; + CompatibleProcessorRange *proc_range; const Graph_t *graph; const BspInstance *instance; - cost_t comm_multiplier = 1; + cost_t comm_multiplier = 1; cost_t max_comm_weight = 0; lambda_vector_container node_lambda_map; @@ -52,20 +52,20 @@ struct kl_hyper_total_comm_cost_function { const std::string name() const { return "toal_comm_cost"; } inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } - void initialize(kl_active_schedule &sched, compatible_processor_range &p_range) { + void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { active_schedule = &sched; proc_range = &p_range; instance = &sched.getInstance(); graph = &instance->getComputationalDag(); - comm_multiplier = 1.0 / instance->numberOfProcessors(); - 
node_lambda_map.initialize(graph->num_vertices(), instance->numberOfProcessors()); + comm_multiplier = 1.0 / instance->numberOfProcessors(); + node_lambda_map.initialize(graph->num_vertices(), instance->numberOfProcessors()); } struct empty_struct {}; using pre_move_comm_data_t = empty_struct; - inline empty_struct get_pre_move_comm_data(const kl_move& ) { return empty_struct(); } + inline empty_struct get_pre_move_comm_data(const kl_move &) { return empty_struct(); } cost_t compute_schedule_cost() { cost_t work_costs = 0; @@ -74,7 +74,7 @@ struct kl_hyper_total_comm_cost_function { } cost_t comm_costs = 0; - for(const auto vertex : graph->vertices()) { + for (const auto vertex : graph->vertices()) { const unsigned vertex_proc = active_schedule->assigned_processor(vertex); const cost_t v_comm_cost = graph->vertex_comm_weight(vertex); max_comm_weight = std::max(max_comm_weight, v_comm_cost); @@ -87,7 +87,7 @@ struct kl_hyper_total_comm_cost_function { if (node_lambda_map.increase_proc_count(vertex, target_proc)) { comm_costs += v_comm_cost * instance->communicationCosts(vertex_proc, target_proc); // is 0 if target_proc == vertex_proc } - } + } } return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); @@ -100,132 +100,132 @@ struct kl_hyper_total_comm_cost_function { } cost_t comm_costs = 0; - for(const auto vertex : graph->vertices()) { + for (const auto vertex : graph->vertices()) { const unsigned vertex_proc = active_schedule->assigned_processor(vertex); const cost_t v_comm_cost = graph->vertex_comm_weight(vertex); for (const auto lambdaproc_mult_pair : node_lambda_map.iterate_proc_entries(vertex)) { const auto &lambda_proc = lambdaproc_mult_pair.first; comm_costs += v_comm_cost * instance->communicationCosts(vertex_proc, lambda_proc); - } + } } return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); } - inline 
void update_datastructure_after_move(const kl_move & move, const unsigned start_step, const unsigned end_step) { - if (move.to_proc != move.from_proc) { + inline void update_datastructure_after_move(const kl_move &move, const unsigned start_step, const unsigned end_step) { + if (move.to_proc != move.from_proc) { for (const auto &source : instance->getComputationalDag().parents(move.node)) { const unsigned source_step = active_schedule->assigned_superstep(source); if (source_step < start_step || source_step > end_step) continue; - update_source_after_move(move, source); + update_source_after_move(move, source); } } } - inline void update_source_after_move(const kl_move & move, VertexType source) { + inline void update_source_after_move(const kl_move &move, VertexType source) { node_lambda_map.decrease_proc_count(source, move.from_proc); node_lambda_map.increase_proc_count(source, move.to_proc); } template - void update_node_comm_affinity(const kl_move &move, thread_data_t& thread_data, const cost_t& penalty, const cost_t& reward, std::map & max_gain_recompute, std::vector &new_nodes) { - + void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, const cost_t &reward, std::map &max_gain_recompute, std::vector &new_nodes) { + const unsigned start_step = thread_data.start_step; const unsigned end_step = thread_data.end_step; - + for (const auto &target : instance->getComputationalDag().children(move.node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); + const unsigned target_step = active_schedule->assigned_superstep(target); if (target_step < start_step || target_step > end_step) continue; - if(thread_data.lock_manager.is_locked(target)) + if (thread_data.lock_manager.is_locked(target)) continue; if (not thread_data.affinity_table.is_selected(target)) { - new_nodes.push_back(target); + new_nodes.push_back(target); continue; } if (max_gain_recompute.find(target) != max_gain_recompute.end()) { 
- max_gain_recompute[target].full_update = true; + max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); - } + } const unsigned target_proc = active_schedule->assigned_processor(target); - const unsigned target_start_idx = start_idx(target_step, start_step); + const unsigned target_start_idx = start_idx(target_step, start_step); auto &affinity_table = thread_data.affinity_table.at(target); if (move.from_step < target_step + (move.from_proc == target_proc)) { - const unsigned diff = target_step - move.from_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1: 0; - unsigned idx = target_start_idx; + const unsigned diff = target_step - move.from_step; + const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; + unsigned idx = target_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table[p][idx] -= penalty; - } - } + } + } if (idx - 1 < bound && is_compatible(target, move.from_proc)) { - affinity_table[move.from_proc][idx - 1] += penalty; + affinity_table[move.from_proc][idx - 1] += penalty; } } else { const unsigned diff = move.from_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.from_proc)) { - affinity_table[move.from_proc][idx] += reward; + const unsigned window_bound = end_idx(target_step, end_step); + unsigned idx = std::min(window_size + diff, window_bound); + + if (idx < window_bound && is_compatible(target, move.from_proc)) { + affinity_table[move.from_proc][idx] += reward; } idx++; - + for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : 
proc_range->compatible_processors_vertex(target)) { affinity_table[p][idx] += reward; - } - } + } + } } if (move.to_step < target_step + (move.to_proc == target_proc)) { - unsigned idx = target_start_idx; - const unsigned diff = target_step - move.to_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1: 0; + unsigned idx = target_start_idx; + const unsigned diff = target_step - move.to_step; + const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table[p][idx] += penalty; - } - } + } + } if (idx - 1 < bound && is_compatible(target, move.to_proc)) { - affinity_table[move.to_proc][idx - 1] -= penalty; + affinity_table[move.to_proc][idx - 1] -= penalty; } } else { const unsigned diff = move.to_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - + const unsigned window_bound = end_idx(target_step, end_step); + unsigned idx = std::min(window_size + diff, window_bound); + if (idx < window_bound && is_compatible(target, move.to_proc)) { - affinity_table[move.to_proc][idx] -= reward; + affinity_table[move.to_proc][idx] -= reward; } idx++; - + for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table[p][idx] -= reward; - } - } + } + } } - if (move.to_proc != move.from_proc) { + if (move.to_proc != move.from_proc) { const cost_t comm_gain = graph->vertex_comm_weight(move.node) * comm_multiplier; - + const unsigned window_bound = end_idx(target_step, end_step); - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : 
proc_range->compatible_processors_vertex(target)) { if (p == target_proc) continue; if (node_lambda_map.get_proc_entry(move.node, target_proc) == 1) { @@ -233,144 +233,143 @@ struct kl_hyper_total_comm_cost_function { const cost_t x = instance->communicationCosts(move.from_proc, target_proc) * comm_gain; const cost_t y = instance->communicationCosts(move.to_proc, target_proc) * comm_gain; affinity_table[p][idx] += x - y; - } + } } if (node_lambda_map.has_no_proc_entry(move.node, p)) { for (unsigned idx = target_start_idx; idx < window_bound; idx++) { const cost_t x = instance->communicationCosts(move.from_proc, p) * comm_gain; const cost_t y = instance->communicationCosts(move.to_proc, p) * comm_gain; - affinity_table[p][idx] -= x - y; + affinity_table[p][idx] -= x - y; } - } + } } - } + } } - for (const auto &source : instance->getComputationalDag().parents(move.node)) { + for (const auto &source : instance->getComputationalDag().parents(move.node)) { if (move.to_proc != move.from_proc) { - const unsigned source_proc = active_schedule->assigned_processor(source); - if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { + const unsigned source_proc = active_schedule->assigned_processor(source); + if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) - continue; + if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) + continue; - if (source_proc != move.from_proc && is_compatible(target, move.from_proc)) { 
+ if (source_proc != move.from_proc && is_compatible(target, move.from_proc)) { if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update - max_gain_recompute[target].full_update = true; + max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); - } + } - auto & affinity_table_target_from_proc = thread_data.affinity_table.at(target)[move.from_proc]; + auto &affinity_table_target_from_proc = thread_data.affinity_table.at(target)[move.from_proc]; const unsigned target_window_bound = end_idx(target_step, end_step); const cost_t comm_aff = instance->communicationCosts(source_proc, move.from_proc) * comm_gain; for (unsigned idx = start_idx(target_step, start_step); idx < target_window_bound; idx++) { affinity_table_target_from_proc[idx] += comm_aff; } } - } - } else if (node_lambda_map.get_proc_entry(source, move.from_proc) == 1) { + } + } else if (node_lambda_map.get_proc_entry(source, move.from_proc) == 1) { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target))) - continue; + if ((target_step < start_step || target_step > end_step) || (target == move.node) || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target))) + continue; const unsigned target_proc = active_schedule->assigned_processor(target); - if (target_proc == move.from_proc) { + if (target_proc == move.from_proc) { if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update - max_gain_recompute[target].full_update = true; + max_gain_recompute[target].full_update = true; 
} else { max_gain_recompute[target] = kl_gain_update_info(target, true); - } - + } + const unsigned target_start_idx = start_idx(target_step, start_step); const unsigned target_window_bound = end_idx(target_step, end_step); - auto & affinity_table_target = thread_data.affinity_table.at(target); + auto &affinity_table_target = thread_data.affinity_table.at(target); const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * comm_gain; for (const unsigned p : proc_range->compatible_processors_vertex(target)) { if (p == target_proc) - continue; - + continue; + for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) { affinity_table_target[p][idx] -= comm_aff; - } + } } break; // since node_lambda_map[source][move.from_proc] == 1 - } - } + } + } } if (node_lambda_map.get_proc_entry(source, move.to_proc) == 1) { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - + for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) - continue; - + if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) + continue; + if (source_proc != move.to_proc && is_compatible(target, move.to_proc)) { if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; + max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); - } - + } + const unsigned target_window_bound = end_idx(target_step, end_step); - auto & affinity_table_target_to_proc = thread_data.affinity_table.at(target)[move.to_proc]; + auto 
&affinity_table_target_to_proc = thread_data.affinity_table.at(target)[move.to_proc]; const cost_t comm_aff = instance->communicationCosts(source_proc, move.to_proc) * comm_gain; for (unsigned idx = start_idx(target_step, start_step); idx < target_window_bound; idx++) { affinity_table_target_to_proc[idx] -= comm_aff; - } + } } } - } else if (node_lambda_map.get_proc_entry(source, move.to_proc) == 2) { + } else if (node_lambda_map.get_proc_entry(source, move.to_proc) == 2) { for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) - continue; - + if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) + continue; + const unsigned target_proc = active_schedule->assigned_processor(target); if (target_proc == move.to_proc) { if (source_proc != target_proc) { if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; + max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); - } - + } + const unsigned target_start_idx = start_idx(target_step, start_step); const unsigned target_window_bound = end_idx(target_step, end_step); - auto & affinity_table_target = thread_data.affinity_table.at(target); + auto &affinity_table_target = thread_data.affinity_table.at(target); const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * graph->vertex_comm_weight(source) * comm_multiplier; for (const unsigned p : proc_range->compatible_processors_vertex(target)) { if (p == target_proc) - continue; - + continue; + for (unsigned idx = 
target_start_idx; idx < target_window_bound; idx++) { affinity_table_target[p][idx] += comm_aff; - } + } } } break; - } - } - } + } + } + } } - - const unsigned source_step = active_schedule->assigned_superstep(source); + const unsigned source_step = active_schedule->assigned_superstep(source); if (source_step < start_step || source_step > end_step) continue; - if(thread_data.lock_manager.is_locked(source)) - continue; + if (thread_data.lock_manager.is_locked(source)) + continue; if (not thread_data.affinity_table.is_selected(source)) { new_nodes.push_back(source); @@ -378,111 +377,111 @@ struct kl_hyper_total_comm_cost_function { } if (max_gain_recompute.find(source) != max_gain_recompute.end()) { - max_gain_recompute[source].full_update = true; + max_gain_recompute[source].full_update = true; } else { max_gain_recompute[source] = kl_gain_update_info(source, true); - } + } - const unsigned source_proc = active_schedule->assigned_processor(source); + const unsigned source_proc = active_schedule->assigned_processor(source); const unsigned source_start_idx = start_idx(source_step, start_step); const unsigned window_bound = end_idx(source_step, end_step); - auto & affinity_table_source = thread_data.affinity_table.at(source); + auto &affinity_table_source = thread_data.affinity_table.at(source); if (move.from_step < source_step + (move.from_proc != source_proc)) { - const unsigned diff = source_step - move.from_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; + const unsigned diff = source_step - move.from_step; + const unsigned bound = window_size > diff ? 
window_size - diff : 0; unsigned idx = source_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] += reward; - } + } } if (window_size >= diff && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += reward; + affinity_table_source[move.from_proc][idx] += reward; } - } else { + } else { const unsigned diff = move.from_step - source_step; - unsigned idx = window_size + diff; - + unsigned idx = window_size + diff; + if (idx < window_bound && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += penalty; + affinity_table_source[move.from_proc][idx] += penalty; } for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] -= penalty; - } - } + } + } } if (move.to_step < source_step + (move.to_proc != source_proc)) { - const unsigned diff = source_step - move.to_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; + const unsigned diff = source_step - move.to_step; + const unsigned bound = window_size > diff ? 
window_size - diff : 0; unsigned idx = source_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] -= reward; - } + } } if (window_size >= diff && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= reward; + affinity_table_source[move.to_proc][idx] -= reward; } - } else { + } else { const unsigned diff = move.to_step - source_step; - unsigned idx = window_size + diff; + unsigned idx = window_size + diff; if (idx < window_bound && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= penalty; + affinity_table_source[move.to_proc][idx] -= penalty; } for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] += penalty; - } - } - } - - if (move.to_proc != move.from_proc) { - if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { + } + } + } + + if (move.to_proc != move.from_proc) { + if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { if (p == source_proc) continue; const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.from_proc), instance->communicationCosts(source_proc, move.from_proc), comm_gain); for (unsigned idx = source_start_idx; idx < window_bound; idx++) { affinity_table_source[p][idx] -= comm_cost; - } - } - } + } + } + } if (node_lambda_map.get_proc_entry(source, move.to_proc) == 1) { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - for (const unsigned p : 
proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { if (p == source_proc) continue; const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.to_proc), instance->communicationCosts(source_proc, move.to_proc), comm_gain); for (unsigned idx = source_start_idx; idx < window_bound; idx++) { affinity_table_source[p][idx] += comm_cost; - } + } } - } - } - } + } + } + } } inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return node_step < window_size + start_step ? window_size - (node_step - start_step) : 0; } - inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); } - inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0;} + inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); } + inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? 
(p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; } template - void compute_comm_affinity(VertexType node, affinity_table_t& affinity_table_node, const cost_t& penalty, const cost_t& reward, const unsigned start_step, const unsigned end_step) { + void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, const cost_t &reward, const unsigned start_step, const unsigned end_step) { const unsigned node_step = active_schedule->assigned_superstep(node); const unsigned node_proc = active_schedule->assigned_processor(node); const unsigned window_bound = end_idx(node_step, end_step); @@ -490,42 +489,42 @@ struct kl_hyper_total_comm_cost_function { for (const auto &target : instance->getComputationalDag().children(node)) { const unsigned target_step = active_schedule->assigned_superstep(target); - const unsigned target_proc = active_schedule->assigned_processor(target); + const unsigned target_proc = active_schedule->assigned_processor(target); if (target_step < node_step + (target_proc != node_proc)) { - const unsigned diff = node_step - target_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; + const unsigned diff = node_step - target_step; + const unsigned bound = window_size > diff ? 
window_size - diff : 0; unsigned idx = node_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { affinity_table_node[p][idx] -= reward; - } + } } if (window_size >= diff && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= reward; - } + affinity_table_node[target_proc][idx] -= reward; + } - } else { + } else { const unsigned diff = target_step - node_step; unsigned idx = window_size + diff; if (idx < window_bound && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= penalty; + affinity_table_node[target_proc][idx] -= penalty; } for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { affinity_table_node[p][idx] += penalty; - } - } - } + } + } + } } // traget const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier; - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { if (p == node_proc) continue; @@ -540,21 +539,21 @@ struct kl_hyper_total_comm_cost_function { for (const auto &source : instance->getComputationalDag().parents(node)) { const unsigned source_step = active_schedule->assigned_superstep(source); - const unsigned source_proc = active_schedule->assigned_processor(source); + const unsigned source_proc = active_schedule->assigned_processor(source); if (source_step < node_step + (source_proc == node_proc)) { - const unsigned diff = node_step - source_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1: 0; + const unsigned diff = node_step - source_step; + const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; unsigned idx = node_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + affinity_table_node[p][idx] += penalty; + } } if (idx - 1 < bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx - 1] -= penalty; + affinity_table_node[source_proc][idx - 1] -= penalty; } } else { @@ -562,34 +561,34 @@ struct kl_hyper_total_comm_cost_function { unsigned idx = std::min(window_size + diff, window_bound); if (idx < window_bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx] -= reward; - } + affinity_table_node[source_proc][idx] -= reward; + } idx++; for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { affinity_table_node[p][idx] -= reward; - } - } + } + } } const cost_t source_comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { if (p == node_proc) continue; if (source_proc != node_proc && node_lambda_map.get_proc_entry(source, node_proc) == 1) { for (unsigned idx = node_start_idx; idx < window_bound; idx++) { affinity_table_node[p][idx] -= instance->communicationCosts(source_proc, node_proc) * source_comm_gain; - } + } } if (source_proc != p && node_lambda_map.has_no_proc_entry(source, p)) { for (unsigned idx = node_start_idx; idx < window_bound; idx++) { affinity_table_node[p][idx] += instance->communicationCosts(source_proc, p) * source_comm_gain; } - } + } } } // source } diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp 
b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp index be7c627c..5f471077 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp @@ -22,26 +22,26 @@ limitations under the License. #include "../kl_improver.hpp" namespace osp { -template +template struct kl_total_comm_cost_function { - + using VertexType = vertex_idx_t; using kl_move = kl_move_struct; using kl_gain_update_info = kl_update_info; - + constexpr static bool is_max_comm_cost_function = false; constexpr static unsigned window_range = 2 * window_size + 1; constexpr static bool use_node_communication_costs = use_node_communication_costs_arg || not has_edge_weights_v; - + kl_active_schedule *active_schedule; - compatible_processor_range *proc_range; + CompatibleProcessorRange *proc_range; const Graph_t *graph; const BspInstance *instance; - cost_t comm_multiplier = 1; + cost_t comm_multiplier = 1; cost_t max_comm_weight = 0; inline cost_t get_comm_multiplier() { return comm_multiplier; } @@ -52,23 +52,23 @@ struct kl_total_comm_cost_function { inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } - void initialize(kl_active_schedule &sched, compatible_processor_range &p_range) { + void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { active_schedule = &sched; proc_range = &p_range; instance = &sched.getInstance(); graph = &instance->getComputationalDag(); - comm_multiplier = 1.0 / instance->numberOfProcessors(); + comm_multiplier = 1.0 / instance->numberOfProcessors(); } struct empty_struct {}; using pre_move_comm_data_t = empty_struct; - inline empty_struct get_pre_move_comm_data(const kl_move& ) { return empty_struct(); } + inline empty_struct get_pre_move_comm_data(const kl_move &) { return empty_struct(); } 
cost_t compute_schedule_cost_test() { return compute_schedule_cost(); } - void update_datastructure_after_move(const kl_move&, const unsigned, const unsigned) {} + void update_datastructure_after_move(const kl_move &, const unsigned, const unsigned) {} cost_t compute_schedule_cost() { @@ -89,7 +89,7 @@ struct kl_total_comm_cost_function { if (source_proc != target_proc) { if constexpr (use_node_communication_costs) { - const cost_t source_comm_cost = graph->vertex_comm_weight(source_v); + const cost_t source_comm_cost = graph->vertex_comm_weight(source_v); max_comm_weight = std::max(max_comm_weight, source_comm_cost); comm_costs += source_comm_cost * instance->communicationCosts(source_proc, target_proc); } else { @@ -98,108 +98,108 @@ struct kl_total_comm_cost_function { comm_costs += source_comm_cost * instance->communicationCosts(source_proc, target_proc); } } - } + } return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); } template - void update_node_comm_affinity(const kl_move &move, thread_data_t& thread_data, const cost_t& penalty, const cost_t& reward, std::map & max_gain_recompute, std::vector &new_nodes) { - - const unsigned & start_step = thread_data.start_step; - const unsigned & end_step = thread_data.end_step; + void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, const cost_t &reward, std::map &max_gain_recompute, std::vector &new_nodes) { + + const unsigned &start_step = thread_data.start_step; + const unsigned &end_step = thread_data.end_step; for (const auto &target : instance->getComputationalDag().children(move.node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); + const unsigned target_step = active_schedule->assigned_superstep(target); if (target_step < start_step || target_step > end_step) continue; - if(thread_data.lock_manager.is_locked(target)) + if 
(thread_data.lock_manager.is_locked(target)) continue; if (not thread_data.affinity_table.is_selected(target)) { - new_nodes.push_back(target); + new_nodes.push_back(target); continue; } if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; + max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); - } + } const unsigned target_proc = active_schedule->assigned_processor(target); - const unsigned target_start_idx = start_idx(target_step, start_step); - auto & affinity_table_target = thread_data.affinity_table.at(target); + const unsigned target_start_idx = start_idx(target_step, start_step); + auto &affinity_table_target = thread_data.affinity_table.at(target); if (move.from_step < target_step + (move.from_proc == target_proc)) { - const unsigned diff = target_step - move.from_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1: 0; - unsigned idx = target_start_idx; + const unsigned diff = target_step - move.from_step; + const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; + unsigned idx = target_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table_target[p][idx] -= penalty; - } - } + } + } if (idx - 1 < bound && is_compatible(target, move.from_proc)) { - affinity_table_target[move.from_proc][idx - 1] += penalty; + affinity_table_target[move.from_proc][idx - 1] += penalty; } } else { const unsigned diff = move.from_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.from_proc)) { - affinity_table_target[move.from_proc][idx] += reward; + const unsigned window_bound = end_idx(target_step, end_step); + unsigned idx = std::min(window_size + diff, window_bound); + + if (idx < window_bound && is_compatible(target, move.from_proc)) { + affinity_table_target[move.from_proc][idx] += reward; } idx++; - + for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table_target[p][idx] += reward; - } - } + } + } } if (move.to_step < target_step + (move.to_proc == target_proc)) { - unsigned idx = target_start_idx; - const unsigned diff = target_step - move.to_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1: 0; + unsigned idx = target_start_idx; + const unsigned diff = target_step - move.to_step; + const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table_target[p][idx] += penalty; - } - } + } + } if (idx - 1 < bound && is_compatible(target, move.to_proc)) { - affinity_table_target[move.to_proc][idx - 1] -= penalty; + affinity_table_target[move.to_proc][idx - 1] -= penalty; } } else { const unsigned diff = move.to_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - + const unsigned window_bound = end_idx(target_step, end_step); + unsigned idx = std::min(window_size + diff, window_bound); + if (idx < window_bound && is_compatible(target, move.to_proc)) { - affinity_table_target[move.to_proc][idx] -= reward; + affinity_table_target[move.to_proc][idx] -= reward; } idx++; - + for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { + for (const unsigned p : proc_range->compatible_processors_vertex(target)) { affinity_table_target[p][idx] -= reward; - } - } + } + } } - - if (move.to_proc != move.from_proc) { + + if (move.to_proc != move.from_proc) { const auto from_proc_target_comm_cost = instance->communicationCosts(move.from_proc, target_proc); const auto to_proc_target_comm_cost = instance->communicationCosts(move.to_proc, target_proc); @@ -209,21 +209,21 @@ struct kl_total_comm_cost_function { const unsigned window_bound = end_idx(target_step, end_step); for (; idx < window_bound; idx++) { for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - const auto x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain); + const auto x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain); const auto y = 
change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain); - affinity_table_target[p][idx] += x - y; + affinity_table_target[p][idx] += x - y; } } - } + } } for (const auto &source : instance->getComputationalDag().parents(move.node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); + const unsigned source_step = active_schedule->assigned_superstep(source); if (source_step < start_step || source_step > end_step) continue; - if(thread_data.lock_manager.is_locked(source)) + if (thread_data.lock_manager.is_locked(source)) continue; if (not thread_data.affinity_table.is_selected(source)) { @@ -232,75 +232,75 @@ struct kl_total_comm_cost_function { } if (max_gain_recompute.find(source) != max_gain_recompute.end()) { - max_gain_recompute[source].full_update = true; + max_gain_recompute[source].full_update = true; } else { max_gain_recompute[source] = kl_gain_update_info(source, true); - } + } const unsigned source_proc = active_schedule->assigned_processor(source); const unsigned window_bound = end_idx(source_step, end_step); - auto & affinity_table_source = thread_data.affinity_table.at(source); + auto &affinity_table_source = thread_data.affinity_table.at(source); if (move.from_step < source_step + (move.from_proc != source_proc)) { - const unsigned diff = source_step - move.from_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; + const unsigned diff = source_step - move.from_step; + const unsigned bound = window_size > diff ? 
window_size - diff : 0; unsigned idx = start_idx(source_step, start_step); for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] += reward; - } + } } if (window_size >= diff && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += reward; + affinity_table_source[move.from_proc][idx] += reward; } - } else { + } else { const unsigned diff = move.from_step - source_step; - unsigned idx = window_size + diff; - + unsigned idx = window_size + diff; + if (idx < window_bound && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += penalty; + affinity_table_source[move.from_proc][idx] += penalty; } for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] -= penalty; - } - } + } + } } if (move.to_step < source_step + (move.to_proc != source_proc)) { - const unsigned diff = source_step - move.to_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; + const unsigned diff = source_step - move.to_step; + const unsigned bound = window_size > diff ? 
window_size - diff : 0; unsigned idx = start_idx(source_step, start_step); for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] -= reward; - } + } } if (window_size >= diff && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= reward; + affinity_table_source[move.to_proc][idx] -= reward; } - } else { + } else { const unsigned diff = move.to_step - source_step; - unsigned idx = window_size + diff; + unsigned idx = window_size + diff; if (idx < window_bound && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= penalty; + affinity_table_source[move.to_proc][idx] -= penalty; } for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { affinity_table_source[p][idx] += penalty; - } - } - } + } + } + } - if (move.to_proc != move.from_proc) { + if (move.to_proc != move.from_proc) { const auto from_proc_source_comm_cost = instance->communicationCosts(source_proc, move.from_proc); const auto to_proc_source_comm_cost = instance->communicationCosts(source_proc, move.to_proc); @@ -308,23 +308,23 @@ struct kl_total_comm_cost_function { unsigned idx = start_idx(source_step, start_step); for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - const cost_t x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain); + for (const unsigned p : proc_range->compatible_processors_vertex(source)) { + const cost_t x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain); const cost_t y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, 
comm_gain); - affinity_table_source[p][idx] += x - y; + affinity_table_source[p][idx] += x - y; } } } - } + } } inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; } inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); } - inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0;} + inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; } template - void compute_comm_affinity(VertexType node, affinity_table_t& affinity_table_node, const cost_t& penalty, const cost_t& reward, const unsigned start_step, const unsigned end_step) { + void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, const cost_t &reward, const unsigned start_step, const unsigned end_step) { const unsigned node_step = active_schedule->assigned_superstep(node); const unsigned node_proc = active_schedule->assigned_processor(node); const unsigned window_bound = end_idx(node_step, end_step); @@ -332,37 +332,37 @@ struct kl_total_comm_cost_function { for (const auto &target : instance->getComputationalDag().children(node)) { const unsigned target_step = active_schedule->assigned_superstep(target); - const unsigned target_proc = active_schedule->assigned_processor(target); + const unsigned 
target_proc = active_schedule->assigned_processor(target); if (target_step < node_step + (target_proc != node_proc)) { - const unsigned diff = node_step - target_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; + const unsigned diff = node_step - target_step; + const unsigned bound = window_size > diff ? window_size - diff : 0; unsigned idx = node_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { affinity_table_node[p][idx] -= reward; - } + } } if (window_size >= diff && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= reward; - } + affinity_table_node[target_proc][idx] -= reward; + } - } else { + } else { const unsigned diff = target_step - node_step; unsigned idx = window_size + diff; if (idx < window_bound && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= penalty; + affinity_table_node[target_proc][idx] -= penalty; } for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { affinity_table_node[p][idx] += penalty; - } - } - } + } + } + } const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier; const auto node_target_comm_cost = instance->communicationCosts(node_proc, target_proc); @@ -378,21 +378,21 @@ struct kl_total_comm_cost_function { for (const auto &source : instance->getComputationalDag().parents(node)) { const unsigned source_step = active_schedule->assigned_superstep(source); - const unsigned source_proc = active_schedule->assigned_processor(source); + const unsigned source_proc = active_schedule->assigned_processor(source); if (source_step < node_step + (source_proc == node_proc)) { - const unsigned diff = node_step - source_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1: 0; + const unsigned diff = node_step - source_step; + const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; unsigned idx = node_start_idx; for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + affinity_table_node[p][idx] += penalty; + } } if (idx - 1 < bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx - 1] -= penalty; + affinity_table_node[source_proc][idx - 1] -= penalty; } } else { @@ -400,22 +400,22 @@ struct kl_total_comm_cost_function { unsigned idx = std::min(window_size + diff, window_bound); if (idx < window_bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx] -= reward; + affinity_table_node[source_proc][idx] -= reward; } - + idx++; for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { affinity_table_node[p][idx] -= reward; - } - } + } + } } const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; const auto source_node_comm_cost = instance->communicationCosts(source_proc, node_proc); - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { + for (const unsigned p : proc_range->compatible_processors_vertex(node)) { const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain); for (unsigned idx = node_start_idx; idx < window_bound; idx++) { affinity_table_node[p][idx] += comm_cost; @@ -426,4 +426,3 @@ struct kl_total_comm_cost_function { }; } // namespace osp - diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_cut_cost.hpp 
b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_cut_cost.hpp deleted file mode 100644 index f13abda9..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_cut_cost.hpp +++ /dev/null @@ -1,431 +0,0 @@ -// /* -// Copyright 2024 Huawei Technologies Co., Ltd. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -// */ - -// #pragma once - -// #include "../kl_active_schedule.hpp" -// #include "../kl_improver.hpp" - -// namespace osp { -// template -// struct kl_total_cut_cost_function { - -// using VertexType = vertex_idx_t; -// using kl_move = kl_move_struct; -// using kl_gain_update_info = kl_update_info; - -// constexpr static unsigned window_range = 2 * window_size + 1; -// constexpr static bool use_node_communication_costs = use_node_communication_costs_arg || not has_edge_weights_v; - -// kl_active_schedule *active_schedule; - -// compatible_processor_range *proc_range; - -// const Graph_t *graph; -// const BspInstance *instance; - -// cost_t comm_multiplier = 1; -// cost_t max_comm_weight = 0; - -// inline cost_t get_comm_multiplier() { return comm_multiplier; } -// inline cost_t get_max_comm_weight() { return max_comm_weight; } -// inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; } - -// const std::string name() const { return "toal_comm_cost"; } - -// inline bool 
is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } - -// void initialize(kl_active_schedule &sched, compatible_processor_range &p_range) { -// active_schedule = &sched; -// proc_range = &p_range; -// instance = &sched.getInstance(); -// graph = &instance->getComputationalDag(); -// comm_multiplier = 1.0 / instance->numberOfProcessors(); -// } - -// cost_t compute_schedule_cost_test() { -// return compute_schedule_cost(); -// } - -// void update_datastructure_after_move(const kl_move&, const unsigned, const unsigned) {} - -// cost_t compute_schedule_cost() { - -// cost_t work_costs = 0; -// for (unsigned step = 0; step < active_schedule->num_steps(); step++) { -// work_costs += active_schedule->get_step_max_work(step); -// } - -// cost_t comm_costs = 0; -// for (const auto &edge : edges(*graph)) { - -// const auto &source_v = source(edge, *graph); -// const auto &target_v = target(edge, *graph); - -// const unsigned &source_proc = active_schedule->assigned_processor(source_v); -// const unsigned &target_proc = active_schedule->assigned_processor(target_v); - -// if ((source_proc != target_proc) || (active_schedule->assigned_superstep(source_v) != active_schedule->assigned_superstep(target_v))) { - -// if constexpr (use_node_communication_costs) { -// const cost_t source_comm_cost = graph->vertex_comm_weight(source_v); -// max_comm_weight = std::max(max_comm_weight, source_comm_cost); -// comm_costs += source_comm_cost * instance->communicationCosts(source_proc, target_proc); -// } else { -// const cost_t source_comm_cost = graph->edge_comm_weight(edge); -// max_comm_weight = std::max(max_comm_weight, source_comm_cost); -// comm_costs += source_comm_cost * instance->communicationCosts(source_proc, target_proc); -// } -// } -// } - -// return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); -// } - -// template -// void 
update_node_comm_affinity(const kl_move &move, thread_data_t& thread_data, const cost_t& penalty, const cost_t& reward, std::map & max_gain_recompute, std::vector &new_nodes) { - -// const unsigned & start_step = thread_data.start_step; -// const unsigned & end_step = thread_data.end_step; - -// for (const auto &target : instance->getComputationalDag().children(move.node)) { - -// const unsigned target_step = active_schedule->assigned_superstep(target); -// if (target_step < start_step || target_step > end_step) -// continue; - -// if(thread_data.lock_manager.is_locked(target)) -// continue; - -// if (not thread_data.affinity_table.is_selected(target)) { -// new_nodes.push_back(target); -// continue; -// } - -// if (max_gain_recompute.find(target) != max_gain_recompute.end()) { -// max_gain_recompute[target].full_update = true; -// } else { -// max_gain_recompute[target] = kl_gain_update_info(target, true); -// } - -// const unsigned target_proc = active_schedule->assigned_processor(target); -// const unsigned target_start_idx = start_idx(target_step, start_step); -// auto & affinity_table_target = thread_data.affinity_table.at(target); - -// if (move.from_step < target_step + (move.from_proc == target_proc)) { - -// const unsigned diff = target_step - move.from_step; -// const unsigned bound = window_size >= diff ? 
window_size - diff + 1: 0; -// unsigned idx = target_start_idx; -// for (; idx < bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(target)) { -// affinity_table_target[p][idx] -= penalty; -// } -// } - -// if (idx - 1 < bound && is_compatible(target, move.from_proc)) { -// affinity_table_target[move.from_proc][idx - 1] += penalty; -// } - -// } else { - -// const unsigned diff = move.from_step - target_step; -// const unsigned window_bound = end_idx(target_step, end_step); -// unsigned idx = std::min(window_size + diff, window_bound); - -// if (idx < window_bound && is_compatible(target, move.from_proc)) { -// affinity_table_target[move.from_proc][idx] += reward; -// } - -// idx++; - -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(target)) { -// affinity_table_target[p][idx] += reward; -// } -// } -// } - -// if (move.to_step < target_step + (move.to_proc == target_proc)) { -// unsigned idx = target_start_idx; -// const unsigned diff = target_step - move.to_step; -// const unsigned bound = window_size >= diff ? 
window_size - diff + 1: 0; -// for (; idx < bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(target)) { -// affinity_table_target[p][idx] += penalty; -// } -// } - -// if (idx - 1 < bound && is_compatible(target, move.to_proc)) { -// affinity_table_target[move.to_proc][idx - 1] -= penalty; -// } - -// } else { -// const unsigned diff = move.to_step - target_step; -// const unsigned window_bound = end_idx(target_step, end_step); -// unsigned idx = std::min(window_size + diff, window_bound); - -// if (idx < window_bound && is_compatible(target, move.to_proc)) { -// affinity_table_target[move.to_proc][idx] -= reward; -// } - -// idx++; - -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(target)) { -// affinity_table_target[p][idx] -= reward; -// } -// } -// } - -// if (move.to_proc != move.from_proc) { -// const auto from_proc_target_comm_cost = instance->communicationCosts(move.from_proc, target_proc); -// const auto to_proc_target_comm_cost = instance->communicationCosts(move.to_proc, target_proc); - -// const cost_t comm_gain = graph->vertex_comm_weight(move.node) * comm_multiplier; - -// unsigned idx = target_start_idx; -// const unsigned window_bound = end_idx(target_step, end_step); -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(target)) { -// const auto x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain); -// const auto y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain); -// affinity_table_target[p][idx] += x - y; -// } -// } -// } -// } - -// for (const auto &source : instance->getComputationalDag().parents(move.node)) { - -// const unsigned source_step = active_schedule->assigned_superstep(source); -// if (source_step < start_step || source_step > end_step) -// continue; - -// 
if(thread_data.lock_manager.is_locked(source)) -// continue; - -// if (not thread_data.affinity_table.is_selected(source)) { -// new_nodes.push_back(source); -// continue; -// } - -// if (max_gain_recompute.find(source) != max_gain_recompute.end()) { -// max_gain_recompute[source].full_update = true; -// } else { -// max_gain_recompute[source] = kl_gain_update_info(source, true); -// } - -// const unsigned source_proc = active_schedule->assigned_processor(source); -// const unsigned window_bound = end_idx(source_step, end_step); -// auto & affinity_table_source = thread_data.affinity_table.at(source); - -// if (move.from_step < source_step + (move.from_proc != source_proc)) { - -// const unsigned diff = source_step - move.from_step; -// const unsigned bound = window_size > diff ? window_size - diff : 0; -// unsigned idx = start_idx(source_step, start_step); -// for (; idx < bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(source)) { -// affinity_table_source[p][idx] += reward; -// } -// } - -// if (window_size >= diff && is_compatible(source, move.from_proc)) { -// affinity_table_source[move.from_proc][idx] += reward; -// } - -// } else { - -// const unsigned diff = move.from_step - source_step; -// unsigned idx = window_size + diff; - -// if (idx < window_bound && is_compatible(source, move.from_proc)) { -// affinity_table_source[move.from_proc][idx] += penalty; -// } - -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(source)) { -// affinity_table_source[p][idx] -= penalty; -// } -// } -// } - -// if (move.to_step < source_step + (move.to_proc != source_proc)) { -// const unsigned diff = source_step - move.to_step; -// const unsigned bound = window_size > diff ? 
window_size - diff : 0; -// unsigned idx = start_idx(source_step, start_step); -// for (; idx < bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(source)) { -// affinity_table_source[p][idx] -= reward; -// } -// } - -// if (window_size >= diff && is_compatible(source, move.to_proc)) { -// affinity_table_source[move.to_proc][idx] -= reward; -// } - -// } else { -// const unsigned diff = move.to_step - source_step; -// unsigned idx = window_size + diff; - -// if (idx < window_bound && is_compatible(source, move.to_proc)) { -// affinity_table_source[move.to_proc][idx] -= penalty; -// } -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(source)) { -// affinity_table_source[p][idx] += penalty; -// } -// } -// } - -// if (move.to_proc != move.from_proc) { -// const auto from_proc_source_comm_cost = instance->communicationCosts(source_proc, move.from_proc); -// const auto to_proc_source_comm_cost = instance->communicationCosts(source_proc, move.to_proc); - -// const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - -// unsigned idx = start_idx(source_step, start_step); -// const unsigned window_bound = end_idx(source_step, end_step); -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(source)) { -// const cost_t x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain); -// const cost_t y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, comm_gain); -// affinity_table_source[p][idx] += x - y; -// } -// } -// } -// } -// } - -// inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return (node_step < window_size + start_step) ? 
window_size - (node_step - start_step) : 0; } -// inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); } - -// inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0;} - -// template -// void compute_comm_affinity(VertexType node, affinity_table_t& affinity_table_node, const cost_t& penalty, const cost_t& reward, const unsigned start_step, const unsigned end_step) { -// const unsigned node_step = active_schedule->assigned_superstep(node); -// const unsigned node_proc = active_schedule->assigned_processor(node); -// const unsigned window_bound = end_idx(node_step, end_step); -// const unsigned node_start_idx = start_idx(node_step, start_step); - -// for (const auto &target : instance->getComputationalDag().children(node)) { -// const unsigned target_step = active_schedule->assigned_superstep(target); -// const unsigned target_proc = active_schedule->assigned_processor(target); - -// if (target_step < node_step + (target_proc != node_proc)) { -// const unsigned diff = node_step - target_step; -// const unsigned bound = window_size > diff ? 
window_size - diff : 0; -// unsigned idx = node_start_idx; - -// for (; idx < bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(node)) { -// affinity_table_node[p][idx] -= reward; -// } -// } - -// if (window_size >= diff && is_compatible(node, target_proc)) { -// affinity_table_node[target_proc][idx] -= reward; -// } - -// } else { -// const unsigned diff = target_step - node_step; -// unsigned idx = window_size + diff; - -// if (idx < window_bound && is_compatible(node, target_proc)) { -// affinity_table_node[target_proc][idx] -= penalty; -// } - -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(node)) { -// affinity_table_node[p][idx] += penalty; -// } -// } -// } - -// const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier; -// const auto node_target_comm_cost = instance->communicationCosts(node_proc, target_proc); - -// for (const unsigned p : proc_range->compatible_processors_vertex(node)) { -// if (p != target_proc) { -// const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, target_proc), node_target_comm_cost, comm_gain); -// for (unsigned idx = node_start_idx; idx < window_bound; idx++) { -// affinity_table_node[p][idx] += comm_cost; -// } -// } else { -// for (unsigned idx = node_start_idx; idx < window_bound; idx++) { -// if(idx == 0) continue; -// affinity_table_node[p][idx] += comm_gain; -// } -// } -// } - -// } // traget - -// for (const auto &source : instance->getComputationalDag().parents(node)) { -// const unsigned source_step = active_schedule->assigned_superstep(source); -// const unsigned source_proc = active_schedule->assigned_processor(source); - -// if (source_step < node_step + (source_proc == node_proc)) { -// const unsigned diff = node_step - source_step; -// const unsigned bound = window_size >= diff ? 
window_size - diff + 1: 0; -// unsigned idx = node_start_idx; - -// for (; idx < bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(node)) { -// affinity_table_node[p][idx] += penalty; -// } -// } - -// if (idx - 1 < bound && is_compatible(node, source_proc)) { -// affinity_table_node[source_proc][idx - 1] -= penalty; -// } - -// } else { -// const unsigned diff = source_step - node_step; -// unsigned idx = std::min(window_size + diff, window_bound); - -// if (idx < window_bound && is_compatible(node, source_proc)) { -// affinity_table_node[source_proc][idx] -= reward; -// } - -// idx++; - -// for (; idx < window_bound; idx++) { -// for (const unsigned p : proc_range->compatible_processors_vertex(node)) { -// affinity_table_node[p][idx] -= reward; -// } -// } -// } - -// const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; -// const auto source_node_comm_cost = instance->communicationCosts(source_proc, node_proc); - -// for (const unsigned p : proc_range->compatible_processors_vertex(node)) { -// const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain); -// for (unsigned idx = node_start_idx; idx < window_bound; idx++) { -// affinity_table_node[p][idx] += comm_cost; -// } -// } -// } // source -// } -// }; - -// } // namespace osp - diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp index 6fe460f8..862eeacc 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp @@ -16,13 +16,12 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner */ - #pragma once #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/IBspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" @@ -54,7 +53,7 @@ struct kl_move_struct { bool operator>(kl_move_struct const &rhs) const { return (gain > rhs.gain) or (gain >= rhs.gain and node < rhs.node); } - + kl_move_struct reverse_move() const { return kl_move_struct(node, -gain, to_proc, to_step, from_proc, from_step); } @@ -73,13 +72,12 @@ struct pre_move_work_data { pre_move_work_data() {} pre_move_work_data(work_weight_t from_step_max_work_, work_weight_t from_step_second_max_work_, unsigned from_step_max_work_processor_count_, - work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_, - unsigned to_step_max_work_processor_count_) + work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_, + unsigned to_step_max_work_processor_count_) : from_step_max_work(from_step_max_work_), from_step_second_max_work(from_step_second_max_work_), from_step_max_work_processor_count(from_step_max_work_processor_count_), to_step_max_work(to_step_max_work_), to_step_second_max_work(to_step_second_max_work_), - to_step_max_work_processor_count(to_step_max_work_processor_count_) {} - + to_step_max_work_processor_count(to_step_max_work_processor_count_) {} }; template @@ -87,16 +85,16 @@ struct kl_active_schedule_work_datastructures { using work_weight_t = v_workw_t; - const BspInstance *instance; + const BspInstance *instance; const SetSchedule *set_schedule; - + struct weight_proc { work_weight_t work; unsigned proc; weight_proc() : work(0), proc(0) {} weight_proc(work_weight_t _work, unsigned _proc) : work(_work), 
proc(_proc) {} - + bool operator<(weight_proc const &rhs) const { return (work > rhs.work) or (work == rhs.work and proc < rhs.proc); } @@ -106,17 +104,17 @@ struct kl_active_schedule_work_datastructures { std::vector> step_processor_position; std::vector step_max_work_processor_count; work_weight_t max_work_weight; - work_weight_t total_work_weight; + work_weight_t total_work_weight; inline work_weight_t step_max_work(unsigned step) const { return step_processor_work_[step][0].work; } inline work_weight_t step_second_max_work(unsigned step) const { return step_processor_work_[step][step_max_work_processor_count[step]].work; } inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { return step_processor_work_[step][step_processor_position[step][proc]].work; } - inline work_weight_t & step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; } + inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; } template - inline pre_move_work_data get_pre_move_work_data(kl_move_struct move) { + inline pre_move_work_data get_pre_move_work_data(kl_move_struct move) { return pre_move_work_data(step_max_work(move.from_step), step_second_max_work(move.from_step), step_max_work_processor_count[move.from_step], - step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]); + step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]); } inline void initialize(const SetSchedule &sched, const BspInstance &inst, unsigned num_steps) { @@ -140,20 +138,20 @@ struct kl_active_schedule_work_datastructures { unsigned pos = 0; const work_weight_t max_work_to = step_processor_work_[step][0].work; - for (const auto & wp : step_processor_work_[step]) { + for (const auto &wp : step_processor_work_[step]) { 
step_processor_position[step][wp.proc] = pos++; if (wp.work == max_work_to && pos < instance->numberOfProcessors()) - step_max_work_processor_count[step] = pos; + step_max_work_processor_count[step] = pos; } } template - void apply_move(kl_move_struct move, work_weight_t work_weight) { + void apply_move(kl_move_struct move, work_weight_t work_weight) { - if (work_weight == 0) + if (work_weight == 0) return; - + if (move.to_step != move.from_step) { step_proc_work(move.to_step, move.to_proc) += work_weight; step_proc_work(move.from_step, move.from_proc) -= work_weight; @@ -171,7 +169,7 @@ struct kl_active_schedule_work_datastructures { // } // unsigned to_proc_pos = step_processor_position[move.to_step][move.to_proc]; - + // while (to_proc_pos > 0 && step_processor_work_[move.to_step][to_proc_pos - 1].work < new_weight_to) { // std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - 1]); // std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]); @@ -189,15 +187,15 @@ struct kl_active_schedule_work_datastructures { // std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]); // from_proc_pos++; // } - + // if (prev_max_work_from == prev_weight_from) { - // step_max_work_processor_count[move.from_step]--; - // if (step_max_work_processor_count[move.from_step] == 0) { - // step_max_work_processor_count[move.from_step] = from_proc_pos; + // step_max_work_processor_count[move.from_step]--; + // if (step_max_work_processor_count[move.from_step] == 0) { + // step_max_work_processor_count[move.from_step] = from_proc_pos; // } - // } + // } - } else { + } else { step_proc_work(move.to_step, move.to_proc) += work_weight; 
step_proc_work(move.from_step, move.from_proc) -= work_weight; arrange_superstep_data(move.to_step); @@ -209,21 +207,21 @@ struct kl_active_schedule_work_datastructures { std::swap(step_processor_position[step1], step_processor_position[step2]); std::swap(step_max_work_processor_count[step1], step_max_work_processor_count[step2]); } - + void override_next_superstep(unsigned step) { const unsigned next_step = step + 1; for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { - step_processor_work_[next_step][i] = step_processor_work_[step][i]; - step_processor_position[next_step][i] = step_processor_position[step][i]; + step_processor_work_[next_step][i] = step_processor_work_[step][i]; + step_processor_position[next_step][i] = step_processor_position[step][i]; } step_max_work_processor_count[next_step] = step_max_work_processor_count[step]; } void reset_superstep(unsigned step) { for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { - step_processor_work_[step][i] = {0,i}; - step_processor_position[step][i] = i; + step_processor_work_[step][i] = {0, i}; + step_processor_position[step][i] = i; } step_max_work_processor_count[step] = instance->numberOfProcessors() - 1; } @@ -249,12 +247,12 @@ struct kl_active_schedule_work_datastructures { step_max_work_processor_count[step] = 1; } else if (step_processor_work_[step][proc].work == max_work && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) { step_max_work_processor_count[step]++; - } + } } std::sort(step_processor_work_[step].begin(), step_processor_work_[step].end()); unsigned pos = 0; - for (const auto & wp : step_processor_work_[step]) { + for (const auto &wp : step_processor_work_[step]) { step_processor_position[step][wp.proc] = pos++; } } @@ -287,15 +285,15 @@ struct thread_local_active_schedule_data { cost = cost_; best_cost = cost_; feasible = true; - } - + } + inline void update_cost(cost_t change_in_cost) { - cost += change_in_cost; + cost += change_in_cost; if 
(cost <= best_cost && feasible) { best_cost = cost; best_schedule_idx = static_cast(applied_moves.size()); - } + } } }; @@ -319,23 +317,23 @@ class kl_active_schedule { public: virtual ~kl_active_schedule() = default; - inline const BspInstance & getInstance() const { return *instance; } - inline const VectorSchedule & getVectorSchedule() const { return vector_schedule; } - inline VectorSchedule & getVectorSchedule() { return vector_schedule; } - inline const SetSchedule & getSetSchedule() const { return set_schedule; } + inline const BspInstance &getInstance() const { return *instance; } + inline const VectorSchedule &getVectorSchedule() const { return vector_schedule; } + inline VectorSchedule &getVectorSchedule() { return vector_schedule; } + inline const SetSchedule &getSetSchedule() const { return set_schedule; } inline cost_t get_cost() { return cost; } inline bool is_feasible() { return feasible; } inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); } inline unsigned assigned_processor(VertexType node) const { return vector_schedule.assignedProcessor(node); } inline unsigned assigned_superstep(VertexType node) const { return vector_schedule.assignedSuperstep(node); } - inline v_workw_t get_step_max_work(unsigned step) const {return work_datastructures.step_max_work(step); } - inline v_workw_t get_step_second_max_work(unsigned step) const {return work_datastructures.step_second_max_work(step); } - inline std::vector & get_step_max_work_processor_count() {return work_datastructures.step_max_work_processor_count; } - inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const {return work_datastructures.step_proc_work(step, proc); } + inline v_workw_t get_step_max_work(unsigned step) const { return work_datastructures.step_max_work(step); } + inline v_workw_t get_step_second_max_work(unsigned step) const { return work_datastructures.step_second_max_work(step); } + inline std::vector 
&get_step_max_work_processor_count() { return work_datastructures.step_max_work_processor_count; } + inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const { return work_datastructures.step_proc_work(step, proc); } inline pre_move_work_data> get_pre_move_work_data(kl_move move) { return work_datastructures.get_pre_move_work_data(move); } inline v_workw_t get_max_work_weight() { return work_datastructures.max_work_weight; } inline v_workw_t get_total_work_weight() { return work_datastructures.total_work_weight; } - inline void set_cost(cost_t cost_) { cost = cost_; } + inline void set_cost(cost_t cost_) { cost = cost_; } constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v; @@ -343,11 +341,11 @@ class kl_active_schedule { kl_active_schedule_work_datastructures work_datastructures; - inline v_workw_t get_step_total_work(unsigned step) const { - v_workw_t total_work = 0; + inline v_workw_t get_step_total_work(unsigned step) const { + v_workw_t total_work = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { total_work += get_step_processor_work(step, proc); - } + } return total_work; } @@ -357,18 +355,18 @@ class kl_active_schedule { set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - + update_violations(move.node, thread_data); thread_data.applied_moves.push_back(move); work_datastructures.apply_move(move, instance->getComputationalDag().vertex_work_weight(move.node)); if constexpr (use_memory_constraint) { memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } + } } template - void revert_to_best_schedule(unsigned start_move, unsigned insert_step, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned & end_step) { + void revert_to_best_schedule(unsigned start_move, unsigned 
insert_step, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned &end_step) { const unsigned bound = std::max(start_move, thread_data.best_schedule_idx); revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); @@ -391,7 +389,7 @@ class kl_active_schedule { } template - void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned end_step) { + void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) { revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); thread_data.current_violations.clear(); @@ -399,10 +397,9 @@ class kl_active_schedule { thread_data.cost = new_cost; } - - void compute_violations(thread_data_t & thread_data); + void compute_violations(thread_data_t &thread_data); void compute_work_memory_datastructures(unsigned start_step, unsigned end_step); - void write_schedule (BspSchedule &schedule); + void write_schedule(BspSchedule &schedule); inline void initialize(const IBspSchedule &schedule); inline void clear(); void remove_empty_step(unsigned step); @@ -412,15 +409,14 @@ class kl_active_schedule { void swap_steps(const unsigned step1, const unsigned step2); private: - template - void revert_moves(const size_t bound, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned end_step) { + void revert_moves(const size_t bound, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) { while (thread_data.applied_moves.size() > bound) { const auto move = thread_data.applied_moves.back().reverse_move(); thread_data.applied_moves.pop_back(); 
vector_schedule.setAssignedProcessor(move.node, move.to_proc); - vector_schedule.setAssignedSuperstep(move.node, move.to_step); + vector_schedule.setAssignedSuperstep(move.node, move.to_step); set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); @@ -443,16 +439,16 @@ class kl_active_schedule { const auto &child = target(edge, instance->getComputationalDag()); if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step > vector_schedule.assignedSuperstep(child)) || + if ((node_step > vector_schedule.assignedSuperstep(child)) || (node_step == vector_schedule.assignedSuperstep(child) && node_proc != vector_schedule.assignedProcessor(child))) { - thread_data.current_violations.insert(edge); - thread_data.new_violations[child] = edge; + thread_data.current_violations.insert(edge); + thread_data.new_violations[child] = edge; } } else { - if ((node_step < vector_schedule.assignedSuperstep(child)) || + if ((node_step < vector_schedule.assignedSuperstep(child)) || (node_step == vector_schedule.assignedSuperstep(child) && node_proc == vector_schedule.assignedProcessor(child))) { - thread_data.current_violations.erase(edge); - thread_data.resolved_violations.insert(edge); + thread_data.current_violations.erase(edge); + thread_data.resolved_violations.insert(edge); } } } @@ -460,17 +456,17 @@ class kl_active_schedule { for (const auto &edge : in_edges(node, instance->getComputationalDag())) { const auto &parent = source(edge, instance->getComputationalDag()); - if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step < vector_schedule.assignedSuperstep(parent)) || + if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { + if ((node_step < vector_schedule.assignedSuperstep(parent)) || (node_step == 
vector_schedule.assignedSuperstep(parent) && node_proc != vector_schedule.assignedProcessor(parent))) { - thread_data.current_violations.insert(edge); - thread_data.new_violations[parent] = edge; + thread_data.current_violations.insert(edge); + thread_data.new_violations[parent] = edge; } } else { - if ((node_step > vector_schedule.assignedSuperstep(parent)) || + if ((node_step > vector_schedule.assignedSuperstep(parent)) || (node_step == vector_schedule.assignedSuperstep(parent) && node_proc == vector_schedule.assignedProcessor(parent))) { - thread_data.current_violations.erase(edge); - thread_data.resolved_violations.insert(edge); + thread_data.current_violations.erase(edge); + thread_data.resolved_violations.insert(edge); } } } @@ -501,7 +497,6 @@ class kl_active_schedule { thread_data.feasible = true; } } - }; template @@ -515,7 +510,7 @@ void kl_active_schedule::clear() { } template -void kl_active_schedule::compute_violations(thread_data_t & thread_data) { +void kl_active_schedule::compute_violations(thread_data_t &thread_data) { thread_data.current_violations.clear(); thread_data.feasible = true; @@ -529,12 +524,12 @@ void kl_active_schedule::compute_violations const unsigned target_proc = assigned_processor(target_v); const unsigned source_step = assigned_superstep(source_v); const unsigned target_step = assigned_superstep(target_v); - + if (source_step > target_step || (source_step == target_step && source_proc != target_proc)) { thread_data.current_violations.insert(edge); thread_data.feasible = false; - } - } + } + } } template @@ -563,7 +558,7 @@ void kl_active_schedule::compute_work_memor } template -void kl_active_schedule::write_schedule (BspSchedule &schedule) { +void kl_active_schedule::write_schedule(BspSchedule &schedule) { for (const auto v : instance->vertices()) { schedule.setAssignedProcessor(v, vector_schedule.assignedProcessor(v)); schedule.setAssignedSuperstep(v, vector_schedule.assignedSuperstep(v)); @@ -572,91 +567,92 @@ void 
kl_active_schedule::write_schedule (Bs } template -void kl_active_schedule::remove_empty_step(unsigned step) { +void kl_active_schedule::remove_empty_step(unsigned step) { for (unsigned i = step; i < num_steps() - 1; i++) { - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]); - work_datastructures.swap_steps(i, i+1); + work_datastructures.swap_steps(i, i + 1); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i, i+1); + memory_constraint.swap_steps(i, i + 1); } } vector_schedule.number_of_supersteps--; } template -void kl_active_schedule::swap_empty_step_fwd(const unsigned step, const unsigned to_step) { +void kl_active_schedule::swap_empty_step_fwd(const unsigned step, const unsigned to_step) { for (unsigned i = step; i < to_step; i++) { - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]); work_datastructures.swap_steps(i, i + 1); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i, i+1); + memory_constraint.swap_steps(i, i + 1); } } } template void kl_active_schedule::insert_empty_step(unsigned step) { - unsigned i = vector_schedule.number_of_supersteps++; - + unsigned i = vector_schedule.number_of_supersteps++; + for (; i > step; i--) { - 
for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i-1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]); - work_datastructures.swap_steps(i-1, i); + work_datastructures.swap_steps(i - 1, i); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i - 1, i); + memory_constraint.swap_steps(i - 1, i); } - } + } } template void kl_active_schedule::swap_empty_step_bwd(const unsigned to_step, const unsigned empty_step) { - unsigned i = to_step; - + unsigned i = to_step; + for (; i > empty_step; i--) { - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i-1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]); - work_datastructures.swap_steps(i-1, i); + work_datastructures.swap_steps(i - 1, i); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i - 1, i); + memory_constraint.swap_steps(i - 1, i); } - } + } } template void kl_active_schedule::swap_steps(const unsigned step1, const unsigned step2) { - if (step1 == step2) return; + if (step1 == step2) + return; - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[step1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[step1][proc]) { 
vector_schedule.setAssignedSuperstep(node, step2); } - for (const auto node : set_schedule.step_processor_vertices[step2][proc]){ + for (const auto node : set_schedule.step_processor_vertices[step2][proc]) { vector_schedule.setAssignedSuperstep(node, step1); } } std::swap(set_schedule.step_processor_vertices[step1], set_schedule.step_processor_vertices[step2]); - work_datastructures.swap_steps(step1, step2); + work_datastructures.swap_steps(step1, step2); if constexpr (use_memory_constraint) { memory_constraint.swap_steps(step1, step2); - } + } } } // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp index 97bd35a7..dd572710 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp @@ -30,6 +30,7 @@ limitations under the License. #include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" #include "osp/auxiliary/misc.hpp" +#include "osp/bsp/model/util/CompatibleProcessorRange.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" @@ -152,7 +153,7 @@ class kl_improver : public ImprovementScheduler { const Graph_t *graph; const BspInstance *instance; - compatible_processor_range proc_range; + CompatibleProcessorRange proc_range; kl_parameter parameters; std::mt19937 gen; diff --git a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp index 6961ef92..2cee3d0f 100644 --- a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp @@ -19,8 +19,8 @@ limitations under the License. 
#pragma once #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" namespace osp { @@ -42,7 +42,7 @@ struct is_local_search_memory_constraint< std::declval(), std::declval(), std::declval(), std::declval())), decltype(std::declval().compute_memory_datastructure(std::declval(), - std::declval())), + std::declval())), decltype(std::declval().swap_steps(std::declval(), std::declval())), decltype(std::declval().reset_superstep(std::declval())), decltype(std::declval().override_superstep(std::declval(), std::declval(), @@ -105,7 +105,7 @@ struct ls_local_memory_constraint { void swap_steps(const unsigned step1, const unsigned step2) { std::swap(step_processor_memory[step1], step_processor_memory[step2]); - } + } void compute_memory_datastructure(unsigned start_step, unsigned end_step) { @@ -150,7 +150,7 @@ struct ls_local_memory_constraint { } } return true; - } + } }; template @@ -378,7 +378,7 @@ struct ls_local_sources_inc_edges_memory_constraint { inline void swap_steps(const unsigned step1, const unsigned step2) { std::swap(step_processor_memory[step1], step_processor_memory[step2]); std::swap(step_processor_pred[step1], step_processor_pred[step2]); - } + } inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &vec_schedule_) { @@ -587,7 +587,6 @@ struct ls_local_sources_inc_edges_memory_constraint { } return true; - } }; diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp index a57e2e84..fa458ba9 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -18,6 +18,7 @@ limitations under the License. 
#pragma once +#include "osp/auxiliary/return_status.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" diff --git a/include/osp/coarser/BspScheduleCoarser.hpp b/include/osp/coarser/BspScheduleCoarser.hpp index 64684b7a..ea4cf9f9 100644 --- a/include/osp/coarser/BspScheduleCoarser.hpp +++ b/include/osp/coarser/BspScheduleCoarser.hpp @@ -18,10 +18,10 @@ limitations under the License. #pragma once -#include "osp/coarser/Coarser.hpp" #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/coarser/Coarser.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" namespace osp { @@ -63,7 +63,6 @@ class BspScheduleCoarser : public CoarserGenContractionMapgetInstance().getComputationalDag()); assert(schedule->satisfiesPrecedenceConstraints()); - SetSchedule set_schedule(*schedule); std::vector reverse_vertex_map(dag_in.num_vertices(), 0); std::vector> vertex_map; diff --git a/include/osp/coarser/MultilevelCoarser.hpp b/include/osp/coarser/MultilevelCoarser.hpp index bbd090e4..f8a1434e 100644 --- a/include/osp/coarser/MultilevelCoarser.hpp +++ b/include/osp/coarser/MultilevelCoarser.hpp @@ -23,11 +23,11 @@ limitations under the License. 
#include #include -#include "osp/coarser/Coarser.hpp" +#include "osp/auxiliary/return_status.hpp" #include "osp/bsp/model/BspInstance.hpp" +#include "osp/coarser/Coarser.hpp" #include "osp/coarser/coarser_util.hpp" - namespace osp { template @@ -36,10 +36,12 @@ class MultilevelCoarseAndSchedule; template class MultilevelCoarser : public Coarser { friend class MultilevelCoarseAndSchedule; + private: const Graph_t *original_graph; + protected: - inline const Graph_t * getOriginalGraph() const { return original_graph; }; + inline const Graph_t *getOriginalGraph() const { return original_graph; }; std::vector> dag_history; std::vector>>> contraction_maps; @@ -49,7 +51,7 @@ class MultilevelCoarser : public Coarser { RETURN_STATUS add_contraction(const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph); RETURN_STATUS add_contraction(std::vector> &&contraction_map, Graph_t_coarse &&contracted_graph); void add_identity_contraction(); - + std::vector> getCombinedContractionMap() const; virtual RETURN_STATUS run_contractions() = 0; @@ -62,19 +64,15 @@ class MultilevelCoarser : public Coarser { MultilevelCoarser(const Graph_t &graph) : original_graph(&graph) {}; virtual ~MultilevelCoarser() = default; - bool coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag, - std::vector> &vertex_contraction_map) override; + std::vector> &vertex_contraction_map) override; - RETURN_STATUS run(const Graph_t &graph); RETURN_STATUS run(const BspInstance &inst); virtual std::string getCoarserName() const override = 0; }; - - template RETURN_STATUS MultilevelCoarser::run(const Graph_t &graph) { clear_computation_data(); @@ -91,7 +89,7 @@ RETURN_STATUS MultilevelCoarser::run(const Graph_t &gra } template -RETURN_STATUS MultilevelCoarser::run(const BspInstance< Graph_t > &inst) { +RETURN_STATUS MultilevelCoarser::run(const BspInstance &inst) { return run(inst.getComputationalDag()); } @@ -99,15 +97,15 @@ template void MultilevelCoarser::clear_computation_data() { 
dag_history.clear(); dag_history.shrink_to_fit(); - + contraction_maps.clear(); contraction_maps.shrink_to_fit(); } - template void MultilevelCoarser::compactify_dag_history() { - if (dag_history.size() < 3) return; + if (dag_history.size() < 3) + return; size_t dag_indx_first = dag_history.size() - 2; size_t map_indx_first = contraction_maps.size() - 2; @@ -115,13 +113,13 @@ void MultilevelCoarser::compactify_dag_history() { size_t dag_indx_second = dag_history.size() - 1; size_t map_indx_second = contraction_maps.size() - 1; - if ( (static_cast( dag_history[dag_indx_first-1]->num_vertices() ) / static_cast( dag_history[dag_indx_second-1]->num_vertices() )) > 1.25 ) return; - + if ((static_cast(dag_history[dag_indx_first - 1]->num_vertices()) / static_cast(dag_history[dag_indx_second - 1]->num_vertices())) > 1.25) + return; // Compute combined contraction_map - std::unique_ptr>> combi_contraction_map = std::make_unique>>( contraction_maps[map_indx_first]->size() ); + std::unique_ptr>> combi_contraction_map = std::make_unique>>(contraction_maps[map_indx_first]->size()); for (std::size_t vert = 0; vert < contraction_maps[map_indx_first]->size(); ++vert) { - combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at( contraction_maps[map_indx_first]->at( vert ) ); + combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at(contraction_maps[map_indx_first]->at(vert)); } // Delete ComputationalDag @@ -138,7 +136,6 @@ void MultilevelCoarser::compactify_dag_history() { contraction_maps[map_indx_first] = std::move(combi_contraction_map); } - template RETURN_STATUS MultilevelCoarser::add_contraction(const std::vector> &contraction_map) { std::unique_ptr new_graph = std::make_unique(); @@ -148,12 +145,12 @@ RETURN_STATUS MultilevelCoarser::add_contraction(const bool success = false; if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()) ); + success = 
coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back())); } else { - success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back())); } - dag_history.emplace_back( std::move(new_graph) ); + dag_history.emplace_back(std::move(new_graph)); if (success) { compactify_dag_history(); @@ -166,19 +163,19 @@ RETURN_STATUS MultilevelCoarser::add_contraction(const template RETURN_STATUS MultilevelCoarser::add_contraction(std::vector> &&contraction_map) { std::unique_ptr new_graph = std::make_unique(); - + std::unique_ptr>> contr_map_ptr(new std::vector>(std::move(contraction_map))); contraction_maps.emplace_back(std::move(contr_map_ptr)); bool success = false; if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back())); } else { - success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back())); } - dag_history.emplace_back( std::move(new_graph) ); + dag_history.emplace_back(std::move(new_graph)); if (success) { compactify_dag_history(); @@ -188,12 +185,11 @@ RETURN_STATUS MultilevelCoarser::add_contraction(std::v } } - template RETURN_STATUS MultilevelCoarser::add_contraction(const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph) { std::unique_ptr graph_ptr(new Graph_t_coarse(contracted_graph)); dag_history.emplace_back(std::move(graph_ptr)); - + std::unique_ptr>> contr_map_ptr(new std::vector>(contraction_map)); contraction_maps.emplace_back(std::move(contr_map_ptr)); @@ -213,7 +209,6 @@ RETURN_STATUS 
MultilevelCoarser::add_contraction(std::v return RETURN_STATUS::OSP_SUCCESS; } - template std::vector> MultilevelCoarser::getCombinedContractionMap() const { std::vector> combinedContractionMap(original_graph->num_vertices()); @@ -221,23 +216,22 @@ std::vector> MultilevelCoarserat( combinedContractionMap[i] ); + combinedContractionMap[i] = contraction_maps[j]->at(combinedContractionMap[i]); } } return combinedContractionMap; } - - template bool MultilevelCoarser::coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag, - std::vector> &vertex_contraction_map) { + std::vector> &vertex_contraction_map) { clear_computation_data(); RETURN_STATUS status = run(dag_in); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return false; + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) + return false; assert(dag_history.size() != 0); coarsened_dag = *(dag_history.back()); @@ -251,20 +245,16 @@ template void MultilevelCoarser::add_identity_contraction() { std::size_t n_vert; if (dag_history.size() == 0) { - n_vert = static_cast( original_graph->num_vertices() ); + n_vert = static_cast(original_graph->num_vertices()); } else { - n_vert = static_cast( dag_history.back()->num_vertices() ); + n_vert = static_cast(dag_history.back()->num_vertices()); } - - std::vector> contraction_map( n_vert ); + + std::vector> contraction_map(n_vert); std::iota(contraction_map.begin(), contraction_map.end(), 0); add_contraction(std::move(contraction_map)); compactify_dag_history(); } - - - - } // end namespace osp \ No newline at end of file diff --git a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp index 556e82bc..69a3c80c 100644 --- a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp +++ b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp @@ -21,10 +21,10 @@ limitations under the License. 
#include "osp/graph_algorithms/computational_dag_util.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include #include -#include #include +#include +#include namespace osp { @@ -34,7 +34,7 @@ namespace osp { */ template class AbstractWavefrontScheduler : public Scheduler { -protected: + protected: IDagDivider *divider; Scheduler *scheduler; static constexpr bool enable_debug_prints = true; @@ -46,17 +46,17 @@ class AbstractWavefrontScheduler : public Scheduler { */ bool distributeProcessors( unsigned total_processors_of_type, - const std::vector& work_weights, - std::vector& allocation) const { - + const std::vector &work_weights, + std::vector &allocation) const { + allocation.assign(work_weights.size(), 0); double total_work = std::accumulate(work_weights.begin(), work_weights.end(), 0.0); if (total_work <= 1e-9 || total_processors_of_type == 0) { return false; } - + std::vector active_indices; - for(size_t i = 0; i < work_weights.size(); ++i) { + for (size_t i = 0; i < work_weights.size(); ++i) { if (work_weights[i] > 1e-9) { active_indices.push_back(i); } @@ -68,7 +68,7 @@ class AbstractWavefrontScheduler : public Scheduler { size_t num_active_components = active_indices.size(); unsigned remaining_procs = total_processors_of_type; - + // --- Stage 1: Guarantee at least one processor if possible (anti-starvation) --- if (total_processors_of_type >= num_active_components) { // Abundance case: Give one processor to each active component first. @@ -79,11 +79,11 @@ class AbstractWavefrontScheduler : public Scheduler { } else { // Scarcity case: Not enough processors for each active component. 
std::vector> sorted_work; - for(size_t idx : active_indices) { + for (size_t idx : active_indices) { sorted_work.push_back({work_weights[idx], idx}); } std::sort(sorted_work.rbegin(), sorted_work.rend()); - for(unsigned i = 0; i < remaining_procs; ++i) { + for (unsigned i = 0; i < remaining_procs; ++i) { allocation[sorted_work[i].second]++; } return true; // Scarcity case was hit. @@ -93,10 +93,10 @@ class AbstractWavefrontScheduler : public Scheduler { if (remaining_procs > 0) { std::vector adjusted_work_weights; double adjusted_total_work = 0; - + double work_per_proc = total_work / static_cast(total_processors_of_type); - for(size_t idx : active_indices) { + for (size_t idx : active_indices) { double adjusted_work = std::max(0.0, work_weights[idx] - work_per_proc); adjusted_work_weights.push_back(adjusted_work); adjusted_total_work += adjusted_work; @@ -123,14 +123,13 @@ class AbstractWavefrontScheduler : public Scheduler { } } } - } + } return false; // Scarcity case was not hit. } - BspArchitecture createSubArchitecture( const BspArchitecture &original_arch, - const std::vector& sub_dag_proc_types) const { + const std::vector &sub_dag_proc_types) const { // The calculation is now inside the assert, so it only happens in debug builds. 
assert(std::accumulate(sub_dag_proc_types.begin(), sub_dag_proc_types.end(), 0u) > 0 && "Attempted to create a sub-architecture with zero processors."); @@ -142,33 +141,34 @@ class AbstractWavefrontScheduler : public Scheduler { sub_dag_processor_memory[original_arch.processorType(i)] = std::min(original_arch.memoryBound(i), sub_dag_processor_memory[original_arch.processorType(i)]); } - sub_architecture.set_processors_consequ_types(sub_dag_proc_types, sub_dag_processor_memory); + sub_architecture.SetProcessorsConsequTypes(sub_dag_proc_types, sub_dag_processor_memory); return sub_architecture; } - bool validateWorkDistribution(const std::vector& sub_dags, const BspInstance& instance) const { - const auto& original_arch = instance.getArchitecture(); - for (const auto& rep_sub_dag : sub_dags) { + bool validateWorkDistribution(const std::vector &sub_dags, const BspInstance &instance) const { + const auto &original_arch = instance.getArchitecture(); + for (const auto &rep_sub_dag : sub_dags) { const double total_rep_work = sumOfVerticesWorkWeights(rep_sub_dag); - + double sum_of_compatible_works_for_rep = 0.0; for (unsigned type_idx = 0; type_idx < original_arch.getNumberOfProcessorTypes(); ++type_idx) { sum_of_compatible_works_for_rep += sumOfCompatibleWorkWeights(rep_sub_dag, instance, type_idx); } if (sum_of_compatible_works_for_rep > total_rep_work + 1e-9) { - if constexpr (enable_debug_prints) std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep - << ") exceeds total work (" << total_rep_work - << ") for a sub-dag. Aborting." << std::endl; + if constexpr (enable_debug_prints) + std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep + << ") exceeds total work (" << total_rep_work + << ") for a sub-dag. Aborting." 
<< std::endl; return false; } } return true; } -public: + public: AbstractWavefrontScheduler(IDagDivider &div, Scheduler &sched) : divider(&div), scheduler(&sched) {} }; -} +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp index 5ba326d9..83556089 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp @@ -16,22 +16,22 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ -#include -#include -#include -#include -#include "OrbitGraphProcessor.hpp" +#include "EftSubgraphScheduler.hpp" #include "HashComputer.hpp" #include "MerkleHashComputer.hpp" -#include "EftSubgraphScheduler.hpp" +#include "OrbitGraphProcessor.hpp" #include "TrimmedGroupScheduler.hpp" #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" +#include +#include +#include +#include namespace osp { -/** +/** * @brief A scheduler that leverages isomorphic subgraphs to partition a DAG. 
* * @class IsomorphicSubgraphScheduler @@ -58,12 +58,11 @@ class IsomorphicSubgraphScheduler { static_assert(std::is_same_v, vertex_idx_t>, "Graph_t and Constr_Graph_t must have the same vertex_idx types"); - private: - - static constexpr bool verbose = false; - const HashComputer>* hash_computer_; + private: + static constexpr bool verbose = false; + const HashComputer> *hash_computer_; size_t symmetry_ = 4; - Scheduler * bsp_scheduler_; + Scheduler *bsp_scheduler_; bool use_max_group_size_ = false; unsigned max_group_size_ = 0; bool plot_dot_graphs_ = false; @@ -76,22 +75,21 @@ class IsomorphicSubgraphScheduler { bool use_max_bsp = false; bool use_adaptive_symmetry_threshold = true; - public: - - explicit IsomorphicSubgraphScheduler(Scheduler & bsp_scheduler) + public: + explicit IsomorphicSubgraphScheduler(Scheduler &bsp_scheduler) : hash_computer_(nullptr), bsp_scheduler_(&bsp_scheduler), plot_dot_graphs_(false) {} - IsomorphicSubgraphScheduler(Scheduler & bsp_scheduler, const HashComputer>& hash_computer) + IsomorphicSubgraphScheduler(Scheduler &bsp_scheduler, const HashComputer> &hash_computer) : hash_computer_(&hash_computer), bsp_scheduler_(&bsp_scheduler), plot_dot_graphs_(false) {} virtual ~IsomorphicSubgraphScheduler() {} - void setMergeDifferentTypes(bool flag) {merge_different_node_types = flag;} - void setWorkThreshold(v_workw_t work_threshold) {work_threshold_ = work_threshold;} - void setCriticalPathThreshold(v_workw_t critical_path_threshold) {critical_path_threshold_ = critical_path_threshold;} - void setOrbitLockRatio(double orbit_lock_ratio) {orbit_lock_ratio_ = orbit_lock_ratio;} - void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) {natural_breaks_count_percentage_ = natural_breaks_count_percentage;} - void setAllowTrimmedScheduler(bool flag) {allow_use_trimmed_scheduler = flag;} + void setMergeDifferentTypes(bool flag) { merge_different_node_types = flag; } + void setWorkThreshold(v_workw_t work_threshold) { 
work_threshold_ = work_threshold; } + void setCriticalPathThreshold(v_workw_t critical_path_threshold) { critical_path_threshold_ = critical_path_threshold; } + void setOrbitLockRatio(double orbit_lock_ratio) { orbit_lock_ratio_ = orbit_lock_ratio; } + void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) { natural_breaks_count_percentage_ = natural_breaks_count_percentage; } + void setAllowTrimmedScheduler(bool flag) { allow_use_trimmed_scheduler = flag; } void set_plot_dot_graphs(bool plot) { plot_dot_graphs_ = plot; } void disable_use_max_group_size() { use_max_group_size_ = false; } void setUseMaxBsp(bool flag) { use_max_bsp = flag; } @@ -100,12 +98,12 @@ class IsomorphicSubgraphScheduler { max_group_size_ = max_group_size; } void setEnableAdaptiveSymmetryThreshold() { use_adaptive_symmetry_threshold = true; } - void setUseStaticSymmetryLevel(size_t static_symmetry_level) { - use_adaptive_symmetry_threshold = false; - symmetry_ = static_symmetry_level; + void setUseStaticSymmetryLevel(size_t static_symmetry_level) { + use_adaptive_symmetry_threshold = false; + symmetry_ = static_symmetry_level; } - std::vector> compute_partition(const BspInstance& instance) { + std::vector> compute_partition(const BspInstance &instance) { OrbitGraphProcessor orbit_processor; orbit_processor.set_work_threshold(work_threshold_); orbit_processor.setMergeDifferentNodeTypes(merge_different_node_types); @@ -116,7 +114,7 @@ class IsomorphicSubgraphScheduler { orbit_processor.setUseStaticSymmetryLevel(symmetry_); } - std::unique_ptr>> local_hasher; + std::unique_ptr>> local_hasher; if (!hash_computer_) { local_hasher = std::make_unique, true>>(instance.getComputationalDag(), instance.getComputationalDag()); hash_computer_ = local_hasher.get(); @@ -125,7 +123,7 @@ class IsomorphicSubgraphScheduler { orbit_processor.discover_isomorphic_groups(instance.getComputationalDag(), *hash_computer_); auto isomorphic_groups = orbit_processor.get_final_groups(); - + 
std::vector was_trimmed(isomorphic_groups.size(), false); trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); // Apply trimming and record which groups were affected @@ -157,8 +155,7 @@ class IsomorphicSubgraphScheduler { return partition; } - protected: - + protected: template struct subgraph_scheduler_input { BspInstance instance; @@ -167,14 +164,14 @@ class IsomorphicSubgraphScheduler { std::vector>> required_proc_types; }; - void trim_subgraph_groups(std::vector::Group>& isomorphic_groups, - const BspInstance& instance, - std::vector& was_trimmed) { + void trim_subgraph_groups(std::vector::Group> &isomorphic_groups, + const BspInstance &instance, + std::vector &was_trimmed) { if constexpr (verbose) { std::cout << "\n--- Trimming Isomorphic Subgraph Groups ---" << std::endl; } for (size_t group_idx = 0; group_idx < isomorphic_groups.size(); ++group_idx) { - auto& group = isomorphic_groups[group_idx]; + auto &group = isomorphic_groups[group_idx]; const unsigned group_size = static_cast(group.size()); if (group_size <= 1) continue; @@ -194,24 +191,24 @@ class IsomorphicSubgraphScheduler { if constexpr (has_typed_vertices_v) { if (!group.subgraphs.empty() && !group.subgraphs[0].empty()) { common_node_type = instance.getComputationalDag().vertex_type(group.subgraphs[0][0]); - const auto& rep_subgraph = group.subgraphs[0]; - for (const auto& vertex : rep_subgraph) { + const auto &rep_subgraph = group.subgraphs[0]; + for (const auto &vertex : rep_subgraph) { if (instance.getComputationalDag().vertex_type(vertex) != common_node_type) { is_single_type_group = false; break; } } } else { - is_single_type_group = false; + is_single_type_group = false; } } else { - is_single_type_group = false; + is_single_type_group = false; } if (is_single_type_group) { // Dynamically determine min_proc_type_count based on compatible processors for this type unsigned min_compatible_processors = std::numeric_limits::max(); - const auto& proc_type_counts = 
instance.getArchitecture().getProcessorTypeCount(); + const auto &proc_type_counts = instance.getArchitecture().getProcessorTypeCount(); bool found_compatible_processor = false; for (unsigned proc_type_idx = 0; proc_type_idx < proc_type_counts.size(); ++proc_type_idx) { @@ -222,13 +219,13 @@ } if (found_compatible_processor) { if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type + std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type << "). Min compatible processors: " << min_compatible_processors << "." << std::endl; } effective_min_proc_type_count = min_compatible_processors; } else { if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type + std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type << ") but no compatible processors found. Disabling trimming." << std::endl; } // If no compatible processors found for this type, effectively disable trimming for this group. @@ -236,7 +233,11 @@ } } else { // Fallback to a default min_proc_type_count if not a single-type group or no typed vertices. - effective_min_proc_type_count = instance.getArchitecture().getMinProcessorTypeCount(); + const auto &type_count = instance.getArchitecture().getProcessorTypeCount(); + // Guard the empty case: std::min_element on an empty range returns end(), which must not be dereferenced. + effective_min_proc_type_count = + type_count.empty() ? 0 + : *std::min_element(type_count.begin(), type_count.end()); if constexpr (verbose) { std::cout << "Group " << group_idx << " (size " << group_size << "): Multi-type or untyped group. Using default min_proc_type_count: " << effective_min_proc_type_count << "."
<< std::endl; } @@ -257,13 +258,13 @@ class IsomorphicSubgraphScheduler { if (gcd < group_size) { if constexpr (verbose) { - std::cout << " -> Trimming group " << group_idx << ". GCD(" << group_size << ", " << effective_min_proc_type_count + std::cout << " -> Trimming group " << group_idx << ". GCD(" << group_size << ", " << effective_min_proc_type_count << ") = " << gcd << ". Merging " << group_size / gcd << " subgraphs at a time." << std::endl; } if (allow_use_trimmed_scheduler) gcd = 1; - + was_trimmed[group_idx] = true; const unsigned merge_size = group_size / gcd; std::vector>> new_subgraphs; @@ -279,7 +280,7 @@ class IsomorphicSubgraphScheduler { } for (unsigned k = 0; k < merge_size; ++k) { - const auto& sg_to_merge_vertices = group.subgraphs[original_sg_cursor]; + const auto &sg_to_merge_vertices = group.subgraphs[original_sg_cursor]; original_sg_cursor++; merged_sg_vertices.insert(merged_sg_vertices.end(), sg_to_merge_vertices.begin(), sg_to_merge_vertices.end()); } @@ -292,16 +293,16 @@ class IsomorphicSubgraphScheduler { } was_trimmed[group_idx] = false; } - } + } } subgraph_scheduler_input prepare_subgraph_scheduling_input( - const BspInstance& original_instance, - const std::vector::Group>& isomorphic_groups, - const std::vector& was_trimmed) { - + const BspInstance &original_instance, + const std::vector::Group> &isomorphic_groups, + const std::vector &was_trimmed) { + subgraph_scheduler_input result; - result.instance.setArchitecture(original_instance.getArchitecture()); + result.instance.getArchitecture() = original_instance.getArchitecture(); const unsigned num_proc_types = original_instance.getArchitecture().getNumberOfProcessorTypes(); result.multiplicities.resize(isomorphic_groups.size()); @@ -332,35 +333,35 @@ class IsomorphicSubgraphScheduler { ++coarse_node_idx; } coarser_util::construct_coarse_dag(original_instance.getComputationalDag(), result.instance.getComputationalDag(), - contraction_map); + contraction_map); if constexpr (verbose) { 
std::cout << "\n--- Preparing Subgraph Scheduling Input ---\n"; std::cout << "Found " << isomorphic_groups.size() << " isomorphic groups to schedule as coarse nodes.\n"; for (size_t j = 0; j < isomorphic_groups.size(); ++j) { std::cout << " - Coarse Node " << j << " (from " << isomorphic_groups[j].subgraphs.size() - << " isomorphic subgraphs):\n"; + << " isomorphic subgraphs):\n"; std::cout << " - Multiplicity for scheduling: " << result.multiplicities[j] << "\n"; std::cout << " - Total Work (in coarse graph): " << result.instance.getComputationalDag().vertex_work_weight(j) << "\n"; std::cout << " - Required Processor Types: "; for (unsigned k = 0; k < num_proc_types; ++k) { std::cout << result.required_proc_types[j][k] << " "; } - std::cout << "\n"; + std::cout << "\n"; std::cout << " - Max number of processors: " << result.max_num_processors[j] << "\n"; } } return result; } - void schedule_isomorphic_group(const BspInstance& instance, - const std::vector::Group>& isomorphic_groups, - const SubgraphSchedule & sub_sched, - std::vector> & partition) { + void schedule_isomorphic_group(const BspInstance &instance, + const std::vector::Group> &isomorphic_groups, + const SubgraphSchedule &sub_sched, + std::vector> &partition) { vertex_idx_t current_partition_idx = 0; for (size_t group_idx = 0; group_idx < isomorphic_groups.size(); ++group_idx) { - const auto& group = isomorphic_groups[group_idx]; + const auto &group = isomorphic_groups[group_idx]; if (group.subgraphs.empty()) { continue; } @@ -372,48 +373,48 @@ class IsomorphicSubgraphScheduler { BspInstance representative_instance; auto rep_global_to_local_map = create_induced_subgraph_map(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); - representative_instance.setArchitecture(instance.getArchitecture()); - const auto& procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx]; + representative_instance.getArchitecture() = 
instance.getArchitecture(); + const auto &procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx]; std::vector> mem_weights(procs_for_group.size(), 0); for (unsigned proc_type = 0; proc_type < procs_for_group.size(); ++proc_type) { mem_weights[proc_type] = static_cast>(instance.getArchitecture().maxMemoryBoundProcType(proc_type)); } - representative_instance.getArchitecture().set_processors_consequ_types(procs_for_group, mem_weights); + representative_instance.getArchitecture().SetProcessorsConsequTypes(procs_for_group, mem_weights); representative_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); // --- Decide which scheduler to use --- unsigned min_non_zero_procs = std::numeric_limits::max(); - for (const auto& proc_count : procs_for_group) { + for (const auto &proc_count : procs_for_group) { if (proc_count > 0) { min_non_zero_procs = std::min(min_non_zero_procs, proc_count); } } - bool use_trimmed_scheduler = sub_sched.was_trimmed[group_idx] && min_non_zero_procs > 1 && allow_use_trimmed_scheduler; - - Scheduler* scheduler_for_group_ptr; + + Scheduler *scheduler_for_group_ptr; std::unique_ptr> trimmed_scheduler_owner; if (use_trimmed_scheduler) { - if constexpr (verbose) std::cout << "Using TrimmedGroupScheduler for group " << group_idx << std::endl; + if constexpr (verbose) + std::cout << "Using TrimmedGroupScheduler for group " << group_idx << std::endl; trimmed_scheduler_owner = std::make_unique>(*bsp_scheduler_, min_non_zero_procs); scheduler_for_group_ptr = trimmed_scheduler_owner.get(); } else { - if constexpr (verbose) std::cout << "Using standard BSP scheduler for group " << group_idx << std::endl; + if constexpr (verbose) + std::cout << "Using standard BSP scheduler for group " << group_idx << std::endl; scheduler_for_group_ptr = bsp_scheduler_; } - // --- Schedule the representative to get the pattern --- BspSchedule bsp_schedule(representative_instance); if constexpr (verbose) { std::cout << "--- 
Scheduling representative for group " << group_idx << " ---" << std::endl; std::cout << " Number of subgraphs in group: " << group.subgraphs.size() << std::endl; - const auto& rep_dag = representative_instance.getComputationalDag(); + const auto &rep_dag = representative_instance.getComputationalDag(); std::cout << " Representative subgraph size: " << rep_dag.num_vertices() << " vertices" << std::endl; std::vector node_type_counts(rep_dag.num_vertex_types(), 0); - for (const auto& v : rep_dag.vertices()) { + for (const auto &v : rep_dag.vertices()) { node_type_counts[rep_dag.vertex_type(v)]++; } std::cout << " Node type counts: "; @@ -424,45 +425,34 @@ class IsomorphicSubgraphScheduler { } std::cout << std::endl; - const auto& sub_arch = representative_instance.getArchitecture(); + const auto &sub_arch = representative_instance.getArchitecture(); std::cout << " Sub-architecture for scheduling:" << std::endl; std::cout << " Processors: " << sub_arch.numberOfProcessors() << std::endl; std::cout << " Processor types counts: "; - const auto& type_counts = sub_arch.getProcessorTypeCount(); + const auto &type_counts = sub_arch.getProcessorTypeCount(); for (size_t type_idx = 0; type_idx < type_counts.size(); ++type_idx) { std::cout << "T" << type_idx << ":" << type_counts[type_idx] << " "; } std::cout << std::endl; std::cout << " Sync cost: " << sub_arch.synchronisationCosts() << ", Comm cost: " << sub_arch.communicationCosts() << std::endl; - std::cout << " Sub-problem compatibility matrix:" << std::endl; - const auto & sub_comp_matrix = representative_instance.getNodeNodeCompatabilityMatrix(); - for(unsigned i = 0; i < sub_comp_matrix.size(); ++i) { - std::cout << " Node Type " << i << ": [ "; - for (unsigned j = 0; j < sub_comp_matrix[i].size(); ++j) { - std::cout << (sub_comp_matrix[i][j] ? 
"1" : "0") << " "; - } - std::cout << "]" << std::endl; - } - } - + scheduler_for_group_ptr->computeSchedule(bsp_schedule); if constexpr (verbose) { - std::cout << " Schedule satisfies precedence constraints: "; + std::cout << " Schedule satisfies precedence constraints: "; std::cout << bsp_schedule.satisfiesPrecedenceConstraints() << std::endl; std::cout << " Schedule satisfies node type constraints: "; std::cout << bsp_schedule.satisfiesNodeTypeConstraints() << std::endl; } - if (plot_dot_graphs_) { - const auto& rep_dag = bsp_schedule.getInstance().getComputationalDag(); + const auto &rep_dag = bsp_schedule.getInstance().getComputationalDag(); std::vector colors(rep_dag.num_vertices()); std::map, unsigned> proc_ss_to_color; unsigned next_color = 0; - for (const auto& v : rep_dag.vertices()) { + for (const auto &v : rep_dag.vertices()) { const auto assignment = std::make_pair(bsp_schedule.assignedProcessor(v), bsp_schedule.assignedSuperstep(v)); if (proc_ss_to_color.find(assignment) == proc_ss_to_color.end()) { proc_ss_to_color[assignment] = next_color++; @@ -476,12 +466,10 @@ class IsomorphicSubgraphScheduler { ss << std::put_time(std::localtime(&in_time_t), "%Y%m%d_%H%M%S"); std::string timestamp = ss.str() + "_"; - DotFileWriter writer; writer.write_colored_graph(timestamp + "iso_group_rep_" + std::to_string(group_idx) + ".dot", rep_dag, colors); } - const bool max_bsp = use_max_bsp && (representative_instance.getComputationalDag().num_edges() == 0) && (representative_instance.getComputationalDag().vertex_type(0) == 0); // Build data structures for applying the pattern --- @@ -491,10 +479,9 @@ class IsomorphicSubgraphScheduler { for (vertex_idx_t j = 0; j < static_cast>(rep_subgraph_vertices_sorted.size()); ++j) { auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(j), bsp_schedule.assignedProcessor(j)); - if (max_bsp) + if (max_bsp) sp_pair = std::make_pair(j, 0); - if (sp_proc_to_relative_partition.find(sp_pair) == 
sp_proc_to_relative_partition.end()) { sp_proc_to_relative_partition[sp_pair] = num_partitions_per_subgraph++; } @@ -516,12 +503,12 @@ class IsomorphicSubgraphScheduler { } else { // For other subgraphs, build the isomorphic mapping Constr_Graph_t current_subgraph_graph; create_induced_subgraph(instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted); - + MerkleHashComputer current_hasher(current_subgraph_graph); - for(const auto& [hash, rep_orbit_nodes] : rep_hasher.get_orbits()) { - const auto& current_orbit_nodes = current_hasher.get_orbit_from_hash(hash); - for(size_t k = 0; k < rep_orbit_nodes.size(); ++k) { + for (const auto &[hash, rep_orbit_nodes] : rep_hasher.get_orbits()) { + const auto &current_orbit_nodes = current_hasher.get_orbit_from_hash(hash); + for (size_t k = 0; k < rep_orbit_nodes.size(); ++k) { // Map: current_subgraph_vertex -> representative_subgraph_local_idx current_vertex_to_rep_local_idx[current_subgraph_vertices_sorted[current_orbit_nodes[k]]] = static_cast>(rep_orbit_nodes[k]); } @@ -529,11 +516,11 @@ class IsomorphicSubgraphScheduler { } // Apply the partition pattern - for (const auto& current_vertex : current_subgraph_vertices_sorted) { + for (const auto &current_vertex : current_subgraph_vertices_sorted) { const auto rep_local_idx = current_vertex_to_rep_local_idx.at(current_vertex); auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(rep_local_idx), bsp_schedule.assignedProcessor(rep_local_idx)); - if (max_bsp) + if (max_bsp) sp_pair = std::make_pair(rep_local_idx, 0); partition[current_vertex] = current_partition_idx + sp_proc_to_relative_partition.at(sp_pair); @@ -544,4 +531,4 @@ class IsomorphicSubgraphScheduler { } }; -} \ No newline at end of file +} // namespace osp \ No newline at end of file diff --git a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp index 0b125e71..97fa53a5 100644 ---
a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp @@ -19,8 +19,8 @@ limitations under the License. #pragma once #include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/graph_algorithms/subgraph_algorithms.hpp" #include "osp/graph_algorithms/computational_dag_util.hpp" +#include "osp/graph_algorithms/subgraph_algorithms.hpp" #include #include @@ -35,7 +35,7 @@ namespace osp { * potentially disconnected, subgraph that resulted from merging smaller isomorphic subgraphs. It divides * the input graph into its weakly connected components and schedules them on proportionally allocated processors. */ -template +template class TrimmedGroupScheduler : public Scheduler { Scheduler *sub_scheduler; @@ -94,7 +94,7 @@ class TrimmedGroupScheduler : public Scheduler { // Determine the processor allocation for a single sub-problem. // Calculate offsets for processor types within the main 'arch' (passed to TrimmedGroupScheduler) std::vector arch_proc_type_offsets(arch.getNumberOfProcessorTypes(), 0); - const auto& arch_proc_type_counts = arch.getProcessorTypeCount(); + const auto &arch_proc_type_counts = arch.getProcessorTypeCount(); for (unsigned type_idx = 1; type_idx < arch.getNumberOfProcessorTypes(); ++type_idx) { arch_proc_type_offsets[type_idx] = arch_proc_type_offsets[type_idx - 1] + arch_proc_type_counts[type_idx - 1]; } @@ -115,12 +115,12 @@ class TrimmedGroupScheduler : public Scheduler { } // Create the sub-architecture for one sub-problem. 
- BspArchitecture sub_arch(arch); - sub_arch.set_processors_consequ_types(sub_proc_counts, mem_weights); + BspArchitecture sub_arch(arch); + sub_arch.SetProcessorsConsequTypes(sub_proc_counts, mem_weights); // Calculate offsets for processor types within the 'sub_arch' std::vector sub_arch_proc_type_offsets(sub_arch.getNumberOfProcessorTypes(), 0); - const auto& sub_arch_proc_type_counts = sub_arch.getProcessorTypeCount(); + const auto &sub_arch_proc_type_counts = sub_arch.getProcessorTypeCount(); for (unsigned type_idx = 1; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) { sub_arch_proc_type_offsets[type_idx] = sub_arch_proc_type_offsets[type_idx - 1] + sub_arch_proc_type_counts[type_idx - 1]; } @@ -135,8 +135,8 @@ class TrimmedGroupScheduler : public Scheduler { std::sort(group_vertices.begin(), group_vertices.end()); BspInstance sub_instanc; - sub_instanc.setArchitecture(sub_arch); // Set the sub-architecture - sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix()); // Inherit compatibility + sub_instanc.getArchitecture() = sub_arch; + sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix()); // Inherit compatibility auto global_to_local_map = create_induced_subgraph_map(dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph // Create a schedule object for the sub-problem @@ -144,10 +144,11 @@ class TrimmedGroupScheduler : public Scheduler { // Call the sub-scheduler to compute the schedule for this group of components auto status = sub_scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status; + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) + return status; // Map the sub-schedule back to the main schedule. 
- for (const auto& v_global : group_vertices) { + for (const auto &v_global : group_vertices) { const auto v_local = global_to_local_map.at(v_global); const unsigned sub_proc = sub_schedule.assignedProcessor(v_local); const unsigned sub_superstep = sub_schedule.assignedSuperstep(v_local); diff --git a/include/osp/graph_algorithms/computational_dag_construction_util.hpp b/include/osp/graph_algorithms/computational_dag_construction_util.hpp index e85217e9..553996a6 100644 --- a/include/osp/graph_algorithms/computational_dag_construction_util.hpp +++ b/include/osp/graph_algorithms/computational_dag_construction_util.hpp @@ -34,7 +34,7 @@ namespace osp { * @tparam Graph_to The type of the target graph. Must satisfy `is_constructable_cdag_vertex`. * @param from The source graph. * @param to The target graph. - */ + */ template void constructComputationalDag(const Graph_from &from, Graph_to &to) { static_assert(is_computational_dag_v, "Graph_from must satisfy the computational_dag concept"); @@ -46,21 +46,21 @@ void constructComputationalDag(const Graph_from &from, Graph_to &to) { for (const auto &v_idx : from.vertices()) { if constexpr (has_typed_vertices_v and has_typed_vertices_v) { vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), - from.vertex_mem_weight(v_idx), from.vertex_type(v_idx))); + from.vertex_mem_weight(v_idx), from.vertex_type(v_idx))); } else { vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), - from.vertex_mem_weight(v_idx))); + from.vertex_mem_weight(v_idx))); } } if constexpr (has_edge_weights_v and has_edge_weights_v) { for (const auto &e : edges(from)) { - to.add_edge(vertex_map.at(source(e, from)), vertex_map.at(target(e, from)), from.edge_comm_weight(e)); + to.add_edge(vertex_map[source(e, from)], vertex_map[target(e, from)], from.edge_comm_weight(e)); } } else { for (const auto &v : from.vertices()) { for (const auto &child : from.children(v)) { - 
to.add_edge(vertex_map.at(v), vertex_map.at(child)); + to.add_edge(vertex_map[v], vertex_map[child]); } } } diff --git a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp index 0b67ab30..616aea6b 100644 --- a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp @@ -17,6 +17,8 @@ limitations under the License. */ #pragma once +#include // for std::size_t + namespace osp { /** @@ -71,17 +73,17 @@ struct cdag_vertex_impl { }; /** - * @brief A vertex implementation with integer weights. Indexed by size_t. Node types are unsigned. + * @brief A vertex implementation with integer weights. Indexed by std::size_t. Node types are unsigned. * * This struct implements a vertex with integer weights for work, communication, and memory. */ -using cdag_vertex_impl_int = cdag_vertex_impl; +using cdag_vertex_impl_int = cdag_vertex_impl; /** - * @brief A vertex implementation with unsigned weights. Indexed by size_t. Node types are unsigned. + * @brief A vertex implementation with unsigned weights. Indexed by std::size_t. Node types are unsigned. * * This struct implements a vertex with unsigned weights for work, communication, and memory. */ -using cdag_vertex_impl_unsigned = cdag_vertex_impl; +using cdag_vertex_impl_unsigned = cdag_vertex_impl; } // namespace osp \ No newline at end of file diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp index 74340de6..0a1b676a 100644 --- a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp @@ -32,8 +32,8 @@ namespace osp { * @brief A vector-based implementation of a computational DAG. 
* * This class implements a computational DAG using adjacency lists stored in two std::vectors. - * It manages the storage of vertices and edges, and provides an interface to query and modify the graph. - * + * It manages the storage of vertices and edges, and provides an interface to query and modify the graph. + * * This class satisfies the following concepts: * - `is_computational_dag_typed_vertices` * - `is_directed_graph` @@ -78,9 +78,8 @@ class computational_dag_vector_impl { explicit computational_dag_vector_impl(const vertex_idx num_vertices) : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0), num_vertex_types_(0) { - for (vertex_idx i = 0; i < num_vertices; ++i) { - vertices_.at(i).id = i; + vertices_[i].id = i; } } @@ -98,9 +97,7 @@ class computational_dag_vector_impl { */ template explicit computational_dag_vector_impl(const Graph_t &other) { - static_assert(is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); - constructComputationalDag(other, *this); } @@ -150,40 +147,40 @@ class computational_dag_vector_impl { [[nodiscard]] vertex_idx num_edges() const { return num_edges_; } /** - * @brief Returns the parents (in-neighbors) of a vertex. + * @brief Returns the parents (in-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] const std::vector &parents(const vertex_idx v) const { return in_neigbors.at(v); } + [[nodiscard]] const std::vector &parents(const vertex_idx v) const { return in_neigbors[v]; } /** - * @brief Returns the children (out-neighbors) of a vertex. + * @brief Returns the children (out-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. 
*/ - [[nodiscard]] const std::vector &children(const vertex_idx v) const { return out_neigbors.at(v); } + [[nodiscard]] const std::vector &children(const vertex_idx v) const { return out_neigbors[v]; } /** - * @brief Returns the in-degree of a vertex. + * @brief Returns the in-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx in_degree(const vertex_idx v) const { return static_cast(in_neigbors.at(v).size()); } + [[nodiscard]] vertex_idx in_degree(const vertex_idx v) const { return static_cast(in_neigbors[v].size()); } /** - * @brief Returns the out-degree of a vertex. + * @brief Returns the out-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx out_degree(const vertex_idx v) const { return static_cast(out_neigbors.at(v).size()); } + [[nodiscard]] vertex_idx out_degree(const vertex_idx v) const { return static_cast(out_neigbors[v].size()); } - [[nodiscard]] vertex_work_weight_type vertex_work_weight(const vertex_idx v) const { return vertices_.at(v).work_weight; } + [[nodiscard]] vertex_work_weight_type vertex_work_weight(const vertex_idx v) const { return vertices_[v].work_weight; } - [[nodiscard]] vertex_comm_weight_type vertex_comm_weight(const vertex_idx v) const { return vertices_.at(v).comm_weight; } + [[nodiscard]] vertex_comm_weight_type vertex_comm_weight(const vertex_idx v) const { return vertices_[v].comm_weight; } - [[nodiscard]] vertex_mem_weight_type vertex_mem_weight(const vertex_idx v) const { return vertices_.at(v).mem_weight; } + [[nodiscard]] vertex_mem_weight_type vertex_mem_weight(const vertex_idx v) const { return vertices_[v].mem_weight; } - [[nodiscard]] vertex_type_type vertex_type(const vertex_idx v) const { return vertices_.at(v).vertex_type; } + [[nodiscard]] vertex_type_type vertex_type(const vertex_idx v) const { return vertices_[v].vertex_type; } [[nodiscard]] vertex_type_type num_vertex_types() const { return 
num_vertex_types_; } - [[nodiscard]] const v_impl &get_vertex_impl(const vertex_idx v) const { return vertices_.at(v); } + [[nodiscard]] const v_impl &get_vertex_impl(const vertex_idx v) const { return vertices_[v]; } /** * @brief Adds a new isolated vertex to the graph. @@ -196,7 +193,6 @@ class computational_dag_vector_impl { */ vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight, const vertex_mem_weight_type mem_weight, const vertex_type_type vertex_type = 0) { - vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type); out_neigbors.push_back({}); in_neigbors.push_back({}); @@ -231,7 +227,6 @@ class computational_dag_vector_impl { * @return True if the edge was added, false if it already exists or vertices are invalid. */ bool add_edge(const vertex_idx source, const vertex_idx target) { - if (source >= static_cast(vertices_.size()) || target >= static_cast(vertices_.size()) || source == target) return false; @@ -240,7 +235,7 @@ class computational_dag_vector_impl { return false; } - out_neigbors.at(source).push_back(target); + out_neigbors[source].push_back(target); in_neigbors.at(target).push_back(source); num_edges_++; @@ -267,7 +262,6 @@ using computational_dag_vector_impl_def_t = computational_dag_vector_impl; - static_assert(is_directed_graph_edge_desc_v>, "computational_dag_vector_impl must satisfy the directed_graph_edge_desc concept"); diff --git a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp index 1deadcee..3ab94872 100644 --- a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp +++ b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp @@ -80,8 +80,8 @@ class dag_vector_adapter { dag_vector_adapter(const std::vector> &out_neigbors_, const std::vector> &in_neigbors_) : vertices_(out_neigbors_.size()), out_neigbors(&out_neigbors_), 
in_neigbors(&in_neigbors_), num_edges_(0), num_vertex_types_(1) { for (vertex_idx i = 0; i < static_cast(out_neigbors_.size()); ++i) { - vertices_.at(i).id = i; - num_edges_ += out_neigbors_.at(i).size(); + vertices_[i].id = i; + num_edges_ += out_neigbors_[i].size(); } } @@ -107,8 +107,8 @@ class dag_vector_adapter { num_edges_ = 0; for (vertex_idx i = 0; i < static_cast(out_neigbors->size()); ++i) { - vertices_.at(i).id = i; - num_edges_ += out_neigbors->at(i).size(); + vertices_[i].id = i; + num_edges_ += (*out_neigbors)[i].size(); } num_vertex_types_ = 1; @@ -130,40 +130,40 @@ class dag_vector_adapter { [[nodiscard]] vertex_idx num_edges() const { return static_cast(num_edges_); } /** - * @brief Returns a view of the parents (in-neighbors) of a vertex. + * @brief Returns a view of the parents (in-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] auto parents(const vertex_idx v) const { return vector_cast_view(in_neigbors->at(v)); } + [[nodiscard]] auto parents(const vertex_idx v) const { return vector_cast_view((*in_neigbors)[v]); } /** - * @brief Returns a view of the children (out-neighbors) of a vertex. + * @brief Returns a view of the children (out-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] auto children(const vertex_idx v) const { return vector_cast_view(out_neigbors->at(v)); } + [[nodiscard]] auto children(const vertex_idx v) const { return vector_cast_view((*out_neigbors)[v]); } /** - * @brief Returns the in-degree of a vertex. + * @brief Returns the in-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx in_degree(const vertex_idx v) const { return static_cast(in_neigbors->at(v).size()); } + [[nodiscard]] vertex_idx in_degree(const vertex_idx v) const { return static_cast((*in_neigbors)[v].size()); } /** - * @brief Returns the out-degree of a vertex.
+ * @brief Returns the out-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx out_degree(const vertex_idx v) const { return static_cast(out_neigbors->at(v).size()); } + [[nodiscard]] vertex_idx out_degree(const vertex_idx v) const { return static_cast((*out_neigbors)[v].size()); } - [[nodiscard]] vertex_work_weight_type vertex_work_weight(const vertex_idx v) const { return vertices_.at(v).work_weight; } + [[nodiscard]] vertex_work_weight_type vertex_work_weight(const vertex_idx v) const { return vertices_[v].work_weight; } - [[nodiscard]] vertex_comm_weight_type vertex_comm_weight(const vertex_idx v) const { return vertices_.at(v).comm_weight; } + [[nodiscard]] vertex_comm_weight_type vertex_comm_weight(const vertex_idx v) const { return vertices_[v].comm_weight; } - [[nodiscard]] vertex_mem_weight_type vertex_mem_weight(const vertex_idx v) const { return vertices_.at(v).mem_weight; } + [[nodiscard]] vertex_mem_weight_type vertex_mem_weight(const vertex_idx v) const { return vertices_[v].mem_weight; } - [[nodiscard]] vertex_type_type vertex_type(const vertex_idx v) const { return vertices_.at(v).vertex_type; } + [[nodiscard]] vertex_type_type vertex_type(const vertex_idx v) const { return vertices_[v].vertex_type; } [[nodiscard]] vertex_type_type num_vertex_types() const { return num_vertex_types_; } - [[nodiscard]] const v_impl &get_vertex_impl(const vertex_idx v) const { return vertices_.at(v); } + [[nodiscard]] const v_impl &get_vertex_impl(const vertex_idx v) const { return vertices_[v]; } void set_vertex_work_weight(const vertex_idx v, const vertex_work_weight_type work_weight) { vertices_.at(v).work_weight = work_weight; @@ -192,7 +192,6 @@ class dag_vector_adapter { unsigned num_vertex_types_ = 0; }; - static_assert(is_directed_graph_edge_desc_v>, "dag_vector_adapter must satisfy the directed_graph_edge_desc concept"); diff --git 
a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp index e8fbe586..b42ea17d 100644 --- a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp +++ b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp @@ -141,7 +141,7 @@ class vector_cast_view { * @param i The index of the element to access. * @return The element at index i, cast to to_t. */ - [[nodiscard]] auto operator[](std::size_t i) const { return static_cast(vec.at(i)); } + [[nodiscard]] auto operator[](std::size_t i) const { return static_cast(vec[i]); } }; } // namespace osp \ No newline at end of file diff --git a/include/osp/partitioning/partitioners/partitioning_ILP.hpp b/include/osp/partitioning/partitioners/partitioning_ILP.hpp index 0482d936..2e6c4e0e 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP.hpp @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once @@ -21,40 +21,39 @@ limitations under the License. 
#include #include -#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp" +#include "osp/auxiliary/return_status.hpp" #include "osp/partitioning/model/partitioning.hpp" +#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp" -namespace osp{ +namespace osp { template class HypergraphPartitioningILP : public HypergraphPartitioningILPBase { protected: - std::vector readCoptAssignment(const PartitioningProblem &instance, Model& model); + std::vector readCoptAssignment(const PartitioningProblem &instance, Model &model); - void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model& model); + void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); - void setInitialSolution(const Partitioning &partition, Model& model); + void setInitialSolution(const Partitioning &partition, Model &model); public: - virtual ~HypergraphPartitioningILP() override = default; - RETURN_STATUS computePartitioning(Partitioning& result); + RETURN_STATUS computePartitioning(Partitioning &result); virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILP"; } }; template -RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Partitioning& result) -{ +RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Partitioning &result) { Envr env; Model model = env.CreateModel("HypergraphPart"); this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model); setupExtraVariablesConstraints(result.getInstance(), model); - if(this->use_initial_solution) + if (this->use_initial_solution) setInitialSolution(result, model); this->solveILP(model); @@ -82,7 +81,7 @@ RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Parti } template -void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, Model& model) { +void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model) { using 
index_type = typename hypergraph_t::vertex_idx; @@ -104,19 +103,17 @@ void HypergraphPartitioningILP::setupExtraVariablesConstraints(con // hyperedge indicators match node variables for (unsigned part = 0; part < numberOfParts; part++) for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] >= this->node_in_partition[node][static_cast(part)]); - } // convert generic one-to-many assingment (of base class function) to one-to-one template -std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, Model& model) -{ +std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, Model &model) { using index_type = typename hypergraph_t::vertex_idx; std::vector node_to_partition(instance.getHypergraph().num_vertices(), std::numeric_limits::max()); - std::vector > assignmentsGenericForm = this->readAllCoptAssignments(instance, model); + std::vector> assignmentsGenericForm = this->readAllCoptAssignments(instance, model); for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) node_to_partition[node] = assignmentsGenericForm[node].front(); @@ -125,21 +122,19 @@ std::vector HypergraphPartitioningILP::readCoptAssignmen } template -void HypergraphPartitioningILP::setInitialSolution(const Partitioning &partition, Model& model) -{ +void HypergraphPartitioningILP::setInitialSolution(const Partitioning &partition, Model &model) { using index_type = typename hypergraph_t::vertex_idx; - const std::vector& assignment = partition.assignedPartitions(); - const unsigned& numPartitions = partition.getInstance().getNumberOfPartitions(); - if(assignment.size() != partition.getInstance().getHypergraph().num_vertices()) + const std::vector 
&assignment = partition.assignedPartitions(); + const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions(); + if (assignment.size() != partition.getInstance().getHypergraph().num_vertices()) return; - for(index_type node = 0; node < assignment.size(); ++node) - { - if(assignment[node] >= numPartitions) + for (index_type node = 0; node < assignment.size(); ++node) { + if (assignment[node] >= numPartitions) continue; - - for(unsigned part = 0; part < numPartitions; ++part) + + for (unsigned part = 0; part < numPartitions; ++part) model.SetMipStart(this->node_in_partition[node][static_cast(part)], static_cast(assignment[node] == part)); } model.LoadMipStart(); diff --git a/tests/bsp_architecture.cpp b/tests/bsp_architecture.cpp index af26e034..d803bb56 100644 --- a/tests/bsp_architecture.cpp +++ b/tests/bsp_architecture.cpp @@ -19,8 +19,8 @@ limitations under the License. #define BOOST_TEST_MODULE Bsp_Architecture #include -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/bsp/model/BspArchitecture.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; @@ -61,18 +61,18 @@ BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) { BOOST_CHECK_EQUAL(architecture.maxMemoryBoundProcType(0), 100); - BOOST_TEST(architecture.sendCostMatrix() == uniform_sent_costs); + BOOST_TEST(architecture.sendCost() == uniform_sent_costs); std::vector> expectedSendCosts = {{0, 2, 2, 2}, {2, 0, 2, 2}, {2, 2, 0, 2}, {2, 2, 2, 0}}; - architecture.setSendCosts(expectedSendCosts); - BOOST_TEST(architecture.sendCostMatrix() == expectedSendCosts); + architecture.SetSendCosts(expectedSendCosts); + BOOST_TEST(architecture.sendCost() == expectedSendCosts); BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 1), 4); BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 0), 0); architecture.SetUniformSendCost(); - BOOST_TEST(architecture.sendCostMatrix() == uniform_sent_costs); + 
BOOST_TEST(architecture.sendCost() == uniform_sent_costs); BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 1), 2); BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 0), 0); @@ -141,8 +141,7 @@ BOOST_AUTO_TEST_CASE(Architecture) { } // constructor - std::vector> send_costs = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, - {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; + std::vector> send_costs = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; BOOST_CHECK_THROW(BspArchitecture test31(7, 42942, 0, send_costs), std::invalid_argument); @@ -169,10 +168,8 @@ BOOST_AUTO_TEST_CASE(Architecture) { } // constructor - std::vector> send_costs2 = {{0, 1, 2, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, - {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; - std::vector> send_costs3 = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, - {3, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; + std::vector> send_costs2 = {{0, 1, 2, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; + std::vector> send_costs3 = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {3, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; BspArchitecture test4(6, 0, 4294965, send_costs2); BOOST_CHECK_EQUAL(test4.numberOfProcessors(), 6); diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp index c2b0b02a..101e4b2f 100644 --- a/tests/bsp_instance.cpp +++ b/tests/bsp_instance.cpp @@ -19,12 +19,13 @@ limitations under the License. 
#define BOOST_TEST_MODULE Bsp_Architecture #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/model/util/CompatibleProcessorRange.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include #include @@ -43,7 +44,7 @@ BOOST_AUTO_TEST_CASE(test_1) { BspArchitecture architecture_2(6, 3, 1); - instance.setArchitecture(architecture_2); + instance.getArchitecture() = architecture_2; BOOST_CHECK_EQUAL(instance.numberOfProcessors(), 6); BOOST_CHECK_EQUAL(instance.synchronisationCosts(), 1); @@ -84,8 +85,7 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { BOOST_CHECK_EQUAL(instance.isCompatible(0, 0), true); BOOST_CHECK_EQUAL(instance.isCompatible(1, 0), false); - - compatible_processor_range range(instance); + CompatibleProcessorRange range(instance); BOOST_CHECK_EQUAL(range.compatible_processors_type(0).size(), 3); BOOST_CHECK_EQUAL(range.compatible_processors_type(1).size(), 1); @@ -97,7 +97,6 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { } std::cout << std::endl; - std::cout << "Compatible processors type 1: " << std::endl; for (const auto &p : range.compatible_processors_type(1)) { @@ -105,7 +104,6 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { } std::cout << std::endl; - BOOST_CHECK_EQUAL(range.compatible_processors_vertex(0).size(), 1); BOOST_CHECK_EQUAL(range.compatible_processors_vertex(1).size(), 3); BOOST_CHECK_EQUAL(range.compatible_processors_vertex(2).size(), 3); diff --git a/tests/coarser.cpp b/tests/coarser.cpp index e4bd92c3..9c77703d 100644 --- a/tests/coarser.cpp +++ b/tests/coarser.cpp @@ -23,24 +23,24 @@ limitations under the License. 
#include #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/CoarseAndSchedule.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/coarser/BspScheduleCoarser.hpp" -#include "osp/coarser/coarser_util.hpp" -#include "osp/coarser/funnel/FunnelBfs.hpp" -#include "osp/coarser/hdagg/hdagg_coarser.hpp" #include "osp/coarser/Sarkar/Sarkar.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" #include "osp/coarser/SquashA/SquashA.hpp" #include "osp/coarser/SquashA/SquashAMul.hpp" +#include "osp/coarser/coarser_util.hpp" +#include "osp/coarser/funnel/FunnelBfs.hpp" +#include "osp/coarser/hdagg/hdagg_coarser.hpp" #include "osp/coarser/top_order/top_order_coarser.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; @@ -121,14 +121,15 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t = computational_dag_edge_idx_vector_impl_def_t; BspInstance 
instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -140,7 +141,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { } BspInstance coarse_instance; - coarse_instance.setArchitecture(instance.getArchitecture()); + coarse_instance.getArchitecture() = instance.getArchitecture(); std::vector> vertex_map; std::vector reverse_vertex_map; @@ -193,7 +194,8 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = computational_dag_vector_impl_def_t; @@ -201,7 +203,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -214,7 +216,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { BspInstance coarse_instance; BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.setArchitecture(architecture_t2); + coarse_instance.getArchitecture() = architecture_t2; std::vector> vertex_map; std::vector reverse_vertex_map; @@ -265,14 +267,15 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - 
std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -284,7 +287,7 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { } BspInstance coarse_instance; - coarse_instance.setArchitecture(instance.getArchitecture()); + coarse_instance.getArchitecture() = instance.getArchitecture(); std::vector> vertex_map; std::vector reverse_vertex_map; @@ -345,12 +348,13 @@ void test_coarser_same_graph(Coarser &coarser) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -362,17 +366,15 @@ void test_coarser_same_graph(Coarser &coarser) { } BspInstance coarse_instance; - coarse_instance.setArchitecture(instance.getArchitecture()); + coarse_instance.getArchitecture() = instance.getArchitecture(); std::vector> vertex_map; std::vector reverse_vertex_map; GreedyBspScheduler scheduler; - bool coarse_success = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); BOOST_CHECK(coarse_success); - vertex_map = 
coarser_util::invert_vertex_contraction_map(reverse_vertex_map); BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); @@ -446,27 +448,20 @@ BOOST_AUTO_TEST_CASE(squashA_test) { SquashA coarser(params); test_coarser_same_graph(coarser); - - + params.mode = SquashAParams::Mode::TRIANGLES; params.use_structured_poset = true; params.use_top_poset = true; coarser.setParams(params); - + test_coarser_same_graph(coarser); params.use_top_poset = false; coarser.setParams(params); - + test_coarser_same_graph(coarser); } - - - - - - BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = tiny_spaa_graphs(); @@ -484,7 +479,8 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = CSG; @@ -492,7 +488,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -505,7 +501,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { BspInstance coarse_instance; BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.setArchitecture(architecture_t2); + coarse_instance.getArchitecture() = architecture_t2; std::vector> vertex_map; std::vector reverse_vertex_map; @@ -560,7 +556,8 @@ 
BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = CSGE; @@ -568,7 +565,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -581,7 +578,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { BspInstance coarse_instance; BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.setArchitecture(architecture_t2); + coarse_instance.getArchitecture() = architecture_t2; std::vector> vertex_map; std::vector reverse_vertex_map; @@ -619,13 +616,6 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { } } - - - - - - - BOOST_AUTO_TEST_CASE(Sarkar_test) { using graph_t = computational_dag_edge_idx_vector_impl_def_t; // using graph_t = computational_dag_vector_impl_def_t; @@ -639,58 +629,47 @@ BOOST_AUTO_TEST_CASE(Sarkar_test) { test_coarser_same_graph(coarser); - params.useTopPoset = false; coarser.setParameters(params); test_coarser_same_graph(coarser); - - + params.mode = SarkarParams::Mode::FAN_IN_FULL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_PARTIAL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_FULL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = 
SarkarParams::Mode::FAN_OUT_PARTIAL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::LEVEL_EVEN; coarser.setParameters(params); test_coarser_same_graph(coarser); - - + params.mode = SarkarParams::Mode::LEVEL_ODD; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_BUFFER; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; coarser.setParameters(params); test_coarser_same_graph(coarser); } - BOOST_AUTO_TEST_CASE(SarkarML_test) { using graph_t = computational_dag_edge_idx_vector_impl_def_t; // using graph_t = computational_dag_vector_impl_def_t; @@ -723,6 +702,6 @@ BOOST_AUTO_TEST_CASE(SquashAML_test) { // using graph_t = computational_dag_vector_impl_def_t; SquashAMul coarser; - + test_coarser_same_graph(coarser); } \ No newline at end of file diff --git a/tests/debug_merkle_divider.cpp b/tests/debug_merkle_divider.cpp index bf3bd1b5..5763d840 100644 --- a/tests/debug_merkle_divider.cpp +++ b/tests/debug_merkle_divider.cpp @@ -16,24 +16,23 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner */ -#include -#include "osp/auxiliary/io/dot_graph_file_reader.hpp" #include "osp/auxiliary/io/DotFileWriter.hpp" +#include "osp/auxiliary/io/dot_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" -#include "osp/bsp/scheduler/Serial.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/Serial.hpp" #include "osp/coarser/coarser_util.hpp" #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include using namespace osp; - template -void check_partition_type_homogeneity(const GraphT& dag, const std::vector>& partition) { +void check_partition_type_homogeneity(const GraphT &dag, const std::vector> &partition) { // Group partitions by their ID std::map, std::vector>> partitions; for (vertex_idx_t i = 0; i < dag.num_vertices(); ++i) { @@ -41,19 +40,20 @@ void check_partition_type_homogeneity(const GraphT& dag, const std::vector" << std::endl; return 1; @@ -76,15 +76,12 @@ int main(int argc, char* argv[]) { instance.getComputationalDag().set_vertex_comm_weight(v, static_cast>(instance.getComputationalDag().vertex_comm_weight(v) * 0.01)); } - // Set up architecture - instance.getArchitecture().set_processors_consequ_types({24,48},{100,100}); + instance.getArchitecture().SetProcessorsConsequTypes({24, 48}, {100, 100}); instance.setDiagonalCompatibilityMatrix(2); instance.setSynchronisationCosts(2000); instance.setCommunicationCosts(1); - - // Set up the scheduler GrowLocalAutoCores growlocal; BspLocking locking; @@ -95,9 +92,9 @@ int main(int argc, char* argv[]) { ComboScheduler 
growlocal_kl(growlocal, kl); ComboScheduler locking_kl(locking, kl); ComboScheduler children_kl(children, kl); - + GreedyMetaScheduler scheduler; - //scheduler.addScheduler(growlocal_kl); + // scheduler.addScheduler(growlocal_kl); scheduler.addScheduler(locking_kl); scheduler.addScheduler(children_kl); scheduler.addSerialScheduler(); @@ -120,7 +117,7 @@ int main(int argc, char* argv[]) { graph_t corase_graph; coarser_util::construct_coarse_dag(instance.getComputationalDag(), corase_graph, partition); bool acyc = is_acyclic(corase_graph); - std::cout << "Partition is " << (acyc ? "acyclic." : "not acyclic."); + std::cout << "Partition is " << (acyc ? "acyclic." : "not acyclic."); std::cout << "Partition computation finished." << std::endl; std::cout << "Generated " << std::set>(partition.begin(), partition.end()).size() << " partitions." << std::endl; diff --git a/tests/kl_bsp_improver_test.cpp b/tests/kl_bsp_improver_test.cpp index df3ac3f1..6e1611ec 100644 --- a/tests/kl_bsp_improver_test.cpp +++ b/tests/kl_bsp_improver_test.cpp @@ -152,7 +152,7 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { // std::vector> send_cost = {{0, 1, 4, 4}, {1, 0, 4, 4}, {4, 4, 0, 1}, {4, 4, 1, 0}}; -// instance.getArchitecture().setSendCosts(send_cost); +// instance.getArchitecture().SetSendCosts(send_cost); // if (!status_graph) { diff --git a/tests/kl_lambda.cpp b/tests/kl_lambda.cpp index a7f40cf4..31f86130 100644 --- a/tests/kl_lambda.cpp +++ b/tests/kl_lambda.cpp @@ -25,14 +25,14 @@ limitations under the License. 
#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "test_graphs.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "test_graphs.hpp" using namespace osp; @@ -56,37 +56,38 @@ void add_node_types(Graph_t &dag) { for (const auto &v : dag.vertices()) { dag.set_vertex_type(v, node_type++ % 2); - } + } } template -void check_equal_affinity_table(table_t & table_1, table_t & table_2, const std::set & nodes) { +void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set &nodes) { - for ( auto i : nodes) { + for (auto i : nodes) { BOOST_CHECK_EQUAL(table_1[i].size(), table_2[i].size()); - if (table_1[i].size() != table_2[i].size()) continue; + if (table_1[i].size() != table_2[i].size()) + continue; for (size_t j = 0; j < table_1[i].size(); ++j) { BOOST_CHECK_EQUAL(table_1[i][j].size(), table_2[i][j].size()); - if (table_1[i][j].size() != table_2[i][j].size()) continue; + if (table_1[i][j].size() != table_2[i][j].size()) + continue; for (size_t k = 0; k < table_1[i][j].size(); ++k) { BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001); - if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { - std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", 
table_2=" << table_2[i][j][k] << std::endl; - + if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { + std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl; } } } } } -void check_equal_lambda_map(const std::vector> & map_1, const std::vector> & map_2) { +void check_equal_lambda_map(const std::vector> &map_1, const std::vector> &map_2) { BOOST_CHECK_EQUAL(map_1.size(), map_2.size()); if (map_1.size() != map_2.size()) return; for (size_t i = 0; i < map_1.size(); ++i) { - for (const auto & [key, value] : map_1[i]) { + for (const auto &[key, value] : map_1[i]) { BOOST_CHECK_EQUAL(value, map_2[i].at(key)); if (value != map_2[i].at(key)) { @@ -117,7 +118,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); @@ -134,7 +135,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { add_mem_weights(instance.getComputationalDag()); add_node_types(instance.getComputationalDag()); - instance.getArchitecture().setProcessorsWithTypes({0,0,1,1}); + instance.getArchitecture().setProcessorsWithTypes({0, 0, 1, 1}); instance.setDiagonalCompatibilityMatrix(2); @@ -147,18 +148,15 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { BOOST_CHECK(schedule.satisfiesNodeTypeConstraints()); kl_total_lambda_comm_improver kl; - + auto status = kl.improveSchedule(schedule); BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); BOOST_CHECK(schedule.satisfiesNodeTypeConstraints()); - } } - - BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { std::vector 
filenames_graph = test_graphs(); @@ -180,7 +178,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); @@ -204,7 +202,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); kl_total_lambda_comm_improver kl; - + auto status = kl.improveSchedule(schedule); BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -252,11 +250,11 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); // schedule.updateNumberOfSupersteps(); - -// using cost_f = kl_hyper_total_comm_cost_function; + +// using cost_f = kl_hyper_total_comm_cost_function; // using kl_improver_test = kl_improver_test; // kl_improver_test kl; - + // kl.setup_schedule(schedule); // auto &kl_active_schedule = kl.get_active_schedule(); @@ -269,7 +267,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); // BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); // BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - + // BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); // BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); @@ -369,7 +367,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // using graph = computational_dag_edge_idx_vector_impl_def_int_t; // using VertexType = graph::vertex_idx; // using kl_move = kl_move_struct; - // graph dag; @@ -401,11 +398,11 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // schedule.setAssignedSupersteps({0, 0, 
1, 1, 2, 2, 3, 3}); // schedule.updateNumberOfSupersteps(); - -// using cost_f = kl_hyper_total_comm_cost_function; + +// using cost_f = kl_hyper_total_comm_cost_function; // using kl_improver_test = kl_improver_test; // kl_improver_test kl; - + // kl.setup_schedule(schedule); // auto &kl_active_schedule = kl.get_active_schedule(); @@ -418,7 +415,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); // BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); // BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - + // auto node_selection = kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); // std::set nodes_to_check = {0, 1, 2, 3, 4, 5, 6, 7}; @@ -533,11 +530,10 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_hyper_total_comm_cost_function; + using cost_f = kl_hyper_total_comm_cost_function; using kl_improver_test = kl_improver_test; kl_improver_test kl; - + kl.setup_schedule(schedule); auto &kl_active_schedule = kl.get_active_schedule(); @@ -550,48 +546,47 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - + BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); - auto node_selection = kl.insert_gain_heap_test_penalty({2,3}); + auto node_selection = kl.insert_gain_heap_test_penalty({2, 3}); auto recompute_max_gain = kl.run_inner_iteration_test(); // best move 3 - std::cout << "------------------------recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << 
"------------------------recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } - std::cout << "}" << std::endl; + } + std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); // best move 0 - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); // best move 1 - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; - + BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - } BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { @@ -629,27 +624,27 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); schedule.updateNumberOfSupersteps(); - - using cost_f = kl_hyper_total_comm_cost_function; + + using cost_f = kl_hyper_total_comm_cost_function; using kl_improver_test = kl_improver_test; 
kl_improver_test kl; - + kl.setup_schedule(schedule); BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - auto node_selection = kl.insert_gain_heap_test_penalty({7}); + auto node_selection = kl.insert_gain_heap_test_penalty({7}); auto recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "-----------recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "-----------recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } - std::cout << "}" << std::endl; + } + std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - - auto& lambda_map = kl.get_comm_cost_f().node_lambda_map; + + auto &lambda_map = kl.get_comm_cost_f().node_lambda_map; BOOST_CHECK(lambda_map.get_proc_entry(v1, 0) == 2); BOOST_CHECK(lambda_map.get_proc_entry(v1, 1) == 1); @@ -669,32 +664,31 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { BOOST_CHECK(lambda_map.has_no_proc_entry(v8, 0)); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = 
kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - } // BOOST_AUTO_TEST_CASE(kl_lambda_total_comm_large_test_graphs) { @@ -708,7 +702,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // std::cout << cwd << std::endl; // } - // for (auto &filename_graph : filenames_graph) { // GreedyBspScheduler test_scheduler; // BspInstance instance; @@ -724,7 +717,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().setSendCosts(send_cost); +// instance.getArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -752,7 +745,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // auto status = kl.improveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - + // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -763,18 +756,17 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // // start_time = std::chrono::high_resolution_clock::now(); // // status = kl_old.improve_schedule_test_2(schedule_2); // // finish_time = std::chrono::high_resolution_clock::now(); - + // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << 
schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; - + // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); // } // } - // BOOST_AUTO_TEST_CASE(kl_lambda_total_comm_large_test_graphs_mt) { // std::vector filenames_graph = large_spaa_graphs(); // using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -786,7 +778,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // std::cout << cwd << std::endl; // } - // for (auto &filename_graph : filenames_graph) { // GreedyBspScheduler test_scheduler; // BspInstance instance; @@ -802,7 +793,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().setSendCosts(send_cost); +// instance.getArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -830,7 +821,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // auto status = kl.improveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - + // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -841,11 +832,11 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // // start_time = std::chrono::high_resolution_clock::now(); // // status = kl_old.improve_schedule_test_2(schedule_2); // // finish_time = std::chrono::high_resolution_clock::now(); - + // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, 
costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; - + // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); diff --git a/tests/kl_total.cpp b/tests/kl_total.cpp index 5d3d1486..58421144 100644 --- a/tests/kl_total.cpp +++ b/tests/kl_total.cpp @@ -22,18 +22,17 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "test_graphs.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "test_graphs.hpp" using namespace osp; - template void add_mem_weights(Graph_t &dag) { @@ -49,17 +48,16 @@ void add_mem_weights(Graph_t &dag) { } template -void check_equal_affinity_table(table_t & table_1, table_t & table_2, const std::set & nodes) { +void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set &nodes) { BOOST_CHECK_EQUAL(table_1.size(), table_2.size()); - for ( auto i : nodes) { + for (auto i : nodes) { for (size_t j = 0; j < table_1[i].size(); ++j) { for (size_t k = 0; k < table_1[i][j].size(); ++k) { BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001); - if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { - std::cout << "Mismatch at [" << 
i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl; - + if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { + std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl; } } } @@ -102,16 +100,13 @@ BOOST_AUTO_TEST_CASE(kl_improver_smoke_test) { schedule.updateNumberOfSupersteps(); - using kl_improver_t = kl_total_comm_improver; kl_improver_t kl; - - + auto status = kl.improveSchedule(schedule); BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); - } BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { @@ -135,7 +130,7 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); @@ -147,7 +142,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { BOOST_CHECK(false); } - add_mem_weights(instance.getComputationalDag()); BspSchedule schedule(instance); @@ -158,7 +152,7 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); kl_total_comm_improver kl; - + auto status = kl.improveSchedule(schedule); BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -170,7 +164,7 @@ BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) { using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; - + graph dag; const VertexType v1 = dag.add_vertex(2, 9, 2); @@ -200,13 +194,13 @@ BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) { // Create a schedule with an almost empty superstep (step 1) 
schedule.setAssignedProcessors({0, 0, 0, 0, 1, 1, 1, 1}); schedule.setAssignedSupersteps({0, 0, 0, 0, 1, 2, 2, 2}); - + schedule.updateNumberOfSupersteps(); unsigned original_steps = schedule.numberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + + using cost_f = kl_total_comm_cost_function; kl_improver kl; - + auto status = kl.improveSchedule(schedule); BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -250,11 +244,10 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + using cost_f = kl_total_comm_cost_function; using kl_improver_test = kl_improver_test; kl_improver_test kl; - + kl.setup_schedule(schedule); auto &kl_active_schedule = kl.get_active_schedule(); @@ -267,13 +260,13 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - + BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); - auto node_selection = kl.insert_gain_heap_test_penalty({2,3}); + auto node_selection = kl.insert_gain_heap_test_penalty({2, 3}); - auto& affinity = kl.get_affinity_table(); + auto &affinity = kl.get_affinity_table(); BOOST_CHECK_CLOSE(affinity[v3][0][0], 5.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v3][0][1], 4.0, 0.00001); @@ -290,41 +283,40 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { BOOST_CHECK_CLOSE(affinity[v4][1][2], -3.5, 0.00001); auto recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "------------------------recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "------------------------recompute max_gain: { "; + for (const auto &[key, value] : 
recompute_max_gain) { std::cout << key << " "; - } - std::cout << "}" << std::endl; + } + std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - + recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - } BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) { @@ -363,55 +355,53 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + using cost_f = kl_total_comm_cost_function; using kl_improver_test = kl_improver_test; kl_improver_test kl; - + kl.setup_schedule(schedule); - //auto &kl_active_schedule = kl.get_active_schedule(); + // auto &kl_active_schedule = kl.get_active_schedule(); BOOST_CHECK_CLOSE(51.5, 
kl.get_current_cost(), 0.00001); - auto node_selection = kl.insert_gain_heap_test_penalty({7}); + auto node_selection = kl.insert_gain_heap_test_penalty({7}); auto recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "-----------recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "-----------recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } - std::cout << "}" << std::endl; + } + std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - + recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); recompute_max_gain = kl.run_inner_iteration_test(); - std::cout << "recompute max_gain: { "; - for (const auto & [key, value] : recompute_max_gain) { + std::cout << "recompute max_gain: { "; + for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; - } + } std::cout << "}" << std::endl; BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - } BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) { @@ -450,16 +440,15 @@ 
BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + using cost_f = kl_total_comm_cost_function; kl_improver_test kl; - + kl.setup_schedule(schedule); kl.compute_violations_test(); BOOST_CHECK_EQUAL(kl.is_feasible(), false); - + kl_improver kl_improver; kl_improver.improveSchedule(schedule); @@ -502,10 +491,9 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + using cost_f = kl_total_comm_cost_function; kl_improver_test kl; - + kl.setup_schedule(schedule); auto &kl_active_schedule = kl.get_active_schedule(); @@ -529,11 +517,11 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { BOOST_CHECK_EQUAL(kl.is_feasible(), false); BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); - kl_move move_2(v2, 3.0 + 4.5 - 4.0 , 0, 0, 1, 0); + kl_move move_2(v2, 3.0 + 4.5 - 4.0, 0, 0, 1, 0); kl.apply_move_test(move_2); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0); // 42-3 + BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0); // 42-3 BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 5.0); // 2+3 BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 1); BOOST_CHECK_EQUAL(kl.is_feasible(), false); @@ -541,7 +529,7 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); - auto& affinity = kl.get_affinity_table(); + auto &affinity = kl.get_affinity_table(); BOOST_CHECK_CLOSE(affinity[v1][0][1], 2.0 - 4.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][1][1], 0.0, 0.00001); @@ -598,10 +586,9 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + using cost_f = kl_total_comm_cost_function; kl_improver_test kl; - + kl.setup_schedule(schedule); auto &kl_active_schedule = kl.get_active_schedule(); @@ -614,7 
+601,7 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - + BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); BOOST_CHECK_EQUAL(kl.is_feasible(), true); @@ -636,7 +623,7 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { BOOST_CHECK_EQUAL(kl.is_feasible(), true); BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); - kl_move move_2(v2, -1.0 - 8.5 , 1, 1, 0, 0); + kl_move move_2(v2, -1.0 - 8.5, 1, 1, 0, 0); kl.apply_move_test(move_2); @@ -652,7 +639,7 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { BOOST_CHECK_EQUAL(kl.is_feasible(), false); BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); - kl_move move_x(v2, -2.0 + 8.5 , 0, 0, 1, 0); + kl_move move_x(v2, -2.0 + 8.5, 0, 0, 1, 0); kl.apply_move_test(move_x); @@ -670,14 +657,13 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); - auto& affinity = kl.get_affinity_table(); + auto &affinity = kl.get_affinity_table(); BOOST_CHECK_CLOSE(affinity[v1][0][1], -4.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][0][2], -2.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][1][1], 2.0, 0.00001); - BOOST_CHECK_CLOSE(affinity[v1][1][2], 0.0, 0.00001); - + BOOST_CHECK_CLOSE(affinity[v1][1][2], 0.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v2][0][1], 9.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v2][0][2], 11.5, 0.00001); @@ -719,7 +705,6 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { BOOST_CHECK_CLOSE(affinity[v7][1][0], 7.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v7][1][1], 8.0, 0.00001); - BOOST_CHECK_CLOSE(affinity[v8][0][0], 8.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v8][0][1], 8.5, 0.00001); @@ -763,10 +748,9 @@ 
BOOST_AUTO_TEST_CASE(kl_base_3) { schedule.updateNumberOfSupersteps(); - - using cost_f = kl_total_comm_cost_function; + using cost_f = kl_total_comm_cost_function; kl_improver_test kl; - + kl.setup_schedule(schedule); auto &kl_active_schedule = kl.get_active_schedule(); @@ -779,21 +763,19 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - + BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); kl.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); - auto& affinity = kl.get_affinity_table(); - + auto &affinity = kl.get_affinity_table(); BOOST_CHECK_CLOSE(affinity[v1][0][1], 1.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][0][2], 3.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][1][1], 2.0, 0.00001); - BOOST_CHECK_CLOSE(affinity[v1][1][2], 16.5, 0.00001); - + BOOST_CHECK_CLOSE(affinity[v1][1][2], 16.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v2][0][1], 15, 0.00001); BOOST_CHECK_CLOSE(affinity[v2][0][2], 11.5, 0.00001); @@ -835,16 +817,13 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { BOOST_CHECK_CLOSE(affinity[v7][1][0], 7.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v7][1][1], 8.0, 0.00001); - BOOST_CHECK_CLOSE(affinity[v8][0][0], 14.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v8][0][1], 8.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v8][1][0], 8.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v8][1][1], 1.0, 0.00001); - } - // BOOST_AUTO_TEST_CASE(kl_improver_incremental_update_test) { // using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -881,12 +860,11 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); // schedule.updateNumberOfSupersteps(); - - -// using cost_f = kl_total_comm_cost_function; + +// using cost_f = 
kl_total_comm_cost_function; // using kl_improver_test = kl_improver_test; // kl_improver_test kl; - + // kl.setup_schedule(schedule); // auto node_selection = kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); @@ -974,7 +952,6 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // }; - // BOOST_AUTO_TEST_CASE(kl_total_comm_large_test_graphs) { // std::vector filenames_graph = large_spaa_graphs(); // using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -987,7 +964,6 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // std::cout << cwd << std::endl; // } - // for (auto &filename_graph : filenames_graph) { // GreedyBspScheduler test_scheduler; // BspInstance instance; @@ -1003,7 +979,7 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().setSendCosts(send_cost); +// instance.getArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -1031,9 +1007,9 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // auto start_time = std::chrono::high_resolution_clock::now(); // auto status = kl.improveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); - + // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - + // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -1044,18 +1020,17 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // // start_time = std::chrono::high_resolution_clock::now(); // // status = kl_old.improve_schedule_test_2(schedule_2); // // finish_time = std::chrono::high_resolution_clock::now(); - + // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< 
std::endl; - + // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); // } // } - // BOOST_AUTO_TEST_CASE(kl_total_comm_large_test_graphs_mt) { // std::vector filenames_graph = large_spaa_graphs(); // using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -1068,7 +1043,6 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // std::cout << cwd << std::endl; // } - // for (auto &filename_graph : filenames_graph) { // GreedyBspScheduler test_scheduler; // BspInstance instance; @@ -1084,7 +1058,7 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().setSendCosts(send_cost); +// instance.getArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -1112,9 +1086,9 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // auto start_time = std::chrono::high_resolution_clock::now(); // auto status = kl.improveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); - + // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - + // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); @@ -1125,11 +1099,11 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // // start_time = std::chrono::high_resolution_clock::now(); // // status = kl_old.improve_schedule_test_2(schedule_2); // // finish_time = std::chrono::high_resolution_clock::now(); - + // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; - + // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS 
|| status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); diff --git a/tests/trimmed_group_scheduler.cpp b/tests/trimmed_group_scheduler.cpp index 52cf4cdb..ccbfee8a 100644 --- a/tests/trimmed_group_scheduler.cpp +++ b/tests/trimmed_group_scheduler.cpp @@ -19,10 +19,10 @@ limitations under the License. #define BOOST_TEST_MODULE TrimmedGroupSchedulerTest #include -#include "osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; @@ -30,9 +30,9 @@ using namespace osp; using graph_t = computational_dag_vector_impl_def_t; // Mock SubScheduler for TrimmedGroupScheduler tests -template +template class MockSubScheduler : public Scheduler { -public: + public: // This mock scheduler assigns all nodes to local processor 0 and superstep 0. // This simplifies verification of the TrimmedGroupScheduler's mapping logic. 
RETURN_STATUS computeSchedule(BspSchedule &schedule) override { @@ -66,7 +66,7 @@ BOOST_FIXTURE_TEST_SUITE(TrimmedGroupSchedulerTestSuite, TrimmedGroupSchedulerFi BOOST_AUTO_TEST_CASE(EmptyGraphTest) { // Graph is empty by default arch.setNumberOfProcessors(4); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; TrimmedGroupScheduler scheduler(mock_sub_scheduler, 1); BspSchedule schedule(instance); @@ -87,7 +87,7 @@ BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) { // Architecture: 4 processors of type 0 arch.setProcessorsWithTypes({0, 0, 0, 0}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; // min_non_zero_procs_ = 1 (all 4 processors assigned to this single component group) TrimmedGroupScheduler scheduler(mock_sub_scheduler, 1); @@ -119,7 +119,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest) // Architecture: 4 processors of type 0 arch.setProcessorsWithTypes({0, 0, 0, 0}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; // min_non_zero_procs_ = 2 (2 component groups, each gets 2 processors) TrimmedGroupScheduler scheduler(mock_sub_scheduler, 2); @@ -154,7 +154,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest // Architecture: 6 processors of type 0 arch.setProcessorsWithTypes({0, 0, 0, 0, 0, 0}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; // min_non_zero_procs_ = 2 (3 components, 2 groups) // base_count = 3 / 2 = 1, remainder = 3 % 2 = 1 @@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsHeterogeneousArchitectureTest) { // Architecture: 2 processors of type 0 (global 0,1), 2 processors of type 1 (global 2,3) arch.setProcessorsWithTypes({0, 0, 1, 1}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc. // min_non_zero_procs_ = 2 (2 components, 2 groups)