From 614d8d6f9a48821764e8ed8fefa6e23811f7d20d Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Wed, 8 Oct 2025 17:23:08 +0200 Subject: [PATCH 1/6] Base version of VarianceSSP scheduler (needs revisions) --- .../GreedyVarianceSspScheduler.hpp | 592 ++++++++++++++++++ tests/bsp_schedulers.cpp | 7 + 2 files changed, 599 insertions(+) create mode 100644 include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp new file mode 100644 index 00000000..d97bdaf3 --- /dev/null +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -0,0 +1,592 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MemoryConstraintModules.hpp" +#include "osp/auxiliary/misc.hpp" +#include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/graph_algorithms/directed_graph_top_sort.hpp" + +namespace osp { + +/** + * @brief The GreedyVarianceSspScheduler class represents a scheduler that uses a greedy algorithm + * with stale synchronous parallel (SSP) execution model. + * + * It computes schedules for BspInstance using variance-based priorities. + */ +template +class GreedyVarianceSspScheduler : public Scheduler { + + static_assert(is_computational_dag_v, "GreedyVarianceSspScheduler can only be used with computational DAGs."); + + private: + using VertexType = vertex_idx_t; + + constexpr static bool use_memory_constraint = + is_memory_constraint_v or is_memory_constraint_schedule_v; + + static_assert(not use_memory_constraint or std::is_same_v, + "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + + MemoryConstraint_t memory_constraint; + double max_percent_idle_processors; + bool increase_parallelism_in_new_superstep; + std::vector> current_proc_persistent_memory; + std::vector> current_proc_transient_memory; + + + std::vector compute_work_variance(const Graph_t &graph) const { + + std::vector work_variance(graph.num_vertices(), 0.0); + + const std::vector top_order = GetTopOrder(graph); + + for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { + double temp = 0; + double max_priority = 0; + for (const auto &child : graph.children(*r_iter)) { + max_priority = std::max(work_variance[child], max_priority); + } + for (const auto &child : graph.children(*r_iter)) { + temp += std::exp(2 * (work_variance[child] - max_priority)); + } + temp = std::log(temp) / 2 + max_priority; + + double node_weight = std::log((double)std::max(graph.vertex_work_weight(*r_iter), static_cast>(1))); + double larger_val = node_weight > temp ? node_weight : temp; + + work_variance[*r_iter] = + std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + } + + return work_variance; + } + + struct VarianceCompare { + bool operator()(const std::pair &lhs, const std::pair &rhs) const { + return ((lhs.second > rhs.second) || ((lhs.second == rhs.second) && (lhs.first < rhs.first))); + } + }; + + bool CanChooseNode(const BspInstance &instance, + const std::vector, VarianceCompare>> &allReady, + const std::vector, VarianceCompare>> &procReady, + const std::vector &procFree) const { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) + if (procFree[i] && !procReady[i].empty()) + return true; + + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) + if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) + return true; + + return false; + } + + void Choose(const BspInstance &instance, + const std::vector &work_variance, + std::vector, VarianceCompare>> &allReady, + std::vector, VarianceCompare>> &procReady, + const std::vector &procFree, + VertexType &node, unsigned &p, + const bool endSupStep, + const v_workw_t remaining_time) const + { + (void)work_variance; // silence -Werror=unused-parameter + double maxScore = -1; + bool found_allocation = false; + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !procReady[i].empty()) { + // select node + for (auto node_pair_it = procReady[i].begin(); node_pair_it != procReady[i].end();) { + if (endSupStep && + (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) { + node_pair_it = procReady[i].erase(node_pair_it); + continue; + } + + if (use_memory_constraint) { + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { + if (current_proc_persistent_memory[i] + + instance.getComputationalDag().vertex_mem_weight(node_pair_it->first) > + instance.getArchitecture().memoryBound(i)) { + + node_pair_it = procReady[i].erase(node_pair_it); + continue; + } + } else if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { + if (current_proc_persistent_memory[i] + + instance.getComputationalDag().vertex_mem_weight(node_pair_it->first) + + std::max(current_proc_transient_memory[i], + instance.getComputationalDag().vertex_comm_weight(node_pair_it->first)) > + instance.getArchitecture().memoryBound(i)) { + + node_pair_it = procReady[i].erase(node_pair_it); + continue; + } + } + } + + const double &score = node_pair_it->second; + + if (score > maxScore) { + maxScore = score; + node = node_pair_it->first; + p = i; + found_allocation = true; + break; + } + node_pair_it++; + } + } + } + + if (found_allocation) + return; + + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { + // select node + for (auto it = allReady[instance.getArchitecture().processorType(i)].begin(); it != allReady[instance.getArchitecture().processorType(i)].end();) { + if (endSupStep && + (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + it = allReady[instance.getArchitecture().processorType(i)].erase(it); + continue; + } + + const double &score = it->second; + + if (score > maxScore) { + if (use_memory_constraint) { + + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { + + if (current_proc_persistent_memory[i] + + instance.getComputationalDag().vertex_mem_weight(it->first) <= + instance.getArchitecture().memoryBound(i)) { + + node = it->first; + p = i; + return; + } + + } else if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { + if (current_proc_persistent_memory[i] + + instance.getComputationalDag().vertex_mem_weight(it->first) + + std::max(current_proc_transient_memory[i], + instance.getComputationalDag().vertex_comm_weight(it->first)) <= + instance.getArchitecture().memoryBound(i)) { + + node = it->first; + p = i; + return; + } + } + + } else { + node = it->first; + p = i; + return; + } + } + it++; + } + + } + } + }; + + bool check_mem_feasibility( + const BspInstance &instance, + const std::vector, VarianceCompare>> &allReady, + const std::vector, VarianceCompare>> &procReady) const + { + if constexpr (use_memory_constraint) { + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) + { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (!procReady[i].empty()) { + + const std::pair &node_pair = *procReady[i].begin(); + VertexType top_node = node_pair.first; + + if (memory_constraint.can_add(top_node, i)) { + return true; + } + } + } + + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + + if (allReady[instance.getArchitecture().processorType(i)].empty()) + continue; + + const std::pair &node_pair = + *allReady[instance.getArchitecture().processorType(i)].begin(); + VertexType top_node = node_pair.first; + + if (memory_constraint.can_add(top_node, i)) { + return true; + } + } + + return false; + } + } + + return true; + } + + unsigned get_nr_parallelizable_nodes( + const BspInstance &instance, + const unsigned &stale, + const std::vector &nr_old_ready_nodes_per_type, + const std::vector &nr_ready_nodes_per_type, + const std::vector, VarianceCompare>> &procReady, + const std::vector &nr_procs_per_type) const + { + unsigned nr_nodes = 0; + unsigned num_proc_types = instance.getArchitecture().getNumberOfProcessorTypes(); + + std::vector procs_per_type = nr_procs_per_type; + + if (stale > 1) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + if (!procReady[proc].empty()) { + procs_per_type[instance.getArchitecture().processorType(proc)]--; + nr_nodes++; + } + } + } + + std::vector ready_nodes_per_type = nr_ready_nodes_per_type; + for (unsigned node_type = 0; node_type < ready_nodes_per_type.size(); node_type++) { + ready_nodes_per_type[node_type] += nr_old_ready_nodes_per_type[node_type]; + } + + for (unsigned proc_type = 0; proc_type < num_proc_types; ++proc_type) { + for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { + if (instance.isCompatibleType(node_type, proc_type)) { + unsigned matched = std::min(ready_nodes_per_type[node_type], + procs_per_type[proc_type]); + nr_nodes += matched; + ready_nodes_per_type[node_type] -= matched; + procs_per_type[proc_type] -= matched; + } + } + } + + return nr_nodes; + } + + + public: + virtual RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { + const auto &instance = schedule.getInstance(); + const auto N = instance.numberOfVertices(); + const auto P = instance.numberOfProcessors(); + const auto &G = instance.getComputationalDag(); + + if constexpr (use_memory_constraint) { + memory_constraint.initialize(schedule, 0); // SSP starts at supstepIdx = 0 + } + + // Reset processor assignments + for (auto v : G.vertices()) { + schedule.setAssignedProcessor(v, std::numeric_limits::max()); + } + + const std::vector work_variances = compute_work_variance(G); + + std::set, VarianceCompare> old_ready; + std::vector, VarianceCompare>> ready(stale); + std::vector, VarianceCompare>>> procReady( + stale, std::vector, VarianceCompare>>(P)); + std::vector, VarianceCompare>> allReady( + instance.getArchitecture().getNumberOfProcessorTypes()); + + const auto procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); + + std::vector nr_old_ready_nodes_per_type(G.num_vertex_types(), 0); + std::vector> nr_ready_stale_nodes_per_type( + stale, std::vector(G.num_vertex_types(), 0)); + std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + for (auto proc = 0u; proc < P; ++proc) { + ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + } + + std::vector nrPredecRemain(N); + for (VertexType node = 0; node < static_cast(N); ++node) { + const auto num_parents = G.in_degree(node); + nrPredecRemain[node] = static_cast(num_parents); + if (num_parents == 0) { + ready[0].insert(std::make_pair(node, work_variances[node])); + nr_ready_stale_nodes_per_type[0][G.vertex_type(node)]++; + } + } + + std::vector procFree(P, true); + unsigned free = P; + + std::set, VertexType>> finishTimes; + finishTimes.emplace(0, std::numeric_limits::max()); + + std::vector number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + std::vector limit_of_number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + + unsigned supstepIdx = 0u; + bool endSupStep = true; + bool begin_outer_while = true; + bool able_to_schedule_in_step = false; + unsigned successive_empty_supersteps = 0u; + + auto nonempty_ready = [&]() { + return std::any_of(ready.cbegin(), ready.cend(), + [](const std::set, VarianceCompare>& ready_set) { return !ready_set.empty(); }); + }; + + while (!old_ready.empty() || nonempty_ready() || !finishTimes.empty()) { + if (finishTimes.empty() && endSupStep) { + able_to_schedule_in_step = false; + number_of_allocated_allReady_tasks_in_superstep = std::vector(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + + for (unsigned i = 0; i < P; ++i) + procReady[supstepIdx % stale][i].clear(); + + if (!begin_outer_while) { + supstepIdx++; + } else { + begin_outer_while = false; + } + + for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) + allReady[procType].clear(); + + old_ready.insert(ready[supstepIdx % stale].begin(), ready[supstepIdx % stale].end()); + ready[supstepIdx % stale].clear(); + for (unsigned node_type = 0; node_type < G.num_vertex_types(); ++node_type) { + nr_old_ready_nodes_per_type[node_type] += nr_ready_stale_nodes_per_type[supstepIdx % stale][node_type]; + nr_ready_stale_nodes_per_type[supstepIdx % stale][node_type] = 0; + } + + for (const auto &nodeAndValuePair : old_ready) { + VertexType node = nodeAndValuePair.first; + for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) { + allReady[procType].insert(allReady[procType].end(), nodeAndValuePair); + } + } + + if constexpr (use_memory_constraint) { + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { + for (unsigned proc = 0; proc < P; proc++) + memory_constraint.reset(proc); + } + } + + for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { + unsigned equal_split = (static_cast(allReady[procType].size()) + stale - 1) / stale; + unsigned at_least_for_long_step = 3 * nr_procs_per_type[procType]; + + limit_of_number_of_allocated_allReady_tasks_in_superstep[procType] = std::max(at_least_for_long_step, equal_split); + } + + endSupStep = false; + finishTimes.emplace(0, std::numeric_limits::max()); + } + + const v_workw_t time = finishTimes.begin()->first; + const v_workw_t max_finish_time = finishTimes.rbegin()->first; + + // Find new ready jobs + while (!finishTimes.empty() && finishTimes.begin()->first == time) { + const VertexType node = finishTimes.begin()->second; + finishTimes.erase(finishTimes.begin()); + if (node != std::numeric_limits::max()) { + for (const auto &succ : G.children(node)) { + nrPredecRemain[succ]--; + if (nrPredecRemain[succ] == 0) { + ready[supstepIdx % stale].emplace(succ, work_variances[succ]); + nr_ready_stale_nodes_per_type[supstepIdx % stale][G.vertex_type(succ)]++; + + bool canAdd = instance.isCompatible(succ, schedule.assignedProcessor(node)); + unsigned earliest_add = supstepIdx; + bool memory_ok = true; + + for (const auto &pred : G.parents(succ)) { + if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node)) + earliest_add = std::max(earliest_add, stale + schedule.assignedSuperstep(pred)); + } + + if (use_memory_constraint && canAdd && (earliest_add == supstepIdx)) { + + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { + + if (current_proc_persistent_memory[schedule.assignedProcessor(node)] + + instance.getComputationalDag().vertex_mem_weight(succ) > + instance.getArchitecture().memoryBound(schedule.assignedProcessor(node))) { + memory_ok = false; + } + + } else if (instance.getArchitecture().getMemoryConstraintType() == + MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { + + if (current_proc_persistent_memory[schedule.assignedProcessor(node)] + + instance.getComputationalDag().vertex_mem_weight(succ) + + std::max(current_proc_transient_memory[schedule.assignedProcessor(node)], + instance.getComputationalDag().vertex_comm_weight(succ)) > + instance.getArchitecture().memoryBound(schedule.assignedProcessor(node))) { + memory_ok = false; + } + } + } + + if (canAdd) { + for (unsigned step_to_add = earliest_add; step_to_add < supstepIdx + stale; step_to_add++) { + if ((step_to_add == supstepIdx) && !memory_ok) { + continue; + } + procReady[step_to_add % stale][schedule.assignedProcessor(node)].emplace(succ, work_variances[succ]); + } + } + } + } + + procFree[schedule.assignedProcessor(node)] = true; + ++free; + } + } + + // Assign new jobs + if (!CanChooseNode(instance, allReady, procReady[supstepIdx % stale], procFree)) { + endSupStep = true; + } + while (CanChooseNode(instance, allReady, procReady[supstepIdx % stale], procFree)) { + std::cout << "10" << std::endl; + VertexType nextNode = std::numeric_limits::max(); + unsigned nextProc = P; + + Choose( instance, work_variances, allReady, + procReady[supstepIdx % stale], procFree, + nextNode, nextProc, endSupStep, max_finish_time - time); + + if (nextNode == std::numeric_limits::max() || nextProc == P) { + endSupStep = true; + break; + } + + if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, work_variances[nextNode])) != + procReady[supstepIdx % stale][nextProc].end()) { + for (size_t i = 0; i < stale; i++) { + procReady[i][nextProc].erase(std::make_pair(nextNode, work_variances[nextNode])); + } + } else { + for(unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(nextNode)]) { + allReady[procType].erase(std::make_pair(nextNode, work_variances[nextNode])); + } + nr_old_ready_nodes_per_type[G.vertex_type(nextNode)]--; + const unsigned nextProcType = instance.getArchitecture().processorType(nextProc); + number_of_allocated_allReady_tasks_in_superstep[nextProcType]++; + if (number_of_allocated_allReady_tasks_in_superstep[nextProcType] >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) { + allReady[nextProcType].clear(); + } + } + + for (size_t i = 0; i < stale; i++) { + ready[i].erase(std::make_pair(nextNode, work_variances[nextNode])); + } + old_ready.erase(std::make_pair(nextNode, work_variances[nextNode])); + + schedule.setAssignedProcessor(nextNode, nextProc); + schedule.setAssignedSuperstep(nextNode, supstepIdx); + able_to_schedule_in_step = true; + + if constexpr (use_memory_constraint) { + memory_constraint.add(nextNode, nextProc); + + std::vector> toErase; + for (const auto &node_pair : procReady[supstepIdx % stale][nextProc]) { + if (!memory_constraint.can_add(node_pair.first, nextProc)) { + toErase.push_back(node_pair); + } + } + for (const auto &n : toErase) { + procReady[supstepIdx % stale][nextProc].erase(n); + } + } + + finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode); + procFree[nextProc] = false; + --free; + } + + if (able_to_schedule_in_step) + successive_empty_supersteps = 0; + else if (++successive_empty_supersteps > 100u + stale) + return RETURN_STATUS::ERROR; + + if (free > static_cast(P * max_percent_idle_processors) && + ((!increase_parallelism_in_new_superstep) || + get_nr_parallelizable_nodes(instance, stale, + nr_old_ready_nodes_per_type, + nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale], + procReady[(supstepIdx + 1) % stale], + nr_procs_per_type) >= + std::min(std::min(P, static_cast(1.2 * (P - free))), + P - free + static_cast(0.5 * free)))) { + endSupStep = true; + } + } + + assert(schedule.satisfiesPrecedenceConstraints()); + //schedule.setAutoCommunicationSchedule(); + + return RETURN_STATUS::OSP_SUCCESS; + } + + + + virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { + // Default SSP = 1 (no staleness) or maybe configurable later + return computeSspSchedule(schedule, /*stale=*/1); + } + + virtual std::string getScheduleName() const override { + if constexpr (use_memory_constraint) { + return "GreedyVarianceSspMemory"; + } else { + return "GreedyVarianceSsp"; + } + } + +}; + +} // namespace osp diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index 936bad69..94a9161b 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -33,6 +33,7 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp" #include "osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp" #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" @@ -209,6 +210,12 @@ BOOST_AUTO_TEST_CASE(variancefillup_test) { run_test(&test); } +BOOST_AUTO_TEST_CASE(greedyvariancesspscheduler_test){ + GreedyVarianceSspScheduler test; + run_test(&test); +} + + BOOST_AUTO_TEST_CASE(etf_test_edge_desc_impl) { EtfScheduler test; From 07367391d91d58b2731e50074ee4d4f87c7a6156 Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Thu, 9 Oct 2025 15:59:18 +0200 Subject: [PATCH 2/6] GreedyVarianceSspScheduler: adding MaxBspSchedule (staleness=2) support and tests --- include/osp/bsp/model/BspSchedule.hpp | 2 +- .../GreedyVarianceSspScheduler.hpp | 320 ++++++++++-------- include/osp/bsp/scheduler/MaxBspScheduler.hpp | 87 +++++ tests/bsp_schedulers.cpp | 38 ++- 4 files changed, 307 insertions(+), 140 deletions(-) create mode 100644 include/osp/bsp/scheduler/MaxBspScheduler.hpp diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index 299e1716..b30cb26c 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -65,7 +65,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval node_to_processor_assignment; - std::vector node_to_superstep_assignment; + std::vector node_to_superstep_assignment; template inline bool satisfies_precedence_constraints_staleness() const { diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp index d97bdaf3..2a8b8d99 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -32,6 +32,7 @@ limitations under the License. #include "MemoryConstraintModules.hpp" #include "osp/auxiliary/misc.hpp" #include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/model/MaxBspSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" @@ -60,14 +61,9 @@ class GreedyVarianceSspScheduler : public Scheduler { MemoryConstraint_t memory_constraint; double max_percent_idle_processors; bool increase_parallelism_in_new_superstep; - std::vector> current_proc_persistent_memory; - std::vector> current_proc_transient_memory; - std::vector compute_work_variance(const Graph_t &graph) const { - std::vector work_variance(graph.num_vertices(), 0.0); - const std::vector top_order = GetTopOrder(graph); for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { @@ -91,6 +87,28 @@ class GreedyVarianceSspScheduler : public Scheduler { return work_variance; } + std::vector>> + procTypesCompatibleWithNodeType_omit_procType(const BspInstance &instance) const { + + const std::vector> procTypesCompatibleWithNodeType = + instance.getProcTypesCompatibleWithNodeType(); + + std::vector>> procTypesCompatibleWithNodeType_skip( + instance.getArchitecture().getNumberOfProcessorTypes(), + std::vector>(instance.getComputationalDag().num_vertex_types())); + for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { + for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) { + for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) { + if (procType == otherProcType) + continue; + procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType); + } + } + } + + return procTypesCompatibleWithNodeType_skip; + } + struct VarianceCompare { bool operator()(const std::pair &lhs, const std::pair &rhs) const { return ((lhs.second > rhs.second) || ((lhs.second == rhs.second) && (lhs.first < rhs.first))); @@ -119,112 +137,150 @@ class GreedyVarianceSspScheduler : public Scheduler { const std::vector &procFree, VertexType &node, unsigned &p, const bool endSupStep, - const v_workw_t remaining_time) const + const v_workw_t remaining_time, + const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const { - (void)work_variance; // silence -Werror=unused-parameter double maxScore = -1; bool found_allocation = false; + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (procFree[i] && !procReady[i].empty()) { - // select node - for (auto node_pair_it = procReady[i].begin(); node_pair_it != procReady[i].end();) { - if (endSupStep && - (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) { - node_pair_it = procReady[i].erase(node_pair_it); - continue; - } + if (!procFree[i] || procReady[i].empty()) + continue; + + auto it = procReady[i].begin(); + while (it != procReady[i].end()) { + if (endSupStep && + (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + it = procReady[i].erase(it); + continue; + } - if (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { - if (current_proc_persistent_memory[i] + - instance.getComputationalDag().vertex_mem_weight(node_pair_it->first) > - instance.getArchitecture().memoryBound(i)) { + const double &score = it->second; - node_pair_it = procReady[i].erase(node_pair_it); - continue; - } - } else if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - if (current_proc_persistent_memory[i] + - instance.getComputationalDag().vertex_mem_weight(node_pair_it->first) + - std::max(current_proc_transient_memory[i], - instance.getComputationalDag().vertex_comm_weight(node_pair_it->first)) > - instance.getArchitecture().memoryBound(i)) { - - node_pair_it = procReady[i].erase(node_pair_it); - continue; - } - } - } + if (score > maxScore) { + const unsigned procType = instance.getArchitecture().processorType(i); - const double &score = node_pair_it->second; + if constexpr (use_memory_constraint) { + if (memory_constraint.can_add(it->first, i)) { + node = it->first; + p = i; + found_allocation = true; + + procReady[i].erase(it); + + if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { + const auto &compatibleTypes = + procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; + + for (unsigned otherType : compatibleTypes) { + for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { + if (j != i && + instance.getArchitecture().processorType(j) == otherType && + j < procReady.size()) { + procReady[j].erase(std::make_pair(node, work_variance[node])); + } + } + } + } - if (score > maxScore) { - maxScore = score; - node = node_pair_it->first; + return; + } + } else { + node = it->first; p = i; found_allocation = true; - break; + + procReady[i].erase(it); + + if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { + const auto &compatibleTypes = + procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; + + for (unsigned otherType : compatibleTypes) { + for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { + if (j != i && + instance.getArchitecture().processorType(j) == otherType && + j < procReady.size()) { + procReady[j].erase(std::make_pair(node, work_variance[node])); + } + } + } + } + + return; } - node_pair_it++; } + + ++it; } } if (found_allocation) return; - - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { - // select node - for (auto it = allReady[instance.getArchitecture().processorType(i)].begin(); it != allReady[instance.getArchitecture().processorType(i)].end();) { - if (endSupStep && - (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { - it = allReady[instance.getArchitecture().processorType(i)].erase(it); - continue; - } - - const double &score = it->second; - if (score > maxScore) { - if (use_memory_constraint) { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + const unsigned procType = instance.getArchitecture().processorType(i); + if (!procFree[i] || procType >= allReady.size() || allReady[procType].empty()) + continue; + + auto &readyList = allReady[procType]; + auto it = readyList.begin(); + + while (it != readyList.end()) { + if (endSupStep && + (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + it = readyList.erase(it); + continue; + } - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { + const double &score = it->second; - if (current_proc_persistent_memory[i] + - instance.getComputationalDag().vertex_mem_weight(it->first) <= - instance.getArchitecture().memoryBound(i)) { + if (score > maxScore) { + if constexpr (use_memory_constraint) { + if (memory_constraint.can_add(it->first, i)) { + node = it->first; + p = i; - node = it->first; - p = i; - return; - } + readyList.erase(it); - } else if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - if (current_proc_persistent_memory[i] + - instance.getComputationalDag().vertex_mem_weight(it->first) + - std::max(current_proc_transient_memory[i], - instance.getComputationalDag().vertex_comm_weight(it->first)) <= - instance.getArchitecture().memoryBound(i)) { + const auto &compatibleTypes = + procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; - node = it->first; - p = i; - return; - } + for (unsigned otherType : compatibleTypes) { + if (otherType < allReady.size()) + allReady[otherType].erase(std::make_pair(node, work_variance[node])); } - } else { - node = it->first; - p = i; return; } + } else { + node = it->first; + p = i; + + readyList.erase(it); + + const auto &compatibleTypes = + procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; + + for (unsigned otherType : compatibleTypes) { + if (otherType < allReady.size()) + allReady[otherType].erase(std::make_pair(node, work_variance[node])); + } + + return; } - it++; } + ++it; } } }; + bool check_mem_feasibility( const BspInstance &instance, const std::vector, VarianceCompare>> &allReady, @@ -310,20 +366,25 @@ class GreedyVarianceSspScheduler : public Scheduler { public: - virtual RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { + + RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { + const auto &instance = schedule.getInstance(); - const auto N = instance.numberOfVertices(); - const auto P = instance.numberOfProcessors(); const auto &G = instance.getComputationalDag(); + const auto &N = instance.numberOfVertices(); + const unsigned &P = instance.numberOfProcessors(); - if constexpr (use_memory_constraint) { - memory_constraint.initialize(schedule, 0); // SSP starts at supstepIdx = 0 - } - - // Reset processor assignments for (auto v : G.vertices()) { schedule.setAssignedProcessor(v, std::numeric_limits::max()); } + + unsigned supstepIdx = 0; + + if constexpr (is_memory_constraint_v) { + memory_constraint.initialize(instance); + } else if constexpr (is_memory_constraint_schedule_v) { + memory_constraint.initialize(schedule, supstepIdx); + } const std::vector work_variances = compute_work_variance(G); @@ -335,6 +396,8 @@ class GreedyVarianceSspScheduler : public Scheduler { instance.getArchitecture().getNumberOfProcessorTypes()); const auto procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); + const std::vector>> procTypesCompatibleWithNodeType_skip_proctype = + procTypesCompatibleWithNodeType_omit_procType(instance); std::vector nr_old_ready_nodes_per_type(G.num_vertex_types(), 0); std::vector> nr_ready_stale_nodes_per_type( @@ -363,7 +426,9 @@ class GreedyVarianceSspScheduler : public Scheduler { std::vector number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); std::vector limit_of_number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - unsigned supstepIdx = 0u; + + + bool endSupStep = true; bool begin_outer_while = true; bool able_to_schedule_in_step = false; @@ -430,57 +495,45 @@ class GreedyVarianceSspScheduler : public Scheduler { while (!finishTimes.empty() && finishTimes.begin()->first == time) { const VertexType node = finishTimes.begin()->second; finishTimes.erase(finishTimes.begin()); + if (node != std::numeric_limits::max()) { - for (const auto &succ : G.children(node)) { + const unsigned proc_of_node = schedule.assignedProcessor(node); + + for (const auto& succ : G.children(node)) { nrPredecRemain[succ]--; if (nrPredecRemain[succ] == 0) { ready[supstepIdx % stale].emplace(succ, work_variances[succ]); nr_ready_stale_nodes_per_type[supstepIdx % stale][G.vertex_type(succ)]++; - bool canAdd = instance.isCompatible(succ, schedule.assignedProcessor(node)); unsigned earliest_add = supstepIdx; - bool memory_ok = true; - - for (const auto &pred : G.parents(succ)) { - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node)) - earliest_add = std::max(earliest_add, stale + schedule.assignedSuperstep(pred)); + for (const auto& pred : G.parents(succ)) { + if (schedule.assignedProcessor(pred) != proc_of_node) { + earliest_add = std::max(earliest_add, + stale + schedule.assignedSuperstep(pred)); + } } - if (use_memory_constraint && canAdd && (earliest_add == supstepIdx)) { + if (instance.isCompatible(succ, proc_of_node)) { + bool memory_ok = true; - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { - - if (current_proc_persistent_memory[schedule.assignedProcessor(node)] + - instance.getComputationalDag().vertex_mem_weight(succ) > - instance.getArchitecture().memoryBound(schedule.assignedProcessor(node))) { - memory_ok = false; - } - - } else if (instance.getArchitecture().getMemoryConstraintType() == - MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - - if (current_proc_persistent_memory[schedule.assignedProcessor(node)] + - instance.getComputationalDag().vertex_mem_weight(succ) + - std::max(current_proc_transient_memory[schedule.assignedProcessor(node)], - instance.getComputationalDag().vertex_comm_weight(succ)) > - instance.getArchitecture().memoryBound(schedule.assignedProcessor(node))) { - memory_ok = false; + if constexpr (use_memory_constraint) { + if (earliest_add == supstepIdx) { + memory_ok = memory_constraint.can_add(succ, proc_of_node); } } - } - - if (canAdd) { - for (unsigned step_to_add = earliest_add; step_to_add < supstepIdx + stale; step_to_add++) { + for (unsigned step_to_add = earliest_add; + step_to_add < supstepIdx + stale; ++step_to_add) { if ((step_to_add == supstepIdx) && !memory_ok) { - continue; + continue; } - procReady[step_to_add % stale][schedule.assignedProcessor(node)].emplace(succ, work_variances[succ]); + procReady[step_to_add % stale][proc_of_node].emplace( + succ, work_variances[succ]); } } } } - procFree[schedule.assignedProcessor(node)] = true; + procFree[proc_of_node] = true; ++free; } } @@ -490,13 +543,12 @@ class GreedyVarianceSspScheduler : public Scheduler { endSupStep = true; } while (CanChooseNode(instance, allReady, procReady[supstepIdx % stale], procFree)) { - std::cout << "10" << std::endl; VertexType nextNode = std::numeric_limits::max(); unsigned nextProc = P; Choose( instance, work_variances, allReady, procReady[supstepIdx % stale], procFree, - nextNode, nextProc, endSupStep, max_finish_time - time); + nextNode, nextProc, endSupStep, max_finish_time - time, procTypesCompatibleWithNodeType_skip_proctype); if (nextNode == std::numeric_limits::max() || nextProc == P) { endSupStep = true; @@ -550,18 +602,19 @@ class GreedyVarianceSspScheduler : public Scheduler { if (able_to_schedule_in_step) successive_empty_supersteps = 0; - else if (++successive_empty_supersteps > 100u + stale) + else if (++successive_empty_supersteps > 100 + stale) return RETURN_STATUS::ERROR; if (free > static_cast(P * max_percent_idle_processors) && ((!increase_parallelism_in_new_superstep) || - get_nr_parallelizable_nodes(instance, stale, - nr_old_ready_nodes_per_type, - nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale], - procReady[(supstepIdx + 1) % stale], - nr_procs_per_type) >= - std::min(std::min(P, static_cast(1.2 * (P - free))), - P - free + static_cast(0.5 * free)))) { + get_nr_parallelizable_nodes( + instance, stale, nr_old_ready_nodes_per_type, + nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale], + procReady[(supstepIdx + 1) % stale], + nr_procs_per_type) >= std::min( + std::min(P, static_cast(1.2 * (P - free))), + P - free + static_cast(0.5 * free)))) + { endSupStep = true; } } @@ -572,14 +625,15 @@ class GreedyVarianceSspScheduler : public Scheduler { return RETURN_STATUS::OSP_SUCCESS; } + RETURN_STATUS computeSchedule(BspSchedule &schedule) override { + return computeSspSchedule(schedule, 1U); + } - - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - // Default SSP = 1 (no staleness) or maybe configurable later - return computeSspSchedule(schedule, /*stale=*/1); + RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) { + return computeSspSchedule(schedule, 2U); } - virtual std::string getScheduleName() const override { + std::string getScheduleName() const override { if constexpr (use_memory_constraint) { return "GreedyVarianceSspMemory"; } else { diff --git a/include/osp/bsp/scheduler/MaxBspScheduler.hpp b/include/osp/bsp/scheduler/MaxBspScheduler.hpp new file mode 100644 index 00000000..76f525f7 --- /dev/null +++ b/include/osp/bsp/scheduler/MaxBspScheduler.hpp @@ -0,0 +1,87 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include "osp/bsp/model/MaxBspSchedule.hpp" +#include "osp/bsp/model/MaxBspScheduleCS.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" + +namespace osp { + +/** + * @class Scheduler + * @brief Abstract base class for scheduling scheduler. + * + * The Scheduler class provides a common interface for scheduling scheduler in the BSP scheduling system. + * It defines methods for setting and getting the time limit, as well as computing schedules. + */ +template +class MaxBspScheduler : public Scheduler { + + static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); + + /** + * @brief Get the name of the scheduling algorithm. + * @return The name of the scheduling algorithm. + */ + virtual std::string getScheduleName() const override = 0; + + /** + * @brief Compute a BSP schedule for the given BSP instance. + * @param instance The BSP instance for which to compute the schedule. + * @return A pair containing the return status and the computed schedule. + */ + virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { + MaxBspSchedule tmpSched(schedule.getInstance()); + RETURN_STATUS status = computeSchedule(tmpSched); + schedule = tmpSched; + return status; + } + + virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) { + + auto result = computeSchedule(schedule); + if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { + schedule.setAutoCommunicationSchedule(); + return result; + } else { + return RETURN_STATUS::ERROR; + } + } + + /** + * @brief Compute a BSP schedule for the given BSP instance. + * @param instance The BSP instance for which to compute the schedule. + * @return A pair containing the return status and the computed schedule. + */ + virtual RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) = 0; + + virtual RETURN_STATUS computeScheduleCS(MaxBspScheduleCS &schedule) { +// Fix me todo + auto result = computeSchedule(schedule); + if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { + // schedule.setAutoCommunicationSchedule(); + return result; + } else { + return RETURN_STATUS::ERROR; + } + } +}; + +} // namespace osp \ No newline at end of file diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index 94a9161b..d05b3b66 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -38,6 +38,7 @@ limitations under the License. #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" #include "osp/bsp/scheduler/Serial.hpp" +#include "osp/bsp/scheduler/MaxBspScheduler.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" #include "osp/coarser/SquashA/SquashAMul.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" @@ -210,12 +211,6 @@ BOOST_AUTO_TEST_CASE(variancefillup_test) { run_test(&test); } -BOOST_AUTO_TEST_CASE(greedyvariancesspscheduler_test){ - GreedyVarianceSspScheduler test; - run_test(&test); -} - - BOOST_AUTO_TEST_CASE(etf_test_edge_desc_impl) { EtfScheduler test; @@ -367,4 +362,35 @@ BOOST_AUTO_TEST_CASE(SarkarMul_improver_test) { MultilevelCoarseAndSchedule coarsen_test(sched, improver, ml_coarsen); run_test(&coarsen_test); +} + +// Tests computeSchedule(BspSchedule&) → staleness = 1 +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_vector_impl) { + GreedyVarianceSspScheduler test; + run_test(&test); +} + +// Tests computeSchedule(BspSchedule&) → staleness = 1 (different graph impl) +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_edge_idx_impl) { + GreedyVarianceSspScheduler test; + run_test(&test); +} + +// Tests computeSchedule(MaxBspSchedule&) → staleness = 2 +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_MaxBspSchedule_integration) { + using Graph_t = computational_dag_edge_idx_vector_impl_def_int_t; + BspInstance instance; + instance.setNumberOfProcessors(2); + auto &dag = instance.getComputationalDag(); + dag.add_vertex(5, 1, 0); + dag.add_vertex(3, 1, 0); + dag.add_edge(0, 1); + + GreedyVarianceSspScheduler scheduler; + MaxBspSchedule schedule(instance); + const auto result = scheduler.computeSchedule(schedule); + + BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); + BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.numberOfSupersteps() >= 2); } \ No newline at end of file From ad3bad478b6793e94483e3f83c2e0b75457d31c1 Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Wed, 22 Oct 2025 15:07:35 +0200 Subject: [PATCH 3/6] Fixing bugs --- .../GreedyVarianceSspScheduler.hpp | 60 ++++++++--------- include/osp/bsp/scheduler/Scheduler.hpp | 21 ++++++ tests/bsp_schedulers.cpp | 65 ++++++++++++++----- 3 files changed, 100 insertions(+), 46 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp index 2a8b8d99..8a455347 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -166,8 +166,6 @@ class GreedyVarianceSspScheduler : public Scheduler { p = i; found_allocation = true; - procReady[i].erase(it); - if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { const auto &compatibleTypes = procTypesCompatibleWithNodeType_skip_proctype[procType] @@ -191,8 +189,6 @@ class GreedyVarianceSspScheduler : public Scheduler { p = i; found_allocation = true; - procReady[i].erase(it); - if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { const auto &compatibleTypes = procTypesCompatibleWithNodeType_skip_proctype[procType] @@ -243,8 +239,6 @@ class GreedyVarianceSspScheduler : public Scheduler { node = it->first; p = i; - readyList.erase(it); - const auto &compatibleTypes = procTypesCompatibleWithNodeType_skip_proctype[procType] [instance.getComputationalDag().vertex_type(node)]; @@ -260,8 +254,6 @@ class GreedyVarianceSspScheduler : public Scheduler { node = it->first; p = i; - readyList.erase(it); - const auto &compatibleTypes = procTypesCompatibleWithNodeType_skip_proctype[procType] [instance.getComputationalDag().vertex_type(node)]; @@ -270,11 +262,10 @@ class GreedyVarianceSspScheduler : public Scheduler { if (otherType < allReady.size()) allReady[otherType].erase(std::make_pair(node, work_variance[node])); } - + return; } } - ++it; } } @@ -364,20 +355,26 @@ class GreedyVarianceSspScheduler : public Scheduler { return nr_nodes; } - public: - RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { + /** + * @brief Default constructor for GreedyVarianceSspScheduler. + */ + GreedyVarianceSspScheduler(float max_percent_idle_processors_ = 0.2f, bool increase_parallelism_in_new_superstep_ = true) + : max_percent_idle_processors(max_percent_idle_processors_), + increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} + /** + * @brief Default destructor for GreedyVarianceSspScheduler. + */ + virtual ~GreedyVarianceSspScheduler() = default; + + RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { const auto &instance = schedule.getInstance(); const auto &G = instance.getComputationalDag(); - const auto &N = instance.numberOfVertices(); + const VertexType &N = instance.numberOfVertices(); const unsigned &P = instance.numberOfProcessors(); - for (auto v : G.vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); - } - unsigned supstepIdx = 0; if constexpr (is_memory_constraint_v) { @@ -407,10 +404,13 @@ class GreedyVarianceSspScheduler : public Scheduler { ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; } - std::vector nrPredecRemain(N); - for (VertexType node = 0; node < static_cast(N); ++node) { + std::vector nrPredecRemain(N); + + for (VertexType node = 0; node < N; ++node) { const auto num_parents = G.in_degree(node); - nrPredecRemain[node] = static_cast(num_parents); + + nrPredecRemain[node] = num_parents; + if (num_parents == 0) { ready[0].insert(std::make_pair(node, work_variances[node])); nr_ready_stale_nodes_per_type[0][G.vertex_type(node)]++; @@ -426,9 +426,6 @@ class GreedyVarianceSspScheduler : public Scheduler { std::vector number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); std::vector limit_of_number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - - - bool endSupStep = true; bool begin_outer_while = true; bool able_to_schedule_in_step = false; @@ -478,12 +475,11 @@ class GreedyVarianceSspScheduler : public Scheduler { } for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { - unsigned equal_split = (static_cast(allReady[procType].size()) + stale - 1) / stale; + unsigned equal_split = (static_cast(allReady[procType].size()) + stale - 1) / stale; unsigned at_least_for_long_step = 3 * nr_procs_per_type[procType]; - limit_of_number_of_allocated_allReady_tasks_in_superstep[procType] = std::max(at_least_for_long_step, equal_split); } - + endSupStep = false; finishTimes.emplace(0, std::numeric_limits::max()); } @@ -508,8 +504,7 @@ class GreedyVarianceSspScheduler : public Scheduler { unsigned earliest_add = supstepIdx; for (const auto& pred : G.parents(succ)) { if (schedule.assignedProcessor(pred) != proc_of_node) { - earliest_add = std::max(earliest_add, - stale + schedule.assignedSuperstep(pred)); + earliest_add = std::max(earliest_add, stale + schedule.assignedSuperstep(pred)); } } @@ -542,6 +537,7 @@ class GreedyVarianceSspScheduler : public Scheduler { if (!CanChooseNode(instance, allReady, procReady[supstepIdx % stale], procFree)) { endSupStep = true; } + while (CanChooseNode(instance, allReady, procReady[supstepIdx % stale], procFree)) { VertexType nextNode = std::numeric_limits::max(); unsigned nextProc = P; @@ -567,6 +563,7 @@ class GreedyVarianceSspScheduler : public Scheduler { nr_old_ready_nodes_per_type[G.vertex_type(nextNode)]--; const unsigned nextProcType = instance.getArchitecture().processorType(nextProc); number_of_allocated_allReady_tasks_in_superstep[nextProcType]++; + if (number_of_allocated_allReady_tasks_in_superstep[nextProcType] >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) { allReady[nextProcType].clear(); } @@ -575,6 +572,7 @@ class GreedyVarianceSspScheduler : public Scheduler { for (size_t i = 0; i < stale; i++) { ready[i].erase(std::make_pair(nextNode, work_variances[nextNode])); } + old_ready.erase(std::make_pair(nextNode, work_variances[nextNode])); schedule.setAssignedProcessor(nextNode, nextProc); @@ -605,7 +603,7 @@ class GreedyVarianceSspScheduler : public Scheduler { else if (++successive_empty_supersteps > 100 + stale) return RETURN_STATUS::ERROR; - if (free > static_cast(P * max_percent_idle_processors) && + if (free > (P * max_percent_idle_processors) && ((!increase_parallelism_in_new_superstep) || get_nr_parallelizable_nodes( instance, stale, nr_old_ready_nodes_per_type, @@ -626,10 +624,12 @@ class GreedyVarianceSspScheduler : public Scheduler { } RETURN_STATUS computeSchedule(BspSchedule &schedule) override { + std::cout<< "BspSchedule"<< std::endl; return computeSspSchedule(schedule, 1U); } - RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) { + RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) override { + std::cout<< "MaxBspSchedule"<< std::endl; return computeSspSchedule(schedule, 2U); } diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp index dd06d187..2b275271 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -22,6 +22,7 @@ limitations under the License. #include #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/model/MaxBspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" #include "osp/concepts/computational_dag_concept.hpp" @@ -93,6 +94,26 @@ class Scheduler { */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) = 0; + /** + * @brief Compute a Max-BSP (Stale Synchronous Parallel) schedule for the given instance. + * + * This overload allows schedulers that support the Max-BSP or SSP model + * to implement asynchronous-like scheduling with configurable staleness. + * By default, this base implementation throws an exception, since not all + * schedulers provide Max-BSP support. Derived schedulers such as + * GreedyVarianceSspScheduler should override this function to perform + * their custom SSP scheduling logic. + * + * @param schedule Reference to a MaxBspSchedule object representing + * the schedule to be computed and populated. + * @return A RETURN_STATUS code indicating the success or failure of the computation. + * @throws std::runtime_error If the scheduler does not implement Max-BSP scheduling. + */ + virtual RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) { + (void)schedule; + throw std::runtime_error("Not implemented for this scheduler"); + } + virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) { auto result = computeSchedule(schedule); diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index d05b3b66..22e3e805 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -149,6 +149,51 @@ void run_test_2(Scheduler *test_scheduler) { } }; +template +void run_test_max_bsp(Scheduler* test_scheduler) { + std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenames_architectures = test_architectures(); + + // Locate project root + std::filesystem::path cwd = std::filesystem::current_path(); + while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { + cwd = cwd.parent_path(); + } + + for (auto& filename_graph : filenames_graph) { + for (auto& filename_machine : filenames_architectures) { + std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); + name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); + name_machine = name_machine.substr(0, name_machine.rfind(".")); + + std::cout << std::endl + << "Scheduler (MaxBsp): " << test_scheduler->getScheduleName() << std::endl + << "Graph: " << name_graph << std::endl + << "Architecture: " << name_machine << std::endl; + + computational_dag_edge_idx_vector_impl_def_int_t graph; + BspArchitecture arch; + + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); + bool status_architecture = + file_reader::readBspArchitecture((cwd / filename_machine).string(), arch); + + BOOST_REQUIRE_MESSAGE(status_graph, "Failed to read graph: " << filename_graph); + BOOST_REQUIRE_MESSAGE(status_architecture, "Failed to read architecture: " << filename_machine); + + BspInstance instance(graph, arch); + + MaxBspSchedule schedule(instance); + + const auto result = test_scheduler->computeSchedule(schedule); + + BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); + BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + } + } +} + BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) { GreedyBspScheduler test; @@ -377,20 +422,8 @@ BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_edge_idx_impl) { } // Tests computeSchedule(MaxBspSchedule&) → staleness = 2 -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_MaxBspSchedule_integration) { +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_MaxBspSchedule_large_test) { using Graph_t = computational_dag_edge_idx_vector_impl_def_int_t; - BspInstance instance; - instance.setNumberOfProcessors(2); - auto &dag = instance.getComputationalDag(); - dag.add_vertex(5, 1, 0); - dag.add_vertex(3, 1, 0); - dag.add_edge(0, 1); - - GreedyVarianceSspScheduler scheduler; - MaxBspSchedule schedule(instance); - const auto result = scheduler.computeSchedule(schedule); - - BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.numberOfSupersteps() >= 2); -} \ No newline at end of file + GreedyVarianceSspScheduler test; + run_test_max_bsp(&test); +} From 66a2e9305799bb02d93977a1e9c7185621c4dd3f Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Mon, 17 Nov 2025 15:02:42 +0100 Subject: [PATCH 4/6] Fixes --- include/osp/bsp/model/BspSchedule.hpp | 2 +- .../GreedyVarianceSspScheduler.hpp | 10 +- include/osp/bsp/scheduler/MaxBspScheduler.hpp | 11 +- include/osp/bsp/scheduler/Scheduler.hpp | 20 --- tests/CMakeLists.txt | 2 + tests/bsp_schedulers.cpp | 67 +------- tests/max_bsp_schedulers.cpp | 151 ++++++++++++++++++ 7 files changed, 164 insertions(+), 99 deletions(-) create mode 100644 tests/max_bsp_schedulers.cpp diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index b30cb26c..299e1716 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -65,7 +65,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval node_to_processor_assignment; - std::vector node_to_superstep_assignment; + std::vector node_to_superstep_assignment; template inline bool satisfies_precedence_constraints_staleness() const { diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp index 8a455347..0ac5c041 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -31,9 +31,7 @@ limitations under the License. #include "MemoryConstraintModules.hpp" #include "osp/auxiliary/misc.hpp" -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/MaxBspSchedule.hpp" -#include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/bsp/scheduler/MaxBspScheduler.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" namespace osp { @@ -45,7 +43,7 @@ namespace osp { * It computes schedules for BspInstance using variance-based priorities. */ template -class GreedyVarianceSspScheduler : public Scheduler { +class GreedyVarianceSspScheduler : public MaxBspScheduler { static_assert(is_computational_dag_v, "GreedyVarianceSspScheduler can only be used with computational DAGs."); @@ -111,7 +109,7 @@ class GreedyVarianceSspScheduler : public Scheduler { struct VarianceCompare { bool operator()(const std::pair &lhs, const std::pair &rhs) const { - return ((lhs.second > rhs.second) || ((lhs.second == rhs.second) && (lhs.first < rhs.first))); + return ((lhs.second > rhs.second) || ((lhs.second >= rhs.second) && (lhs.first < rhs.first))); } }; @@ -624,12 +622,10 @@ class GreedyVarianceSspScheduler : public Scheduler { } RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - std::cout<< "BspSchedule"<< std::endl; return computeSspSchedule(schedule, 1U); } RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) override { - std::cout<< "MaxBspSchedule"<< std::endl; return computeSspSchedule(schedule, 2U); } diff --git a/include/osp/bsp/scheduler/MaxBspScheduler.hpp b/include/osp/bsp/scheduler/MaxBspScheduler.hpp index 76f525f7..c6accf25 100644 --- a/include/osp/bsp/scheduler/MaxBspScheduler.hpp +++ b/include/osp/bsp/scheduler/MaxBspScheduler.hpp @@ -33,6 +33,7 @@ namespace osp { */ template class MaxBspScheduler : public Scheduler { + public: static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); @@ -54,10 +55,11 @@ class MaxBspScheduler : public Scheduler { return status; } - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) { - - auto result = computeSchedule(schedule); + virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) override { + MaxBspScheduleCS tmpSchedule(schedule.getInstance()); + auto result = computeScheduleCS(tmpSchedule); if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { + schedule = tmpSchedule; schedule.setAutoCommunicationSchedule(); return result; } else { @@ -73,7 +75,6 @@ class MaxBspScheduler : public Scheduler { virtual RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) = 0; virtual RETURN_STATUS computeScheduleCS(MaxBspScheduleCS &schedule) { -// Fix me todo auto result = computeSchedule(schedule); if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { // schedule.setAutoCommunicationSchedule(); @@ -81,7 +82,7 @@ class MaxBspScheduler : public Scheduler { } else { return RETURN_STATUS::ERROR; } - } + }; }; } // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp index 2b275271..092d9946 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -94,26 +94,6 @@ class Scheduler { */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) = 0; - /** - * @brief Compute a Max-BSP (Stale Synchronous Parallel) schedule for the given instance. - * - * This overload allows schedulers that support the Max-BSP or SSP model - * to implement asynchronous-like scheduling with configurable staleness. - * By default, this base implementation throws an exception, since not all - * schedulers provide Max-BSP support. Derived schedulers such as - * GreedyVarianceSspScheduler should override this function to perform - * their custom SSP scheduling logic. - * - * @param schedule Reference to a MaxBspSchedule object representing - * the schedule to be computed and populated. - * @return A RETURN_STATUS code indicating the success or failure of the computation. - * @throws std::runtime_error If the scheduler does not implement Max-BSP scheduling. - */ - virtual RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) { - (void)schedule; - throw std::runtime_error("Not implemented for this scheduler"); - } - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) { auto result = computeSchedule(schedule); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 17975dde..8738643f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -122,6 +122,8 @@ endif() _add_test( bsp_schedulers ) +_add_test( max_bsp_schedulers ) + _add_test( bsp_schedulers_mem_const ) _add_test( bsp_improvementschedulers ) diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index 22e3e805..215e4e6b 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -38,7 +38,6 @@ limitations under the License. #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" #include "osp/bsp/scheduler/Serial.hpp" -#include "osp/bsp/scheduler/MaxBspScheduler.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" #include "osp/coarser/SquashA/SquashAMul.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" @@ -149,51 +148,6 @@ void run_test_2(Scheduler *test_scheduler) { } }; -template -void run_test_max_bsp(Scheduler* test_scheduler) { - std::vector filenames_graph = tiny_spaa_graphs(); - std::vector filenames_architectures = test_architectures(); - - // Locate project root - std::filesystem::path cwd = std::filesystem::current_path(); - while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { - cwd = cwd.parent_path(); - } - - for (auto& filename_graph : filenames_graph) { - for (auto& filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); - - std::cout << std::endl - << "Scheduler (MaxBsp): " << test_scheduler->getScheduleName() << std::endl - << "Graph: " << name_graph << std::endl - << "Architecture: " << name_machine << std::endl; - - computational_dag_edge_idx_vector_impl_def_int_t graph; - BspArchitecture arch; - - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); - bool status_architecture = - file_reader::readBspArchitecture((cwd / filename_machine).string(), arch); - - BOOST_REQUIRE_MESSAGE(status_graph, "Failed to read graph: " << filename_graph); - BOOST_REQUIRE_MESSAGE(status_architecture, "Failed to read architecture: " << filename_machine); - - BspInstance instance(graph, arch); - - MaxBspSchedule schedule(instance); - - const auto result = test_scheduler->computeSchedule(schedule); - - BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - } - } -} - BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) { GreedyBspScheduler test; @@ -407,23 +361,4 @@ BOOST_AUTO_TEST_CASE(SarkarMul_improver_test) { MultilevelCoarseAndSchedule coarsen_test(sched, improver, ml_coarsen); run_test(&coarsen_test); -} - -// Tests computeSchedule(BspSchedule&) → staleness = 1 -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_vector_impl) { - GreedyVarianceSspScheduler test; - run_test(&test); -} - -// Tests computeSchedule(BspSchedule&) → staleness = 1 (different graph impl) -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_edge_idx_impl) { - GreedyVarianceSspScheduler test; - run_test(&test); -} - -// Tests computeSchedule(MaxBspSchedule&) → staleness = 2 -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_MaxBspSchedule_large_test) { - using Graph_t = computational_dag_edge_idx_vector_impl_def_int_t; - GreedyVarianceSspScheduler test; - run_test_max_bsp(&test); -} +} \ No newline at end of file diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp new file mode 100644 index 00000000..dbbd1da2 --- /dev/null +++ b/tests/max_bsp_schedulers.cpp @@ -0,0 +1,151 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#define BOOST_TEST_MODULE BSP_SCHEDULERS +#include + +#include +#include +#include + + +#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp" +#include "osp/bsp/scheduler/Serial.hpp" +#include "osp/bsp/scheduler/MaxBspScheduler.hpp" +#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "test_graphs.hpp" + +using namespace osp; + +std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } + +template +void run_test(Scheduler *test_scheduler) { + // static_assert(std::is_base_of::value, "Class is not a scheduler!"); + std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenames_architectures = test_architectures(); + + // Getting root git directory + std::filesystem::path cwd = std::filesystem::current_path(); + std::cout << cwd << std::endl; + while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { + cwd = cwd.parent_path(); + std::cout << cwd << std::endl; + } + + for (auto &filename_graph : filenames_graph) { + for (auto &filename_machine : filenames_architectures) { + std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); + name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); + name_machine = name_machine.substr(0, name_machine.rfind(".")); + + std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; + std::cout << "Graph: " << name_graph << std::endl; + std::cout << "Architecture: " << name_machine << std::endl; + + BspInstance instance; + + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), + instance.getComputationalDag()); + bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), + instance.getArchitecture()); + + if (!status_graph || !status_architecture) { + + std::cout << "Reading files failed." << std::endl; + BOOST_CHECK(false); + } + + BspSchedule schedule(instance); + const auto result = test_scheduler->computeSchedule(schedule); + + BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); + BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + } + } +}; + +template +void run_test_max_bsp(MaxBspScheduler* test_scheduler) { + std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenames_architectures = test_architectures(); + + // Locate project root + std::filesystem::path cwd = std::filesystem::current_path(); + while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { + cwd = cwd.parent_path(); + } + + for (auto& filename_graph : filenames_graph) { + for (auto& filename_machine : filenames_architectures) { + std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); + name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); + name_machine = name_machine.substr(0, name_machine.rfind(".")); + + std::cout << std::endl + << "Scheduler (MaxBsp): " << test_scheduler->getScheduleName() << std::endl + << "Graph: " << name_graph << std::endl + << "Architecture: " << name_machine << std::endl; + + computational_dag_edge_idx_vector_impl_def_int_t graph; + BspArchitecture arch; + + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); + bool status_architecture = + file_reader::readBspArchitecture((cwd / filename_machine).string(), arch); + + BOOST_REQUIRE_MESSAGE(status_graph, "Failed to read graph: " << filename_graph); + BOOST_REQUIRE_MESSAGE(status_architecture, "Failed to read architecture: " << filename_machine); + + BspInstance instance(graph, arch); + + MaxBspSchedule schedule(instance); + + const auto result = test_scheduler->computeSchedule(schedule); + + BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); + BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + } + } +} + +// Tests computeSchedule(BspSchedule&) → staleness = 1 +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_vector_impl) { + GreedyVarianceSspScheduler test; + run_test(&test); +} + +// Tests computeSchedule(BspSchedule&) → staleness = 1 (different graph impl) +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_edge_idx_impl) { + GreedyVarianceSspScheduler test; + run_test(&test); +} + +// Tests computeSchedule(MaxBspSchedule&) → staleness = 2 +BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_MaxBspSchedule_large_test) { + GreedyVarianceSspScheduler test; + run_test_max_bsp(&test); +} From c998da845dcc8bba0728ed677216d8070ca31777 Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Mon, 17 Nov 2025 15:29:36 +0100 Subject: [PATCH 5/6] Fix cast warning --- .../GreedySchedulers/GreedyVarianceSspScheduler.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp index 0ac5c041..7a6c454d 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -75,7 +75,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } temp = std::log(temp) / 2 + max_priority; - double node_weight = std::log((double)std::max(graph.vertex_work_weight(*r_iter), static_cast>(1))); + double node_weight = std::log( + static_cast( + std::max( + graph.vertex_work_weight(*r_iter), + static_cast>(1) + ) + ) + ); double larger_val = node_weight > temp ? node_weight : temp; work_variance[*r_iter] = From a2ed355d6238b88c1ca9a4936805faf56d2bfdeb Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Mon, 17 Nov 2025 17:37:39 +0100 Subject: [PATCH 6/6] Minor fixes --- include/osp/bsp/scheduler/Scheduler.hpp | 1 - tests/bsp_schedulers.cpp | 1 - tests/max_bsp_schedulers.cpp | 4 +--- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp index 092d9946..dd06d187 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -22,7 +22,6 @@ limitations under the License. #include #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/MaxBspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" #include "osp/concepts/computational_dag_concept.hpp" diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index 215e4e6b..936bad69 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -33,7 +33,6 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp" #include "osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp" #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index dbbd1da2..a3a9aeda 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Christos Matzoros, Pal Andras Papp, Raphael S. Steiner */ #define BOOST_TEST_MODULE BSP_SCHEDULERS @@ -26,9 +26,7 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp" -#include "osp/bsp/scheduler/Serial.hpp" #include "osp/bsp/scheduler/MaxBspScheduler.hpp" -#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp"