diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp index e7b2ead1..0af27959 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp @@ -72,18 +72,22 @@ class GreedyChildren : public Scheduler { unsigned processor_to_be_allocated = 0; for (const auto &par : graph.parents(node)) { - if (processor_set && - (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend()) && - (sched.assignedProcessor(par) != processor_to_be_allocated)) { - failed_to_allocate = true; - break; - } - if ((!processor_set) && - (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())) { - processor_set = true; - processor_to_be_allocated = sched.assignedProcessor(par); + if (nodes_assigned_this_superstep.count(par)) { + if (!processor_set) { + const unsigned par_proc = sched.assignedProcessor(par); + if(!instance.isCompatible(node, par_proc)) { + failed_to_allocate = true; + break; + } + processor_set = true; + processor_to_be_allocated = par_proc; + } else if (sched.assignedProcessor(par) != processor_to_be_allocated) { + failed_to_allocate = true; + break; + } } } + if (failed_to_allocate) continue; @@ -91,12 +95,20 @@ class GreedyChildren : public Scheduler { if (processor_set) { sched.setAssignedProcessor(node, processor_to_be_allocated); } else { - - auto min_iter = std::min_element(processor_weights.begin(), processor_weights.end()); - assert(std::distance(processor_weights.begin(), min_iter) >= 0); - sched.setAssignedProcessor( - node, static_cast(std::distance(processor_weights.begin(), min_iter))); - } + v_workw_t min_weight = std::numeric_limits>::max(); + unsigned best_proc = std::numeric_limits::max(); + for (unsigned p = 0; p < instance.numberOfProcessors(); ++p) { + if (instance.isCompatible(node, p)) { + if (processor_weights[p] < min_weight) { + min_weight = 
processor_weights[p]; + best_proc = p; + } + } + } + // best_proc keeps its sentinel only if no processor is compatible with this node; it must not be used to index processor_weights. + assert(best_proc < instance.numberOfProcessors() && "GreedyChildren: no compatible processor for node"); + sched.setAssignedProcessor(node, best_proc); + } nodes_assigned_this_superstep.emplace(node); processor_weights[sched.assignedProcessor(node)] += graph.vertex_work_weight(node); diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp index df19a347..c0956ae2 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp @@ -198,15 +198,15 @@ class IsomorphicSubgraphScheduler { std::sort(rep_subgraph_vertices_sorted.begin(), rep_subgraph_vertices_sorted.end()); BspInstance representative_instance; - create_induced_subgraph(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); - + auto rep_global_to_local_map = create_induced_subgraph_map(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); + representative_instance.setArchitecture(instance.getArchitecture()); - std::vector> dummy_mem_weights(sub_sched.node_assigned_worker_per_type[grou_idx].size(), 0); + std::vector> mem_weights(sub_sched.node_assigned_worker_per_type[grou_idx].size(), 0); for (unsigned proc_type = 0; proc_type < sub_sched.node_assigned_worker_per_type[grou_idx].size(); ++proc_type) { - dummy_mem_weights[proc_type] = static_cast>(instance.getArchitecture().maxMemoryBoundProcType(proc_type)); + mem_weights[proc_type] = static_cast>(instance.getArchitecture().maxMemoryBoundProcType(proc_type)); } const auto& procs_for_group = sub_sched.node_assigned_worker_per_type[grou_idx]; - representative_instance.getArchitecture().set_processors_consequ_types(procs_for_group, dummy_mem_weights); + representative_instance.getArchitecture().set_processors_consequ_types(procs_for_group, mem_weights); 
representative_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); // Schedule the representative to get the pattern @@ -215,7 +215,20 @@ class IsomorphicSubgraphScheduler { if constexpr (verbose) { std::cout << "--- Scheduling representative for group " << grou_idx << " ---" << std::endl; std::cout << " Number of subgraphs in group: " << group.subgraphs.size() << std::endl; - std::cout << " Representative subgraph size: " << rep_subgraph_vertices_sorted.size() << " vertices" << std::endl; + const auto& rep_dag = representative_instance.getComputationalDag(); + std::cout << " Representative subgraph size: " << rep_dag.num_vertices() << " vertices" << std::endl; + std::vector node_type_counts(rep_dag.num_vertex_types(), 0); + for (const auto& v : rep_dag.vertices()) { + node_type_counts[rep_dag.vertex_type(v)]++; + } + std::cout << " Node type counts: "; + for (size_t type_idx = 0; type_idx < node_type_counts.size(); ++type_idx) { + if (node_type_counts[type_idx] > 0) { + std::cout << "T" << type_idx << ":" << node_type_counts[type_idx] << " "; + } + } + std::cout << std::endl; + const auto& sub_arch = representative_instance.getArchitecture(); std::cout << " Sub-architecture for scheduling:" << std::endl; std::cout << " Processors: " << sub_arch.numberOfProcessors() << std::endl; @@ -226,9 +239,45 @@ class IsomorphicSubgraphScheduler { } std::cout << std::endl; std::cout << " Sync cost: " << sub_arch.synchronisationCosts() << ", Comm cost: " << sub_arch.communicationCosts() << std::endl; + std::cout << " Sub-problem compatibility matrix:" << std::endl; + const auto & sub_comp_matrix = representative_instance.getNodeNodeCompatabilityMatrix(); + for(unsigned i = 0; i < sub_comp_matrix.size(); ++i) { + std::cout << " Node Type " << i << ": [ "; + for (unsigned j = 0; j < sub_comp_matrix[i].size(); ++j) { + std::cout << (sub_comp_matrix[i][j] ? 
"1" : "0") << " "; + } + std::cout << "]" << std::endl; + } + } bsp_scheduler_->computeSchedule(bsp_schedule); + if constexpr (verbose) { + std::cout << " Schedule satisfies precedence constraints: "; + std::cout << bsp_schedule.satisfiesPrecedenceConstraints() << std::endl; + std::cout << " Schedule satisfies node type constraints: "; + std::cout << bsp_schedule.satisfiesNodeTypeConstraints() << std::endl; + } + + + if (plot_dot_graphs_) { + const auto& rep_dag = bsp_schedule.getInstance().getComputationalDag(); + std::vector colors(rep_dag.num_vertices()); + std::map, unsigned> proc_ss_to_color; + unsigned next_color = 0; + + for (const auto& v : rep_dag.vertices()) { + const auto assignment = std::make_pair(bsp_schedule.assignedProcessor(v), bsp_schedule.assignedSuperstep(v)); + if (proc_ss_to_color.find(assignment) == proc_ss_to_color.end()) { + proc_ss_to_color[assignment] = next_color++; + } + colors[v] = proc_ss_to_color[assignment]; + } + DotFileWriter writer; + writer.write_colored_graph("iso_group_rep_" + std::to_string(grou_idx) + ".dot", rep_dag, colors); + writer.write_schedule("iso_group_rep_schedule_" + std::to_string(grou_idx) + ".dot", bsp_schedule); + } + // Build data structures for applying the pattern --- // Map (superstep, processor) -> relative partition ID std::map, vertex_idx_t> sp_proc_to_relative_partition; @@ -252,13 +301,11 @@ class IsomorphicSubgraphScheduler { std::unordered_map, vertex_idx_t> current_vertex_to_rep_local_idx; if (i == 0) { // The first subgraph is the representative itself - for (size_t j = 0; j < rep_subgraph_vertices_sorted.size(); ++j) { - current_vertex_to_rep_local_idx[rep_subgraph_vertices_sorted[j]] = static_cast>(j); - } + current_vertex_to_rep_local_idx = std::move(rep_global_to_local_map); } else { // For other subgraphs, build the isomorphic mapping Constr_Graph_t current_subgraph_graph; create_induced_subgraph(instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted); - + 
MerkleHashComputer current_hasher(current_subgraph_graph); for(const auto& [hash, rep_orbit_nodes] : rep_hasher.get_orbits()) { diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp new file mode 100644 index 00000000..07a9d241 --- /dev/null +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp @@ -0,0 +1,185 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "MerkleHashComputer.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" + +namespace osp { + +/** + * @brief Finds a correct isomorphic mapping between a known "representative" + * subgraph and a new "current" subgraph, assuming they are isomorphic. + * + * This class uses a greedy breadth-first traversal guided by Merkle hashes: a vertex pair + * is accepted on hash equality alone and is never revisited (there is no backtracking). + * + * @tparam Graph_t The original graph type (for global vertex IDs). + * @tparam Constr_Graph_t The subgraph/contracted graph type. + */ +template +class IsomorphismMapper { + + using VertexC = vertex_idx_t; // Local vertex ID + using VertexG = vertex_idx_t; // Global vertex ID + + const Constr_Graph_t& rep_graph; + const MerkleHashComputer rep_hasher; + + public: + /** + * @brief Constructs an IsomorphismMapper. + * @param representative_graph The subgraph to use as the "pattern"; held by reference, so it must outlive the mapper. 
+ */ + explicit IsomorphismMapper(const Constr_Graph_t& representative_graph) + : rep_graph(representative_graph), rep_hasher(representative_graph), + num_vertices(representative_graph.num_vertices()) {} + + virtual ~IsomorphismMapper() = default; + + /** + * @brief Finds the isomorphism between the representative graph and a new graph. + * + * This method assumes the two graphs are isomorphic and finds one such mapping. + * + * @param current_graph The new isomorphic subgraph. + * @return A map from `current_local_vertex_id` -> `representative_local_vertex_id`. + * @throws std::runtime_error if vertex counts or orbit structures differ, or if not every vertex could be mapped. + */ + std::unordered_map find_mapping(const Constr_Graph_t& current_graph) const { + if (current_graph.num_vertices() != num_vertices) { + throw std::runtime_error("IsomorphismMapper: Graph sizes do not match."); + } + if (num_vertices == 0) { + return {}; + } + + // 1. Compute hashes and orbits for the current graph. + MerkleHashComputer current_hasher(current_graph); + const auto& rep_orbits = rep_hasher.get_orbits(); + const auto& current_orbits = current_hasher.get_orbits(); + + // 2. Verify that the orbit structures are identical. + if (rep_orbits.size() != current_orbits.size()) { + throw std::runtime_error("IsomorphismMapper: Graphs have a different number of orbits."); + } + for (const auto& [hash, rep_orbit_nodes] : rep_orbits) { + auto it = current_orbits.find(hash); + if (it == current_orbits.end() || it->second.size() != rep_orbit_nodes.size()) { + throw std::runtime_error("IsomorphismMapper: Mismatched orbit structure between graphs."); + } + } + + // 3. Iteratively map all components of the graph. + std::vector map_rep_to_current(num_vertices, std::numeric_limits::max()); + std::vector rep_is_mapped(num_vertices, false); + std::vector current_is_mapped(num_vertices, false); + size_t mapped_count = 0; + + while (mapped_count < num_vertices) { + std::queue> q; + + // Find an unmapped vertex in the representative graph to seed the next component traversal. 
+ VertexC rep_seed = std::numeric_limits::max(); + for (VertexC i = 0; i < num_vertices; ++i) { + if (!rep_is_mapped[i]) { + rep_seed = i; + break; + } + } + + if (rep_seed == std::numeric_limits::max()) break; // Should be unreachable if mapped_count < num_vertices + + // Find a corresponding unmapped vertex in the current graph's orbit. + const auto& candidates = current_orbits.at(rep_hasher.get_vertex_hash(rep_seed)); + VertexC current_seed = std::numeric_limits::max(); // Should always be found + for (const auto& candidate : candidates) { + if (!current_is_mapped[candidate]) { + current_seed = candidate; + break; + } + } + if (current_seed == std::numeric_limits::max()) { + throw std::runtime_error("IsomorphismMapper: Could not find an unmapped candidate to seed component mapping."); + } + + // Seed the queue and start the traversal for this component. + q.push({rep_seed, current_seed}); + map_rep_to_current[rep_seed] = current_seed; + rep_is_mapped[rep_seed] = true; + current_is_mapped[current_seed] = true; + mapped_count++; + + while (!q.empty()) { + auto [u_rep, u_curr] = q.front(); + q.pop(); + + // Match neighbors (both parents and children) + match_neighbors(current_graph, current_hasher, u_rep, u_curr, map_rep_to_current, rep_is_mapped, current_is_mapped, mapped_count, q, true); + match_neighbors(current_graph, current_hasher, u_rep, u_curr, map_rep_to_current, rep_is_mapped, current_is_mapped, mapped_count, q, false); + } + } + + if (mapped_count != num_vertices) { + throw std::runtime_error("IsomorphismMapper: Failed to map all vertices."); + } + + // 4. Invert the rep -> current table into the current -> rep map that is returned. 
+ std::unordered_map current_local_to_rep_local; + current_local_to_rep_local.reserve(num_vertices); + for (VertexC i = 0; i < num_vertices; ++i) current_local_to_rep_local[map_rep_to_current[i]] = i; + return current_local_to_rep_local; + } + +private: + const size_t num_vertices; + + void match_neighbors(const Constr_Graph_t& current_graph, const MerkleHashComputer& current_hasher, + VertexC u_rep, VertexC u_curr, std::vector& map_rep_to_current, + std::vector& rep_is_mapped, std::vector& current_is_mapped, + size_t& mapped_count, std::queue>& q, bool match_children) const { + + const auto& rep_neighbors_range = match_children ? rep_graph.children(u_rep) : rep_graph.parents(u_rep); + const auto& curr_neighbors_range = match_children ? current_graph.children(u_curr) : current_graph.parents(u_curr); + + for (const auto& v_rep : rep_neighbors_range) { + if (rep_is_mapped[v_rep]) continue; + + for (const auto& v_curr : curr_neighbors_range) { + if (current_is_mapped[v_curr]) continue; + + if (rep_hasher.get_vertex_hash(v_rep) == current_hasher.get_vertex_hash(v_curr)) { + map_rep_to_current[v_rep] = v_curr; + rep_is_mapped[v_rep] = true; + current_is_mapped[v_curr] = true; + mapped_count++; + q.push({v_rep, v_curr}); + break; // Found a match for v_rep, move to the next rep neighbor. 
+ } + } + } + } +}; + +} // namespace osp \ No newline at end of file diff --git a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp index 62cdd84c..3ba865c3 100644 --- a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp +++ b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp @@ -102,8 +102,8 @@ class MerkleHashComputer { inline const std::vector &get_vertex_hashes() const { return vertex_hashes; } inline std::size_t num_orbits() const { return orbits.size(); } - inline const std::vector &get_orbit(const VertexType &v) { return get_orbit_from_hash(get_vertex_hash(v)); } - inline const std::unordered_map> &get_orbits() { return orbits; } + inline const std::vector &get_orbit(const VertexType &v) const { return get_orbit_from_hash(get_vertex_hash(v)); } + inline const std::unordered_map> &get_orbits() const { return orbits; } inline const std::vector& get_orbit_from_hash(const std::size_t& hash) const { return orbits.at(hash); diff --git a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp index edd1ed80..adf90bc3 100644 --- a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp +++ b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp @@ -145,21 +145,28 @@ class OrbitGraphProcessor { bool changed = true; while (changed) { - const std::vector< vertex_idx_t > vertexPoset = get_top_node_distance>(current_coarse_graph); + const std::vector< vertex_idx_t > vertexPoset = get_top_node_distance>(current_coarse_graph); + const std::vector< vertex_idx_t > vertexBotPoset = get_bottom_node_distance>(current_coarse_graph); + changed = false; for (const auto& edge : edges(current_coarse_graph)) { VertexType u = source(edge, current_coarse_graph); VertexType v = target(edge, current_coarse_graph); - if (vertexPoset[u] + 1 != vertexPoset[v]) 
continue; + if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) + { + if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " not viable poset. poste v: " << vertexBotPoset[v] << " poste u: " << vertexBotPoset[u] << "\n";} + continue; + } std::vector> new_subgraphs; // --- Check Constraints --- // Symmetry Threshold const bool merge_viable = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); - const bool both_below_symmetry_threshold = (current_groups[u].size() < symmetry_threshold_) && (current_groups[v].size() < symmetry_threshold_); + const bool both_below_symmetry_threshold = (current_groups[u].size() < symmetry_threshold_) && (current_groups[v].size() < symmetry_threshold_);// && (not ((current_groups[u].size() == 1 && current_groups[v].size() > 1) || (current_groups[u].size() > 1 && current_groups[v].size() == 1))); + if (!merge_viable && !both_below_symmetry_threshold) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " not viable (symmetry threshold)\n"; } continue; @@ -178,12 +185,14 @@ class OrbitGraphProcessor { temp_contraction_map[v] = temp_contraction_map[u]; coarser_util::construct_coarse_dag(current_coarse_graph, temp_coarse_graph, temp_contraction_map); - if (!is_acyclic(temp_coarse_graph)) { + if (!is_acyclic(temp_coarse_graph)) { // not necessary if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " creates a cycle. Skipping.\n"; } continue; } - if (critical_path_weight(temp_coarse_graph) > critical_path_weight(current_coarse_graph)) { + const bool crtical_path_longer = critical_path_weight(temp_coarse_graph) > critical_path_weight(current_coarse_graph); + // Check if merging increases the critical path + if (crtical_path_longer) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " increases critical path. 
Skipping.\n"; } continue; } @@ -223,7 +232,7 @@ class OrbitGraphProcessor { break; // Restart scan on the new, smaller graph } } - + // --- Finalize --- final_coarse_graph_ = std::move(current_coarse_graph); final_contraction_map_ = std::move(current_contraction_map); diff --git a/include/osp/graph_algorithms/subgraph_algorithms.hpp b/include/osp/graph_algorithms/subgraph_algorithms.hpp index e7ea0562..190cc2f7 100644 --- a/include/osp/graph_algorithms/subgraph_algorithms.hpp +++ b/include/osp/graph_algorithms/subgraph_algorithms.hpp @@ -218,4 +218,58 @@ std::vector create_induced_subgraphs(const Graph_t_in &dag_in, return split_dags; } +template +std::unordered_map, vertex_idx_t> create_induced_subgraph_map(const Graph_t_in &dag, Graph_t_out &dag_out, + const std::vector> &selected_nodes) { + + static_assert(std::is_same_v, vertex_idx_t>, + "Graph_t_in and out must have the same vertex_idx types"); + + static_assert(is_constructable_cdag_vertex_v, + "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + + static_assert(is_constructable_cdag_edge_v, + "Graph_t_out must satisfy the constructable_cdag_edge concept"); + + assert(dag_out.num_vertices() == 0); + + std::unordered_map, vertex_idx_t> local_idx; + local_idx.reserve(selected_nodes.size()); + + for (const auto &node : selected_nodes) { + local_idx[node] = dag_out.num_vertices(); + + if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { + // add vertex with type + dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), + dag.vertex_type(node)); + } else { + // add vertex without type + dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node)); + } + } + + if constexpr (has_edge_weights_v and has_edge_weights_v) { + + // add edges with edge comm weights + for (const auto &node : selected_nodes) + for (const auto &in_edge : in_edges(node, dag)) { + const auto &pred = 
source(in_edge, dag); + if (local_idx.count(pred)) + dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge)); + } + + } else { + + // add edges without edge comm weights + for (const auto &node : selected_nodes) + for (const auto &pred : dag.parents(node)) { + if (local_idx.count(pred)) + dag_out.add_edge(local_idx[pred], local_idx[node]); + } + } + + return local_idx; +} + }; // namespace osp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 17975dde..2ce7cc96 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,6 +64,8 @@ _add_test( kl_mem_constr ) _add_test( isomorphic_subgraph_scheduler ) +_add_test( isomorphism_mapper ) + _add_test( merkle_hash_computer ) #_add_test( merkle_hash_divider ) diff --git a/tests/isomorphism_mapper.cpp b/tests/isomorphism_mapper.cpp new file mode 100644 index 00000000..a64e7b99 --- /dev/null +++ b/tests/isomorphism_mapper.cpp @@ -0,0 +1,244 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner +*/ + +#define BOOST_TEST_MODULE IsomorphismMapper +#include + +#include "osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "test_graphs.hpp" + +#include +#include +#include + +using namespace osp; + +BOOST_AUTO_TEST_SUITE(IsomorphismMapperTestSuite) + +using graph_t = computational_dag_vector_impl_def_t; +using constr_graph_t = computational_dag_vector_impl_def_t; + +BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) { + // Rep: 0 -> 1 -> 2 + constr_graph_t rep_graph; + rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(30,1,1); + rep_graph.add_edge(0, 1); rep_graph.add_edge(1, 2); + std::vector> rep_map = {100, 101, 102}; + + // Current: 2 -> 0 -> 1 (isomorphic, but different local IDs) + constr_graph_t current_graph; + current_graph.add_vertex(20,1,1); // local 0 (work 20) + current_graph.add_vertex(30,1,1); // local 1 (work 30) + current_graph.add_vertex(10,1,1); // local 2 (work 10) + current_graph.add_edge(2, 0); current_graph.add_edge(0, 1); + std::vector> current_map = {201, 202, 200}; + + IsomorphismMapper mapper(rep_graph); + auto result_map_local = mapper.find_mapping(current_graph); + + // Translate local map to global map for the test + std::unordered_map, vertex_idx_t> result_map; + for(const auto& [curr_local, rep_local] : result_map_local) { + result_map[current_map[curr_local]] = rep_map[rep_local]; + } + + BOOST_REQUIRE_EQUAL(result_map.size(), 3); + // current global ID -> rep global ID + // 200 (work 10) -> 100 (work 10) + // 201 (work 20) -> 101 (work 20) + // 202 (work 30) -> 102 (work 30) + BOOST_CHECK_EQUAL(result_map.at(200), 100); + BOOST_CHECK_EQUAL(result_map.at(201), 101); + BOOST_CHECK_EQUAL(result_map.at(202), 102); +} + +BOOST_AUTO_TEST_CASE(Mapper_ForkJoin) { + // Rep: 0 -> {1,2} -> 3 + constr_graph_t rep_graph; + rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(20,1,1); 
rep_graph.add_vertex(30,1,1); + rep_graph.add_edge(0,1); rep_graph.add_edge(0,2); rep_graph.add_edge(1,3); rep_graph.add_edge(2,3); + std::vector> rep_map = {10, 11, 12, 13}; + + // Current: 3 -> {0,2} -> 1 + constr_graph_t current_graph; + current_graph.add_vertex(20,1,1); // local 0 + current_graph.add_vertex(30,1,1); // local 1 + current_graph.add_vertex(20,1,1); // local 2 + current_graph.add_vertex(10,1,1); // local 3 + current_graph.add_edge(3,0); current_graph.add_edge(3,2); current_graph.add_edge(0,1); current_graph.add_edge(2,1); + std::vector> current_map = {21, 23, 22, 20}; + + IsomorphismMapper mapper(rep_graph); + auto result_map_local = mapper.find_mapping(current_graph); + + std::unordered_map, vertex_idx_t> result_map; + for(const auto& [curr_local, rep_local] : result_map_local) { + result_map[current_map[curr_local]] = rep_map[rep_local]; + } + + BOOST_REQUIRE_EQUAL(result_map.size(), 4); + // current global ID -> rep global ID + // 20 (work 10) -> 10 (work 10) + // 23 (work 30) -> 13 (work 30) + BOOST_CHECK_EQUAL(result_map.at(20), 10); + BOOST_CHECK_EQUAL(result_map.at(23), 13); + + // The two middle nodes are symmetric. The mapping could be either way. + // current {21, 22} -> rep {11, 12} + bool mapping1 = (result_map.at(21) == 11 && result_map.at(22) == 12); + bool mapping2 = (result_map.at(21) == 12 && result_map.at(22) == 11); + BOOST_CHECK(mapping1 || mapping2); +} + +BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) { + // Rep: {0->1}, {2->3}. Two identical but disconnected components. + constr_graph_t rep_graph; + rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); // 0, 1 + rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); // 2, 3 + rep_graph.add_edge(0,1); rep_graph.add_edge(2,3); + std::vector> rep_map = {10, 11, 12, 13}; + + // Current: {2->3}, {0->1}. Same components, but different local IDs. 
+ constr_graph_t current_graph; + current_graph.add_vertex(10,1,1); current_graph.add_vertex(20,1,1); // 0, 1 + current_graph.add_vertex(10,1,1); current_graph.add_vertex(20,1,1); // 2, 3 + current_graph.add_edge(2,3); current_graph.add_edge(0,1); + std::vector> current_map = {22, 23, 20, 21}; + + IsomorphismMapper mapper(rep_graph); + auto result_map_local = mapper.find_mapping(current_graph); + + std::unordered_map, vertex_idx_t> result_map; + for(const auto& [curr_local, rep_local] : result_map_local) { + result_map[current_map[curr_local]] = rep_map[rep_local]; + } + + BOOST_REQUIRE_EQUAL(result_map.size(), 4); + + // The two components are symmetric. The mapping could be component {0,1} -> {0,1} + // and {2,3} -> {2,3}, OR component {0,1} -> {2,3} and {2,3} -> {0,1}. + + // Mapping Option 1: + // rep {10,11} -> current {22,23} + // rep {12,13} -> current {20,21} + bool mapping1 = (result_map.at(20) == 12 && result_map.at(21) == 13 && + result_map.at(22) == 10 && result_map.at(23) == 11); + + // Mapping Option 2: + // rep {10,11} -> current {20,21} + // rep {12,13} -> current {22,23} + bool mapping2 = (result_map.at(22) == 12 && result_map.at(23) == 13 && + result_map.at(20) == 10 && result_map.at(21) == 11); + + BOOST_CHECK(mapping1 || mapping2); +} + +BOOST_AUTO_TEST_CASE(Mapper_MultiPipeline) { + // This test checks the mapping of a graph that is composed of multiple + // isomorphic disconnected components (two parallel pipelines). + + // Rep: Two pipelines {0->1->2} and {3->4->5} + // All nodes at the same stage have the same work weight. + constr_graph_t rep_graph = construct_multi_pipeline_dag(2, 3); + std::vector> rep_map = {10, 11, 12, 20, 21, 22}; + + // Current: Isomorphic to rep, but the pipelines are swapped and vertex IDs are shuffled. 
+ // Pipeline 1 (local IDs 0,1,2) corresponds to rep pipeline 2 (global 20,21,22) + // Pipeline 2 (local IDs 3,4,5) corresponds to rep pipeline 1 (global 10,11,12) + constr_graph_t current_graph; + current_graph.add_vertex(10,1,1); // local 0, stage 0 + current_graph.add_vertex(20,1,1); // local 1, stage 1 + current_graph.add_vertex(30,1,1); // local 2, stage 2 + current_graph.add_vertex(10,1,1); // local 3, stage 0 + current_graph.add_vertex(20,1,1); // local 4, stage 1 + current_graph.add_vertex(30,1,1); // local 5, stage 2 + current_graph.add_edge(0, 1); current_graph.add_edge(1, 2); // First pipeline + current_graph.add_edge(3, 4); current_graph.add_edge(4, 5); // Second pipeline + std::vector> current_map = {120, 121, 122, 110, 111, 112}; + + IsomorphismMapper mapper(rep_graph); + auto result_map_local = mapper.find_mapping(current_graph); + + std::unordered_map, vertex_idx_t> result_map; + for(const auto& [curr_local, rep_local] : result_map_local) { + result_map[current_map[curr_local]] = rep_map[rep_local]; + } + + BOOST_REQUIRE_EQUAL(result_map.size(), 6); + + // The two pipelines are symmetric, so the mapping can go either way. 
+ + // Mapping Option 1: current pipeline 1 (global 120-122) -> rep pipeline 2 (global 20-22), current pipeline 2 (global 110-112) -> rep pipeline 1 (global 10-12) + bool mapping1 = (result_map.at(110) == 10 && result_map.at(111) == 11 && result_map.at(112) == 12 && + result_map.at(120) == 20 && result_map.at(121) == 21 && result_map.at(122) == 22); + + // Mapping Option 2: current pipeline 1 (global 120-122) -> rep pipeline 1 (global 10-12), current pipeline 2 (global 110-112) -> rep pipeline 2 (global 20-22) + bool mapping2 = (result_map.at(110) == 20 && result_map.at(111) == 21 && result_map.at(112) == 22 && + result_map.at(120) == 10 && result_map.at(121) == 11 && result_map.at(122) == 12); + + BOOST_CHECK(mapping1 || mapping2); +} + +BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) { + // This test uses a symmetric graph (a ladder) and shuffles the vertex IDs + // of the 'current' graph to ensure the mapper correctly finds the structural + // isomorphism, not just a naive index-based mapping. + + // Rep: A ladder graph with 2 rungs. + // Structure: {0,1} -> {2,3} -> {4,5} + // Nodes {0,2,4} have work 10 (left side). + // Nodes {1,3,5} have work 20 (right side). + constr_graph_t rep_graph = construct_ladder_dag(2); + std::vector> rep_map = {10, 11, 12, 13, 14, 15}; + + // Current: Isomorphic to rep, but with shuffled local IDs. + // A naive mapping of local IDs (0->0, 1->1, etc.) would be incorrect + // because the work weights would not match. 
+ constr_graph_t current_graph; + current_graph.add_vertex(20,1,1); // local 0 (work 20, right) + current_graph.add_vertex(10,1,1); // local 1 (work 10, left) + current_graph.add_vertex(20,1,1); // local 2 (work 20, right) + current_graph.add_vertex(10,1,1); // local 3 (work 10, left) + current_graph.add_vertex(20,1,1); // local 4 (work 20, right) + current_graph.add_vertex(10,1,1); // local 5 (work 10, left) + // Edges for {5,0} -> {3,2} -> {1,4} + current_graph.add_edge(5, 3); current_graph.add_edge(5, 2); // Rung 1 + current_graph.add_edge(0, 3); current_graph.add_edge(0, 2); + + current_graph.add_edge(3, 1); current_graph.add_edge(3, 4); // Rung 2 + current_graph.add_edge(2, 1); current_graph.add_edge(2, 4); + + std::vector> current_map = {111, 114, 113, 112, 115, 110}; + + IsomorphismMapper mapper(rep_graph); + auto result_map_local = mapper.find_mapping(current_graph); + + std::unordered_map, vertex_idx_t> result_map; + for(const auto& [curr_local, rep_local] : result_map_local) { + result_map[current_map[curr_local]] = rep_map[rep_local]; + } + + BOOST_REQUIRE_EQUAL(result_map.size(), 6); + // Check that structurally identical nodes are mapped, regardless of their original IDs. + // E.g., current global 110 (from local 5, work 10) must map to a rep node with work 10. + BOOST_CHECK_EQUAL(result_map.at(110), 10); // current 5 (work 10) -> rep 0 (work 10) + BOOST_CHECK_EQUAL(result_map.at(111), 11); // current 0 (work 20) -> rep 1 (work 20) +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/merkle_hash_divider.cpp b/tests/merkle_hash_divider.cpp index 7c0a1571..c991aeea 100644 --- a/tests/merkle_hash_divider.cpp +++ b/tests/merkle_hash_divider.cpp @@ -28,6 +28,8 @@ limitations under the License. 
#include "osp/auxiliary/io/dot_graph_file_reader.hpp" #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" #include "osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" @@ -39,6 +41,25 @@ limitations under the License. using namespace osp; +template +void check_partition_type_homogeneity(const GraphT& dag, const std::vector>& partition) { + // Group partitions by their ID + std::map, std::vector>> partitions; + for (vertex_idx_t i = 0; i < dag.num_vertices(); ++i) { + partitions[partition[i]].push_back(i); + } + + // For each partition, check that all vertices have the same type + for (const auto& [part_id, vertices] : partitions) { + if (vertices.empty()) continue; + const auto first_node_type = dag.vertex_type(vertices[0]); + for (const auto& vertex : vertices) { + BOOST_CHECK_EQUAL(dag.vertex_type(vertex), first_node_type); + } + } +} + + BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { using graph_t = computational_dag_vector_impl_def_t; @@ -50,17 +71,17 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) for (const auto& v : instance.vertices()) { - instance.getComputationalDag().set_vertex_comm_weight(v, instance.getComputationalDag().vertex_comm_weight(v) / 1064 + 1); - instance.getComputationalDag().set_vertex_work_weight(v, instance.getComputationalDag().vertex_work_weight(v) / 1000 + 1); + instance.getComputationalDag().set_vertex_comm_weight(v, instance.getComputationalDag().vertex_comm_weight(v)); + instance.getComputationalDag().set_vertex_work_weight(v, instance.getComputationalDag().vertex_work_weight(v)); } instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); instance.setDiagonalCompatibilityMatrix(2); - instance.setSynchronisationCosts(1000); - instance.setCommunicationCosts(1); + instance.setSynchronisationCosts(20000); + instance.setCommunicationCosts(10); - BspLocking greedy; - kl_total_comm_improver_mt kl; + GreedyChildren greedy; + kl_total_lambda_comm_improver_mt kl; ComboScheduler combo(greedy, kl); @@ -91,6 +112,8 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) auto partition = iso_scheduler.compute_partition(instance); + check_partition_type_homogeneity(instance.getComputationalDag(), partition); + graph_t corase_graph; coarser_util::construct_coarse_dag(instance.getComputationalDag(), corase_graph, partition);