victorli2002
diff --git a/‎lib/compiler/include/compiler/machine_mapping/allowed_machine_views.h‎
Lines changed: 21 additions & 0 deletions b/‎lib/compiler/include/compiler/machine_mapping/allowed_machine_views.h‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎lib/compiler/include/compiler/machine_mapping/apply_substitution_and_update_machine_mapping.h‎
Lines changed: 32 additions & 0 deletions b/‎lib/compiler/include/compiler/machine_mapping/apply_substitution_and_update_machine_mapping.h‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎lib/compiler/include/compiler/machine_mapping/machine_mapping_mutation_set.h‎
Lines changed: 19 additions & 0 deletions b/‎lib/compiler/include/compiler/machine_mapping/machine_mapping_mutation_set.h‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎lib/compiler/include/compiler/mcmc/mcmc_over_mapped_pcg.h‎
Lines changed: 22 additions & 0 deletions b/‎lib/compiler/include/compiler/mcmc/mcmc_over_mapped_pcg.h‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎lib/compiler/include/compiler/mcmc/mcmc_over_mapped_pcg_config.struct.toml‎
Lines changed: 28 additions & 0 deletions b/‎lib/compiler/include/compiler/mcmc/mcmc_over_mapped_pcg_config.struct.toml‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎lib/compiler/include/compiler/search_result.h‎
Lines changed: 13 additions & 0 deletions b/‎lib/compiler/include/compiler/search_result.h‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎lib/compiler/include/compiler/search_result.struct.toml‎
Lines changed: 17 additions & 0 deletions b/‎lib/compiler/include/compiler/search_result.struct.toml‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎lib/compiler/src/compiler/allowed_machine_views.cc‎
Lines changed: 2 additions & 0 deletions b/‎lib/compiler/src/compiler/allowed_machine_views.cc‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎lib/compiler/src/compiler/machine_mapping/apply_substitution_and_update_machine_mapping.cc‎
Lines changed: 197 additions & 0 deletions b/‎lib/compiler/src/compiler/machine_mapping/apply_substitution_and_update_machine_mapping.cc‎
Lines changed: 197 additions & 0 deletions
@@ -0,0 +1,21 @@
+#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
+#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
+
+#include "pcg/machine_specification.dtg.h"
+#include "pcg/machine_view.dtg.h"
+#include "pcg/operator_task_space.dtg.h"
+
+namespace FlexFlow {
+
+/**
+ * @brief Reports whether \p mv is a valid MachineView for \p task on the
+ * machine described by \p ms.
+ *
+ * NOTE(review): the exact validity criteria live in the implementation, which
+ * is not visible from this header -- confirm there before relying on
+ * specifics.
+ */
+bool is_valid_machine_view(MachineView const &mv,
+                           OperatorTaskSpace const &task,
+                           MachineSpecification const &ms);
+
+/**
+ * @brief Returns the set of MachineViews allowed for \p task on
+ * \p machine_spec for devices of type \p device_type.
+ *
+ * NOTE(review): presumably these are exactly the views accepted by
+ * is_valid_machine_view -- confirm against the implementation.
+ */
+std::unordered_set<MachineView>
+    get_allowed_machine_views(MachineSpecification const &machine_spec,
+                              OperatorTaskSpace const &task,
+                              DeviceType device_type);
+
+} // namespace FlexFlow
+
+#endif
@@ -0,0 +1,32 @@
+#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_APPLY_SUBSTITUTION_AND_UPDATE_MACHINE_MAPPING_H
+#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_APPLY_SUBSTITUTION_AND_UPDATE_MACHINE_MAPPING_H
+
+#include "compiler/search_result.dtg.h"
+#include "substitutions/pcg_pattern_match.dtg.h"
+#include "substitutions/sub_parallel_computation_graph.dtg.h"
+#include "substitutions/substitution.dtg.h"
+
+namespace FlexFlow {
+/**
+ * @brief Applies \p sub to \p mapped_pcg at the location specified by
+ * \p match, returning the resulting SearchResult (mapped pcg)
+ *
+ * @param mapped_pcg The pcg, together with its machine mapping, to rewrite
+ * @param sub The substitution to apply
+ * @param match The location at which to apply \p sub. This location in
+ * the pcg of \p mapped_pcg should match the PCGPattern of \p sub. Likely
+ * created by running
+ * FlexFlow::find_pattern_matches(PCGPattern const &,
+ * SubParallelComputationGraph const &).
+ * @return SearchResult A mapped pcg similar to \p mapped_pcg, but with
+ * the subgraph of the pcg specified by \p match replaced with the result of
+ * the output expression of \p sub and the machine mapping updated to account
+ * for the new output
+ */
+SearchResult apply_substitution_and_update_machine_mapping(
+    SearchResult const &mapped_pcg,
+    Substitution const &sub,
+    PCGPatternMatch const &match);
+
+} // namespace FlexFlow
+
+#endif
@@ -0,0 +1,19 @@
+#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MCMC_MACHINE_MAPPING_MUTATION_SET_H
+#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MCMC_MACHINE_MAPPING_MUTATION_SET_H
+
+#include "compiler/machine_mapping/machine_mapping.h"
+#include "compiler/search_result.dtg.h"
+
+namespace FlexFlow {
+/**
+ * @brief Produces a baseline ("naive") MachineMapping for \p pcg on
+ * \p resources for devices of type \p device_type.
+ *
+ * @return The mapping, or std::nullopt when no mapping is produced.
+ *
+ * NOTE(review): the conditions that yield std::nullopt, and whether \p pcg is
+ * actually modified (it is taken by non-const reference), are decided by the
+ * implementation, which is not visible from this header -- confirm.
+ */
+std::optional<MachineMapping>
+    get_naive_mapping(ParallelComputationGraph &pcg,
+                      MachineSpecification const &resources,
+                      DeviceType const &device_type);
+
+/**
+ * @brief Produces a randomly mutated MachineMapping for \p mapped_pcg on
+ * \p resource for devices of type \p device_type.
+ *
+ * \p mapped_pcg is taken by value, so the caller's SearchResult is never
+ * modified.
+ *
+ * @return The mutated mapping, or std::nullopt when no mutation is produced.
+ *
+ * NOTE(review): what constitutes a mutation and when std::nullopt is returned
+ * are decided by the implementation, which is not visible from this header --
+ * confirm.
+ */
+std::optional<MachineMapping>
+    get_random_mutation(SearchResult mapped_pcg,
+                        MachineSpecification const &resource,
+                        DeviceType const &device_type);
+} // namespace FlexFlow
+
+#endif
@@ -0,0 +1,22 @@
+#ifndef _FLEXFLOW_COMPILER_MCMC_OVER_MAPPED_PCG_H
+#define _FLEXFLOW_COMPILER_MCMC_OVER_MAPPED_PCG_H
+
+#include "compiler/cost_estimator/runtime_only_cost_estimator.h"
+#include "compiler/mcmc/mcmc_over_mapped_pcg_config.dtg.h"
+#include "compiler/search_result.dtg.h"
+#include "pcg/computation_graph.h"
+#include "pcg/machine_specification.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
+#include "substitutions/sub_parallel_computation_graph.h"
+#include "substitutions/substitution.h"
+
+namespace FlexFlow {
+
+/**
+ * @brief Optimizes \p pcg and returns the resulting pcg together with its
+ * machine mapping as a SearchResult.
+ *
+ * @param pcg The starting graph. Taken by non-const reference.
+ * NOTE(review): whether it is actually modified is not visible from this
+ * header -- confirm against the implementation.
+ * @param cost_estimator Estimator used to cost candidates
+ * @param resources The machine being mapped onto
+ * @param search_config Search parameters (temperature, iteration count,
+ * substitution interval, device type)
+ *
+ * NOTE(review): the name indicates a Markov-chain Monte Carlo search over
+ * mapped pcgs; the acceptance rule and how each config field is used are
+ * defined in the implementation -- confirm there.
+ */
+SearchResult mcmc_graph_optimize(ParallelComputationGraph &pcg,
+                                 RuntimeOnlyCostEstimator const &cost_estimator,
+                                 MachineSpecification const &resources,
+                                 MCMCOverMappedPCGConfig const &search_config);
+
+} // namespace FlexFlow
+
+#endif
@@ -0,0 +1,28 @@
+# Struct spec for the configuration passed to the MCMC search over mapped
+# pcgs (consumed as MCMCOverMappedPCGConfig via the generated .dtg.h header).
+namespace = "FlexFlow"
+name = "MCMCOverMappedPCGConfig"
+features = [
+  "eq",
+  "hash",
+  "fmt",
+]
+
+includes = [
+  "pcg/device_type.dtg.h",
+  "utils/nonnegative_int/nonnegative_int.h"
+]
+
+# NOTE(review): presumably the MCMC temperature used when deciding whether to
+# accept a candidate -- confirm against the search implementation.
+[[fields]]
+name = "temperature"
+type = "float"
+
+# NOTE(review): presumably the total number of search iterations -- confirm.
+[[fields]]
+name = "num_iterations"
+type = "::FlexFlow::nonnegative_int"
+
+# NOTE(review): presumably how often (in iterations) a substitution is
+# attempted instead of a machine-mapping mutation -- confirm.
+[[fields]]
+name = "substitution_interval"
+type = "::FlexFlow::nonnegative_int"
+
+# Device type that machine mappings are generated for.
+[[fields]]
+name = "device_type"
+type = "::FlexFlow::DeviceType"
@@ -0,0 +1,13 @@
+#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_SEARCH_RESULT_H
+#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_SEARCH_RESULT_H
+
+// NOTE: the guard macro is derived from this file's own path. Reusing the
+// macro of another header (e.g. graph_optimize_result.h) would cause whichever
+// of the two is included second to be skipped entirely.
+
+#include "compiler/search_result.dtg.h"
+
+namespace FlexFlow {
+
+/**
+ * @brief Returns a string representation of a SearchResult (fmt hook).
+ */
+std::string format_as(SearchResult const &);
+
+/**
+ * @brief Writes a SearchResult to an output stream and returns the stream.
+ */
+std::ostream &operator<<(std::ostream &, SearchResult const &);
+
+} // namespace FlexFlow
+
+#endif
@@ -0,0 +1,17 @@
+# Struct spec for SearchResult: a parallel computation graph paired with the
+# machine mapping of its layers.
+namespace = "FlexFlow"
+name = "SearchResult"
+# No generated features are requested here.
+# NOTE(review): formatting is declared by hand in compiler/search_result.h --
+# confirm that eq/hash are intentionally omitted.
+features = [
+]
+
+includes = [
+  "pcg/parallel_computation_graph/parallel_computation_graph.h",
+  "compiler/machine_mapping/machine_mapping.h",
+]
+
+[[fields]]
+name = "pcg"
+type = "::FlexFlow::ParallelComputationGraph"
+
+[[fields]]
+name = "machine_mapping"
+type = "::FlexFlow::MachineMapping"
@@ -57,6 +57,8 @@ static std::unordered_set<MachineView>
         product(transform(tensor_dims, [](positive_int num_devices) {
           return nonnegative_int{num_devices.int_from_positive_int() - 1};
         }));
+    min_num_devices_with_full_stride_volume =
+        std::max(min_num_devices_with_full_stride_volume, 1_n);
     return ceildiv(total_devices,
                    positive_int{min_num_devices_with_full_stride_volume});
   };
 
@@ -0,0 +1,197 @@
+#include "compiler/machine_mapping/apply_substitution_and_update_machine_mapping.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h"
+#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.h"
+#include "substitutions/apply_substitution/apply_substitution.h"
+#include "substitutions/apply_substitution/evaluate_substitution_output.h"
+#include "substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.h"
+#include "substitutions/open_parallel_tensor_guid_t.h"
+#include "substitutions/pcg_pattern_match.h"
+#include "substitutions/sub_parallel_computation_graph.h"
+#include "substitutions/sub_parallel_computation_graph_data.dtg.h"
+#include "substitutions/sub_parallel_computation_graph_edge.h"
+#include "utils/containers/is_subseteq_of.h"
+#include "utils/containers/keys.h"
+#include "utils/containers/merge_maps.h"
+#include "utils/containers/restrict_keys.h"
+#include "utils/containers/set_minus.h"
+#include "utils/containers/values.h"
+#include <cassert>
+
+namespace FlexFlow {
+
+/**
+ * @brief Applies \p sub to the pcg of \p mapped_pcg at \p match and carries
+ * the machine mapping over to the rewritten graph.
+ *
+ * Layers outside the match keep their existing machine views. Every layer
+ * produced by the substitution is assigned one machine view taken from the
+ * matched (replaced) layers.
+ *
+ * @param mapped_pcg The pcg and its machine mapping. Every matched layer must
+ * have an entry in the machine mapping.
+ * @param sub The substitution to apply
+ * @param match Where to apply \p sub. Must assign at least one layer.
+ * @return The rewritten pcg together with a machine mapping whose keys are
+ * exactly the layers of the rewritten pcg
+ */
+SearchResult apply_substitution_and_update_machine_mapping(
+    SearchResult const &mapped_pcg,
+    Substitution const &sub,
+    PCGPatternMatch const &match) {
+  SubParallelComputationGraph spcg = sub_pcg_from_full_pcg(mapped_pcg.pcg);
+
+  auto substitution_output_result =
+      evaluate_substitution_output(spcg, sub, match);
+  SubParallelComputationGraph substitution_output_graph =
+      substitution_output_result.first;
+  OutputExprToResultSubPCGMapping output_expr_to_result_sub_pcg_mapping =
+      substitution_output_result.second;
+
+  SubParallelComputationGraphData output_graph_data =
+      get_sub_pcg_data(substitution_output_graph);
+  SubParallelComputationGraphData pre_data = get_sub_pcg_data(spcg);
+
+  std::unordered_set<parallel_layer_guid_t> pre_nodes =
+      keys(pre_data.node_data);
+  std::unordered_set<parallel_layer_guid_t> matched_nodes =
+      unordered_set_of(values(match.node_assignment));
+  std::unordered_set<parallel_layer_guid_t> post_nodes_from_original_graph =
+      set_minus(pre_nodes, matched_nodes);
+
+  std::unordered_map<parallel_layer_guid_t, MachineView> machine_views =
+      mapped_pcg.machine_mapping.machine_views;
+
+  std::unordered_set<MachineView> substituted_machine_views =
+      transform(matched_nodes, [&](parallel_layer_guid_t const &node) {
+        return machine_views.at(node);
+      });
+
+  // A match with no layers would leave nothing to inherit a machine view
+  // from, and dereferencing begin() of an empty set is undefined behavior.
+  assert(!substituted_machine_views.empty());
+
+  // NOTE: when the matched layers use several distinct machine views, which
+  // one is picked depends on the iteration order of the unordered_set.
+  MachineView first_substituted_machine_view =
+      *substituted_machine_views.begin();
+
+  // Every layer introduced by the substitution inherits that machine view.
+  for (parallel_layer_guid_t const &new_layer :
+       keys(output_graph_data.node_data)) {
+    machine_views.insert_or_assign(new_layer, first_substituted_machine_view);
+  }
+
+  // Surviving original layers plus the layers created by the substitution.
+  std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs>
+      post_node_data_from_orig =
+          restrict_keys(pre_data.node_data, post_nodes_from_original_graph);
+  std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs> post_node_data =
+      merge_disjoint_maps(post_node_data_from_orig,
+                          output_graph_data.node_data);
+
+  std::unordered_set<SubParallelComputationGraphEdge> post_edges = [&] {
+    // Original edges survive unless they touch a matched (replaced) layer;
+    // graph-input edges are always kept.
+    std::unordered_set<SubParallelComputationGraphEdge> post_edges_from_orig =
+        filter(pre_data.edges, [&](SubParallelComputationGraphEdge const &e) {
+          if (e.raw_edge.has<DataflowInputEdge>()) {
+            return true;
+          } else {
+            DataflowEdge dfe = e.raw_edge.get<DataflowEdge>();
+            parallel_layer_guid_t src = parallel_layer_guid_t{dfe.src.node};
+            parallel_layer_guid_t dst = parallel_layer_guid_t{dfe.dst.node};
+            return !(contains(matched_nodes, src) ||
+                     contains(matched_nodes, dst));
+          }
+        });
+
+    // Internal edges of the substitution output; its input edges are
+    // dropped here and re-created below from the tensors of the base graph.
+    std::unordered_set<SubParallelComputationGraphEdge> post_edges_from_sub =
+        filter(output_graph_data.edges,
+               [&](SubParallelComputationGraphEdge const &e) {
+                 return !e.raw_edge.has<DataflowInputEdge>();
+               });
+
+    bidict<PatternNodeOutput, parallel_tensor_guid_t>
+        output_orig_pattern_mapping = get_output_mapping_for_pcg_pattern_match(
+            match, sub.pcg_pattern, spcg);
+    bidict<parallel_tensor_guid_t, OutputGraphExprNodeOutput>
+        output_post_outexpr_mapping = get_output_graph_expr_output_mapping(
+            output_expr_to_result_sub_pcg_mapping,
+            sub.output_graph_expr,
+            substitution_output_graph);
+
+    // Connect each base-graph tensor feeding the match to every use of the
+    // corresponding input inside the substitution output.
+    std::unordered_set<SubParallelComputationGraphEdge> incoming_to_sub_edges;
+    for (auto const &[pattern_input, base_graph_tensor] :
+         match.input_assignment) {
+      OutputGraphExprInput output_expr_input =
+          sub.inputs_mapping.at_l(pattern_input);
+      input_parallel_tensor_guid_t output_graph_input =
+          output_expr_to_result_sub_pcg_mapping.input_mapping.at_r(
+              output_expr_input);
+      std::unordered_set<parallel_tensor_use_t> uses = get_parallel_tensor_uses(
+          substitution_output_graph,
+          open_parallel_tensor_guid_from_input(output_graph_input));
+      for (parallel_tensor_use_t const &use : uses) {
+        SubParallelComputationGraphEdge new_edge =
+            subpcg_edge_from_tensor_and_use(base_graph_tensor, use);
+        incoming_to_sub_edges.insert(new_edge);
+      }
+    }
+
+    // Re-route every edge that left the matched subgraph so that it now
+    // originates from the corresponding output of the substitution.
+    std::unordered_set<SubParallelComputationGraphEdge> outgoing_from_sub_edges;
+    for (ParallelComputationGraphEdge const &outgoing_edge :
+         get_subgraph_outgoing_edges(spcg, matched_nodes)) {
+      parallel_tensor_guid_t original_tensor =
+          get_parallel_tensor(outgoing_edge);
+      PatternNodeOutput pattern_tensor =
+          output_orig_pattern_mapping.at_r(original_tensor);
+      OutputGraphExprNodeOutput output_graph_tensor =
+          sub.outputs_mapping.at_l(pattern_tensor);
+      parallel_tensor_guid_t new_tensor =
+          output_post_outexpr_mapping.at_r(output_graph_tensor);
+
+      SubParallelComputationGraphEdge new_edge =
+          subpcg_edge_from_tensor_and_dst(
+              new_tensor,
+              get_dst_layer(outgoing_edge),
+              get_dst_layer_input_idx(outgoing_edge));
+      outgoing_from_sub_edges.insert(new_edge);
+    }
+
+    return set_union(std::vector{
+        post_edges_from_orig,
+        post_edges_from_sub,
+        incoming_to_sub_edges,
+        outgoing_from_sub_edges,
+    });
+  }();
+
+  std::unordered_set<input_parallel_tensor_guid_t> post_inputs =
+      pre_data.inputs;
+
+  std::unordered_map<open_parallel_tensor_guid_t, ParallelTensorAttrs>
+      post_value_data = [&] {
+        // Keep graph inputs and the tensors produced by surviving layers.
+        std::unordered_map<open_parallel_tensor_guid_t, ParallelTensorAttrs>
+            post_value_data_from_orig = filter_keys(
+                pre_data.value_data, [&](open_parallel_tensor_guid_t const &t) {
+                  return visit_open_parallel_tensor_guid(
+                      t,
+                      overload{
+                          [&](parallel_tensor_guid_t const &t) {
+                            return contains(post_nodes_from_original_graph,
+                                            get_source_layer(t));
+                          },
+                          [](input_parallel_tensor_guid_t const &) {
+                            return true;
+                          },
+                      });
+                });
+
+        std::unordered_map<open_parallel_tensor_guid_t, ParallelTensorAttrs>
+            post_value_data_from_sub = output_graph_data.value_data;
+        return merge_disjoint_maps(post_value_data_from_orig,
+                                   post_value_data_from_sub);
+      }();
+
+  SubParallelComputationGraphData post_data = SubParallelComputationGraphData{
+      post_node_data,
+      post_edges,
+      post_inputs,
+      post_value_data,
+  };
+
+  assert(is_subseteq_of(keys(post_node_data), keys(machine_views)));
+
+  // Drop the machine views of layers that no longer exist (the matched
+  // layers that were replaced by the substitution).
+  for (auto it = machine_views.begin(); it != machine_views.end();) {
+    if (post_node_data.find(it->first) == post_node_data.end()) {
+      it = machine_views.erase(it);
+    } else {
+      ++it;
+    }
+  }
+
+  assert(keys(post_node_data) == keys(machine_views));
+
+  return SearchResult{
+      pcg_from_sub_pcg_by_dropping_inputs(sub_pcg_from_graph_data(post_data)),
+      MachineMapping{machine_views}};
+}
+
+} // namespace FlexFlow