flexflow
diff --git a/‎.proj.toml‎
Lines changed: 13 additions & 13 deletions b/‎.proj.toml‎
Lines changed: 13 additions & 13 deletions
diff --git a/‎lib/compiler/src/compiler/graph_optimize_state.cc‎
Lines changed: 0 additions & 33 deletions b/‎lib/compiler/src/compiler/graph_optimize_state.cc‎
Lines changed: 0 additions & 33 deletions
diff --git a/‎lib/compiler/test/src/compiler/machine_mapping/machine_view.cc‎
Lines changed: 43 additions & 45 deletions b/‎lib/compiler/test/src/compiler/machine_mapping/machine_view.cc‎
Lines changed: 43 additions & 45 deletions
diff --git a/‎lib/compiler/test/src/compiler/series_parallel/pcg/get_pcg_series_parallel_decomposition.cc‎
Lines changed: 46 additions & 35 deletions b/‎lib/compiler/test/src/compiler/series_parallel/pcg/get_pcg_series_parallel_decomposition.cc‎
Lines changed: 46 additions & 35 deletions
diff --git a/‎lib/compiler/test/src/compiler/unity_algorithm.cc‎
Lines changed: 0 additions & 26 deletions b/‎lib/compiler/test/src/compiler/unity_algorithm.cc‎
Lines changed: 0 additions & 26 deletions
diff --git a/‎lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc‎
Lines changed: 0 additions & 42 deletions b/‎lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc‎
Lines changed: 0 additions & 42 deletions
@@ -78,19 +78,19 @@ has-cpu-only-benchmarks = false
 has-cuda-tests = false
 has-cuda-benchmarks = false
 
-[targets.local-execution]
-type = "lib"
-has-cpu-only-tests = true
-has-cpu-only-benchmarks = false
-has-cuda-tests = true
-has-cuda-benchmarks = false
-
-[targets.local-pcg-execution]
-type = "lib"
-has-cpu-only-tests = true
-has-cpu-only-benchmarks = false
-has-cuda-tests = false
-has-cuda-benchmarks = false
+# [targets.local-execution]
+# type = "lib"
+# has-cpu-only-tests = true
+# has-cpu-only-benchmarks = false
+# has-cuda-tests = true
+# has-cuda-benchmarks = false
+
+# [targets.local-pcg-execution]
+# type = "lib"
+# has-cpu-only-tests = true
+# has-cpu-only-benchmarks = false
+# has-cuda-tests = false
+# has-cuda-benchmarks = false
 
 [targets.models]
 type = "lib"
 
@@ -64,39 +64,6 @@ static
 
 bool GraphOptimizeState::operator==(GraphOptimizeState const &other) const {
   return get_layer_signature_set(this->graph_optimize_result.mapped_pcg) == get_layer_signature_set(other.graph_optimize_result.mapped_pcg);
-  // // Note(@wmdi): This is a hack to implement a partially correct homomorphism
-  // // check. Switch to the homomorphism check used in substitutions right after
-  // // https://github.com/flexflow/FlexFlow/pull/1471 is merged.
-  // auto layers1 = topological_ordering(graph_optimize_result.mapped_pcg.pcg);
-  // auto layers2 = topological_ordering(other.graph_optimize_result.mapped_pcg.pcg);
-  // if (layers1.size() != layers2.size()) {
-  //   return false;
-  // }
-  // std::unordered_map<parallel_tensor_guid_t, parallel_tensor_guid_t> mapping;
-  // for (size_t i = 0; i < layers1.size(); ++i) {
-  //   if (get_parallel_layer_attrs(graph_optimize_result.mapped_pcg.pcg, layers1[i]) !=
-  //       get_parallel_layer_attrs(other.graph_optimize_result.mapped_pcg.pcg, layers2[i])) {
-  //     return false;
-  //   }
-  //
-  //   std::unordered_map<TensorSlotName, parallel_tensor_guid_t> inputs1 = get_incoming_tensors(graph_optimize_result.mapped_pcg.pcg, layers1[i]);
-  //   std::unordered_map<TensorSlotName, parallel_tensor_guid_t> inputs2 =
-  //       get_incoming_tensors(other.graph_optimize_result.mapped_pcg.pcg, layers2[i]);
-  //
-  //   for (TensorSlotName slot_name : require_same(keys(inputs1), keys(inputs2))) {
-  //     if (inputs1.at(slot_name) != mapping.at(inputs2.at(slot_name))) {
-  //       return false;
-  //     }
-  //   }
-  //
-  //   std::unordered_map<TensorSlotName, parallel_tensor_guid_t> outputs1 = get_layer_outputs(graph_optimize_result.mapped_pcg.pcg, layers1[i]);
-  //   std::unordered_map<TensorSlotName, parallel_tensor_guid_t> outputs2 =
-  //       get_layer_outputs(other.graph_optimize_result.mapped_pcg.pcg, layers2[i]);
-  //   for (TensorSlotName slot_name : require_same(keys(outputs1), keys(outputs2))) {
-  //     mapping.emplace(outputs2.at(slot_name), outputs1.at(slot_name));
-  //   }
-  // }
-  // return true;
 }
 
 bool GraphOptimizeState::operator!=(GraphOptimizeState const &other) const {
 
@@ -58,14 +58,13 @@ TEST_SUITE(FF_TEST_SUITE) {
 
   TEST_CASE("get_machine_space_coordinate") {
     SUBCASE("1D case") {
-
-      // This operator has shape (3,), and thus 3 tasks.
-      // The (only) dimension is projected on the INTER (device) dimension with
-      // a stride of 2. The start of the projection defined by MachineView
-      // starts at MachineSpaceCoordinate (0,1), and the machine space has 1
-      // node and 6 devices per node.
-
       /**
+       * This operator has shape (3,), and thus 3 tasks.
+       * The (only) dimension is projected on the INTRA (device) dimension with
+       * a stride of 2. The start of the projection defined by MachineView
+       * is at MachineSpaceCoordinate (0,1), and the machine space has 1
+       * node and 6 devices per node.
+       * 
        * The tasks will thus be distributed like this:
        *  +-------+-------+-------+-------+-------+-------+
        *  |       | (0,)  |       | (1,)  |       | (2,)  |
@@ -147,14 +146,14 @@ TEST_SUITE(FF_TEST_SUITE) {
     }
 
     SUBCASE("2D case - projection on different dimensions") {
-      // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
-      // The first dimension is projected onto the INTER (node) dimension with
-      // stride 1, while the second dimension is projected onto the INTRA
-      // (device) dimension with stride 2. The start of the projection defined
-      // by MachineView is at MachineSpaceCoordinates (1, 2), and the machine
-      // space has 3 nodes and 5 devices per node.
-
       /**
+       * This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
+       * The first dimension is projected onto the INTER (node) dimension with
+       * stride 1, while the second dimension is projected onto the INTRA
+       * (device) dimension with stride 2. The start of the projection defined
+       * by MachineView is at MachineSpaceCoordinates (1, 2), and the machine
+       * space has 3 nodes and 5 devices per node.
+       * 
        * The tasks will thus be distributed like this:
        *  +-------+-------+-------+-------+-------+
        *  |       |       |       |       |       |
@@ -229,13 +228,13 @@ TEST_SUITE(FF_TEST_SUITE) {
     }
 
     SUBCASE("2D case - projection on same dimension") {
-      // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
-      // Both dimensions are projected on the INTRA (device) dimension, with
-      // strides 1 and 2 respectively. The start of the projection defined by
-      // MachineView is at MachineSpaceCoordinates (1, 0), and the machine
-      // space has 2 nodes and 6 devices per node.
-
       /**
+       * This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
+       * Both dimensions are projected on the INTRA (device) dimension, with
+       * strides 1 and 2 respectively. The start of the projection defined by
+       * MachineView is at MachineSpaceCoordinates (1, 0), and the machine
+       * space has 2 nodes and 6 devices per node.
+       * 
        *  +-------+-------+-------+-------+-------+-------+
        *  | (0,0) | (1,0) |       |       | (0,1) | (1,1) |
        *  +-------+-------+-------+-------+-------+-------+
@@ -305,17 +304,17 @@ TEST_SUITE(FF_TEST_SUITE) {
     }
 
     SUBCASE("3D case") {
-      // This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks.
-      // - The first dimension is projected onto the INTER (node) dimension
-      // with stride 1,
-      // - The second dimension is projected onto the INTRA (device) dimension
-      // with stride 2,
-      // - The third dimension is projected onto the INTRA (device) dimension
-      // with stride 1. The start of the projection defined by MachineView is
-      // at MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes
-      // and 8 devices per node.
-
       /**
+       * This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks.
+       * - The first dimension is projected onto the INTER (node) dimension
+       * with stride 1,
+       * - The second dimension is projected onto the INTRA (device) dimension
+       * with stride 2,
+       * - The third dimension is projected onto the INTRA (device) dimension
+       * with stride 1.
+       * The projection defined by MachineView starts at MachineSpaceCoordinate
+       * (0, 1), and the machine space has 2 nodes and 8 devices per node.
+       *
        * The tasks will thus be distributed like this:
        *  +-------+-------+-------+-------+-------+-------+-------+-------+
        *  |       |(0,0,0)|       |(0,0,1)|       |(0,1,0)|       |(0,1,1)|
@@ -377,14 +376,13 @@ TEST_SUITE(FF_TEST_SUITE) {
 
   TEST_CASE("get_device_ids") {
     SUBCASE("1D machine view") {
-
-      // This operator has shape (3,), and thus 3 tasks.
-      // The (only) dimension is projected onto the INTRA (device) dimension
-      // with a stride of 2. The start of the projection defined by MachineView
-      // is at MachineSpaceCoordinate (0, 1), and the machine space has 1 node
-      // and 6 devices per node.
-
       /**
+       * This operator has shape (3,), and thus 3 tasks.
+       * The (only) dimension is projected onto the INTRA (device) dimension
+       * with a stride of 2. The start of the projection defined by MachineView
+       * is at MachineSpaceCoordinate (0, 1), and the machine space has 1 node
+       * and 6 devices per node.
+       *
        * The tasks will thus be distributed like this:
        *  +-------+-------+-------+-------+-------+-------+
        *  |   0   | ((1)) |   2   | ((3)) |   4   | ((5)) |
@@ -420,15 +418,15 @@ TEST_SUITE(FF_TEST_SUITE) {
     }
 
     SUBCASE("2D machine view") {
-      // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
-      // - The first dimension is projected onto the INTER (node) dimension with
-      // stride 1,
-      // - The second dimension is projected onto the INTRA (device) dimension
-      // with stride 2. The start of the projection defined by MachineView is at
-      // MachineSpaceCoordinate (1, 2), and the machine space has 3 nodes and 5
-      // devices per node.
-
       /**
+       * This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
+       * - The first dimension is projected onto the INTER (node) dimension with
+       * stride 1,
+       * - The second dimension is projected onto the INTRA (device) dimension
+       * with stride 2.
+       * The projection defined by MachineView starts at MachineSpaceCoordinate
+       * (1, 2), and the machine space has 3 nodes and 5 devices per node.
+       *
        * The tasks will thus be distributed like this:
        *  +-------+-------+-------+-------+-------+
        *  |   0   |   1   |   2   |   3   |   4   |
 
@@ -132,14 +132,16 @@ TEST_SUITE(FF_TEST_SUITE) {
 
     SUBCASE("SP without weight nodes but non-SP with weight nodes (parallel op "
             "chain following is not necessary)") {
-      // A minimal computation graph where without weights (w1 and w2) the
-      // computation graph is series-parallel, but with weight nodes it is not,
-      // but parallel op chain following is not necessary
-      // (in this case because there are no parallel ops involved)
-      //
-      // w1   input   w2
-      //  \   /   \   /
-      //   op1     op2
+      /**
+       * A minimal computation graph where without weights (w1 and w2) the
+       * computation graph is series-parallel, but with weight nodes it is not,
+       * but parallel op chain following is not necessary
+       * (in this case because there are no parallel ops involved)
+       *
+       * w1   input   w2
+       *  \   /   \   /
+       *   op1     op2
+       */
 
       ParallelComputationGraph pcg = empty_parallel_computation_graph();
 
@@ -227,20 +229,23 @@ TEST_SUITE(FF_TEST_SUITE) {
 
     SUBCASE("SP without weight nodes but non-SP with weight node (parallel op "
             "chain following necessary)") {
-      // A minimal computation graph where without weights (w1 and w2) the
-      // computation graph is series-parallel, but with weight nodes it is not
-      // and parallel op chain following is necessary
-      //
-      // w1   input   w2
-      //  |    |       |
-      //  |    p2     p4
-      //  |    |       |
-      // p1    p3     p5
-      //  |    |       |
-      //  |    |\     /
-      //  |  op0 \    |
-      //  \   /   |  /
-      //   op1    op2
+      
+      /**
+       * A minimal computation graph where without weights (w1 and w2) the
+       * computation graph is series-parallel, but with weight nodes it is not
+       * and parallel op chain following is necessary
+       *
+       * w1   input   w2
+       *  |    |       |
+       *  |    p2     p4
+       *  |    |       |
+       * p1    p3     p5
+       *  |    |       |
+       *  |    |\     /
+       *  |  op0 \    |
+       *  \   /   |  /
+       *   op1    op2
+       */
 
       ParallelComputationGraph pcg = empty_parallel_computation_graph();
 
@@ -419,11 +424,14 @@ TEST_SUITE(FF_TEST_SUITE) {
     SUBCASE("SP with or without preprocessing, but preprocessing would change "
             "resulting SP "
             "decomposition") {
-      // parallel computation graph:
-      //
-      //  input1   input2
-      //    |        |
-      //   op1      op2
+
+      /**
+       * parallel computation graph:
+       *
+       *  input1   input2
+       *    |        |
+       *   op1      op2
+       */
 
       ParallelLayerAddedResult input1_added =
           add_parallel_layer(pcg, make_layer_attrs(input_attrs), {}, {});
@@ -457,14 +465,17 @@ TEST_SUITE(FF_TEST_SUITE) {
     }
 
     SUBCASE("not SP with or without weight nodes") {
-      // parallel computation graph:
-      //
-      //    input1
-      //     /  \
-      //   op1  op2
-      //    | \  |
-      //    |  \ |
-      //   op3  op4
+
+      /**
+       * parallel computation graph:
+       *
+       *    input1
+       *     /  \
+       *   op1  op2
+       *    | \  |
+       *    |  \ |
+       *   op3  op4
+       */
 
       ParallelLayerAddedResult input1_added =
           add_parallel_layer(pcg, make_layer_attrs(input_attrs), {}, {});
 
@@ -16,46 +16,4 @@ std::unordered_map<symbolic_layer_guid_t, std::optional<DeviceSpecificPerDeviceO
     });
 }
 
-
-//
-//
-// TaskArgumentAccessor
-//     get_task_arg_accessor(LocalParallelTensorBacking const &local_parallel_tensor_backing,
-//                           RuntimeArgConfig const &runtime_arg_config,
-//                           TaskInvocation const &invocation,
-//                           Allocator &allocator) {
-//   std::unordered_map<training_tensor_slot_id_t, TensorSlotBacking>
-//       tensor_slots_backing = construct_tensor_slots_backing_for_binding(
-//           local_tensor_backing, invocation.binding);
-//
-//   std::unordered_map<slot_id_t, ConcreteArgSpec> arg_slots_backing = 
-//       construct_arg_slots_backing(invocation.binding, runtime_arg_config);
-//
-//   return TaskArgumentAccessor::create<LocalTaskArgumentAccessor>(
-//       allocator, tensor_slots_backing, arg_slots_backing, );
-// }
-//
-// LocalPcgArgsBacking make_local_pcg_args_backing_for_parallel_computation_graph(
-//     LocalTaskRegistry const &task_registry,
-//     TrainingParallelComputationGraph const &training_pcg,
-//     RuntimeArgConfig const &runtime_arg_config,
-//     LocalParallelTensorBacking const &local_parallel_tensor_backing,
-//     Allocator &allocator) {
-//
-//   std::unordered_map<parallel_layer_instance_id, std::optional<DeviceSpecificPerDeviceOpState>>
-//       per_device_op_states = generate_map(
-//           get_parallel_layers(training_pcg.pcg),
-//           [&](parallel_layer_instance_id const &parallel_layer_guid) {
-//             return create_per_device_op_state(
-//                 task_registry,
-//                 local_tensor_backing,
-//                 runtime_arg_config,
-//                 allocator,
-//                 get_training_layer_plus_context(training_computation_graph,
-//                                                 layer_guid));
-//           });
-//
-// }
-
-
 } // namespace FlexFlow