Skip to content

Commit 85eded6

Browse files
committed
Pass tests
1 parent c8803ea commit 85eded6

File tree

25 files changed

+592
-429
lines changed

25 files changed

+592
-429
lines changed

.proj.toml

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,19 +78,19 @@ has-cpu-only-benchmarks = false
7878
has-cuda-tests = false
7979
has-cuda-benchmarks = false
8080

81-
[targets.local-execution]
82-
type = "lib"
83-
has-cpu-only-tests = true
84-
has-cpu-only-benchmarks = false
85-
has-cuda-tests = true
86-
has-cuda-benchmarks = false
87-
88-
[targets.local-pcg-execution]
89-
type = "lib"
90-
has-cpu-only-tests = true
91-
has-cpu-only-benchmarks = false
92-
has-cuda-tests = false
93-
has-cuda-benchmarks = false
81+
# [targets.local-execution]
82+
# type = "lib"
83+
# has-cpu-only-tests = true
84+
# has-cpu-only-benchmarks = false
85+
# has-cuda-tests = true
86+
# has-cuda-benchmarks = false
87+
88+
# [targets.local-pcg-execution]
89+
# type = "lib"
90+
# has-cpu-only-tests = true
91+
# has-cpu-only-benchmarks = false
92+
# has-cuda-tests = false
93+
# has-cuda-benchmarks = false
9494

9595
[targets.models]
9696
type = "lib"

lib/compiler/src/compiler/graph_optimize_state.cc

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -64,39 +64,6 @@ static
6464

6565
bool GraphOptimizeState::operator==(GraphOptimizeState const &other) const {
6666
return get_layer_signature_set(this->graph_optimize_result.mapped_pcg) == get_layer_signature_set(other.graph_optimize_result.mapped_pcg);
67-
// // Note(@wmdi): This is a hack to implement a partially correct homomorphism
68-
// // check. Switch to the homomorphism check used in substitutions right after
69-
// // https://github.com/flexflow/FlexFlow/pull/1471 is merged.
70-
// auto layers1 = topological_ordering(graph_optimize_result.mapped_pcg.pcg);
71-
// auto layers2 = topological_ordering(other.graph_optimize_result.mapped_pcg.pcg);
72-
// if (layers1.size() != layers2.size()) {
73-
// return false;
74-
// }
75-
// std::unordered_map<parallel_tensor_guid_t, parallel_tensor_guid_t> mapping;
76-
// for (size_t i = 0; i < layers1.size(); ++i) {
77-
// if (get_parallel_layer_attrs(graph_optimize_result.mapped_pcg.pcg, layers1[i]) !=
78-
// get_parallel_layer_attrs(other.graph_optimize_result.mapped_pcg.pcg, layers2[i])) {
79-
// return false;
80-
// }
81-
//
82-
// std::unordered_map<TensorSlotName, parallel_tensor_guid_t> inputs1 = get_incoming_tensors(graph_optimize_result.mapped_pcg.pcg, layers1[i]);
83-
// std::unordered_map<TensorSlotName, parallel_tensor_guid_t> inputs2 =
84-
// get_incoming_tensors(other.graph_optimize_result.mapped_pcg.pcg, layers2[i]);
85-
//
86-
// for (TensorSlotName slot_name : require_same(keys(inputs1), keys(inputs2))) {
87-
// if (inputs1.at(slot_name) != mapping.at(inputs2.at(slot_name))) {
88-
// return false;
89-
// }
90-
// }
91-
//
92-
// std::unordered_map<TensorSlotName, parallel_tensor_guid_t> outputs1 = get_layer_outputs(graph_optimize_result.mapped_pcg.pcg, layers1[i]);
93-
// std::unordered_map<TensorSlotName, parallel_tensor_guid_t> outputs2 =
94-
// get_layer_outputs(other.graph_optimize_result.mapped_pcg.pcg, layers2[i]);
95-
// for (TensorSlotName slot_name : require_same(keys(outputs1), keys(outputs2))) {
96-
// mapping.emplace(outputs2.at(slot_name), outputs1.at(slot_name));
97-
// }
98-
// }
99-
// return true;
10067
}
10168

10269
bool GraphOptimizeState::operator!=(GraphOptimizeState const &other) const {

lib/compiler/test/src/compiler/machine_mapping/machine_view.cc

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,13 @@ TEST_SUITE(FF_TEST_SUITE) {
5858

5959
TEST_CASE("get_machine_space_coordinate") {
6060
SUBCASE("1D case") {
61-
62-
// This operator has shape (3,), and thus 3 tasks.
63-
// The (only) dimension is projected on the INTER (device) dimension with
64-
// a stride of 2. The start of the projection defined by MachineView
65-
// starts at MachineSpaceCoordinate (0,1), and the machine space has 1
66-
// node and 6 devices per node.
67-
6861
/**
62+
* This operator has shape (3,), and thus 3 tasks.
63+
* The (only) dimension is projected on the INTER (device) dimension with
64+
* a stride of 2. The start of the projection defined by MachineView
65+
* starts at MachineSpaceCoordinate (0,1), and the machine space has 1
66+
* node and 6 devices per node.
67+
*
6968
* The tasks will thus be distributed like this:
7069
* +-------+-------+-------+-------+-------+-------+
7170
* | | (0,) | | (1,) | | (2,) |
@@ -147,14 +146,14 @@ TEST_SUITE(FF_TEST_SUITE) {
147146
}
148147

149148
SUBCASE("2D case - projection on different dimensions") {
150-
// This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
151-
// The first dimension is projected onto the INTER (node) dimension with
152-
// stride 1, while the second dimension is projected onto the INTRA
153-
// (device) dimension with stride 2. The start of the projection defined
154-
// by MachineView is at MachineSpaceCoordinates (1, 2), and the machine
155-
// space has 3 nodes and 5 devices per node.
156-
157149
/**
150+
* This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
151+
* The first dimension is projected onto the INTER (node) dimension with
152+
* stride 1, while the second dimension is projected onto the INTRA
153+
* (device) dimension with stride 2. The start of the projection defined
154+
* by MachineView is at MachineSpaceCoordinates (1, 2), and the machine
155+
* space has 3 nodes and 5 devices per node.
156+
*
158157
* The tasks will thus be distributed like this:
159158
* +-------+-------+-------+-------+-------+
160159
* | | | | | |
@@ -229,13 +228,13 @@ TEST_SUITE(FF_TEST_SUITE) {
229228
}
230229

231230
SUBCASE("2D case - projection on same dimension") {
232-
// This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
233-
// Both dimensions are projected on the INTRA (device) dimension, with
234-
// strides 1 and 2 respectively. The start of the projection defined by
235-
// MachineView is at MachineSpaceCoordinates (1, 0), and the machine
236-
// space has 2 nodes and 6 devices per node.
237-
238231
/**
232+
* This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
233+
* Both dimensions are projected on the INTRA (device) dimension, with
234+
* strides 1 and 2 respectively. The start of the projection defined by
235+
* MachineView is at MachineSpaceCoordinates (1, 0), and the machine
236+
* space has 2 nodes and 6 devices per node.
237+
*
239238
* +-------+-------+-------+-------+-------+-------+
240239
* | (0,0) | (1,0) | | | (0,1) | (1,1) |
241240
* +-------+-------+-------+-------+-------+-------+
@@ -305,17 +304,17 @@ TEST_SUITE(FF_TEST_SUITE) {
305304
}
306305

307306
SUBCASE("3D case") {
308-
// This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks.
309-
// - The first dimension is projected onto the INTER (node) dimension
310-
// with stride 1,
311-
// - The second dimension is projected onto the INTRA (device) dimension
312-
// with stride 2,
313-
// - The third dimension is projected onto the INTRA (device) dimension
314-
// with stride 1. The start of the projection defined by MachineView is
315-
// at MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes
316-
// and 8 devices per node.
317-
318307
/**
308+
* This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks.
309+
* - The first dimension is projected onto the INTER (node) dimension
310+
* with stride 1,
311+
* - The second dimension is projected onto the INTRA (device) dimension
312+
* with stride 2,
313+
* - The third dimension is projected onto the INTRA (device) dimension
314+
* with stride 1. The start of the projection defined by MachineView is
315+
* at MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes
316+
* and 8 devices per node.
317+
*
319318
* The tasks will thus be distributed like this:
320319
* +-------+-------+-------+-------+-------+-------+-------+-------+
321320
* | |(0,0,0)| |(0,0,1)| |(0,1,0)| |(0,1,1)|
@@ -377,14 +376,13 @@ TEST_SUITE(FF_TEST_SUITE) {
377376

378377
TEST_CASE("get_device_ids") {
379378
SUBCASE("1D machine view") {
380-
381-
// This operator has shape (3,), and thus 3 tasks.
382-
// The (only) dimension is projected onto the INTRA (device) dimension
383-
// with a stride of 2. The start of the projection defined by MachineView
384-
// is at MachineSpaceCoordinate (0, 1), and the machine space has 1 node
385-
// and 6 devices per node.
386-
387379
/**
380+
* This operator has shape (3,), and thus 3 tasks.
381+
* The (only) dimension is projected onto the INTRA (device) dimension
382+
* with a stride of 2. The start of the projection defined by MachineView
383+
* is at MachineSpaceCoordinate (0, 1), and the machine space has 1 node
384+
* and 6 devices per node.
385+
*
388386
* The tasks will thus be distributed like this:
389387
* +-------+-------+-------+-------+-------+-------+
390388
* | 0 | ((1)) | 2 | ((3)) | 4 | ((5)) |
@@ -420,15 +418,15 @@ TEST_SUITE(FF_TEST_SUITE) {
420418
}
421419

422420
SUBCASE("2D machine view") {
423-
// This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
424-
// - The first dimension is projected onto the INTER (node) dimension with
425-
// stride 1,
426-
// - The second dimension is projected onto the INTRA (device) dimension
427-
// with stride 2. The start of the projection defined by MachineView is at
428-
// MachineSpaceCoordinate (1, 2), and the machine space has 3 nodes and 5
429-
// devices per node.
430-
431421
/**
422+
* This operator has shape (2, 2), and thus 2 * 2 = 4 tasks.
423+
* - The first dimension is projected onto the INTER (node) dimension with
424+
* stride 1,
425+
* - The second dimension is projected onto the INTRA (device) dimension
426+
* with stride 2. The start of the projection defined by MachineView is at
427+
* MachineSpaceCoordinate (1, 2), and the machine space has 3 nodes and 5
428+
* devices per node.
429+
*
432430
* The tasks will thus be distributed like this:
433431
* +-------+-------+-------+-------+-------+
434432
* | 0 | 1 | 2 | 3 | 4 |

lib/compiler/test/src/compiler/series_parallel/pcg/get_pcg_series_parallel_decomposition.cc

Lines changed: 46 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,16 @@ TEST_SUITE(FF_TEST_SUITE) {
132132

133133
SUBCASE("SP without weight nodes but non-SP with weight nodes (parallel op "
134134
"chain following is not necessary)") {
135-
// A minimal computation graph where without weights (w1 and w2) the
136-
// computation graph is series-parallel, but with weight nodes it is not,
137-
// but parallel op chain following is not necessary
138-
// (in this case because there are no parallel ops involved)
139-
//
140-
// w1 input w2
141-
// \ / \ /
142-
// op1 op2
135+
/**
136+
* A minimal computation graph where without weights (w1 and w2) the
137+
* computation graph is series-parallel, but with weight nodes it is not,
138+
* but parallel op chain following is not necessary
139+
* (in this case because there are no parallel ops involved)
140+
*
141+
* w1 input w2
142+
* \ / \ /
143+
* op1 op2
144+
*/
143145

144146
ParallelComputationGraph pcg = empty_parallel_computation_graph();
145147

@@ -227,20 +229,23 @@ TEST_SUITE(FF_TEST_SUITE) {
227229

228230
SUBCASE("SP without weight nodes but non-SP with weight node (parallel op "
229231
"chain following necessary)") {
230-
// A minimal computation graph where without weights (w1 and w2) the
231-
// computation graph is series-parallel, but with weight nodes it is not
232-
// and parallel op chain following is necessary
233-
//
234-
// w1 input w2
235-
// | | |
236-
// | p2 p4
237-
// | | |
238-
// p1 p3 p5
239-
// | | |
240-
// | |\ /
241-
// | op0 \ |
242-
// \ / | /
243-
// op1 op2
232+
233+
/**
234+
* A minimal computation graph where without weights (w1 and w2) the
235+
* computation graph is series-parallel, but with weight nodes it is not
236+
* and parallel op chain following is necessary
237+
*
238+
* w1 input w2
239+
* | | |
240+
* | p2 p4
241+
* | | |
242+
* p1 p3 p5
243+
* | | |
244+
* | |\ /
245+
* | op0 \ |
246+
* \ / | /
247+
* op1 op2
248+
*/
244249

245250
ParallelComputationGraph pcg = empty_parallel_computation_graph();
246251

@@ -419,11 +424,14 @@ TEST_SUITE(FF_TEST_SUITE) {
419424
SUBCASE("SP with or without preprocessing, but preprocessing would change "
420425
"resulting SP "
421426
"decomposition") {
422-
// parallel computation graph:
423-
//
424-
// input1 input2
425-
// | |
426-
// op1 op2
427+
428+
/**
429+
* parallel computation graph:
430+
*
431+
* input1 input2
432+
* | |
433+
* op1 op2
434+
*/
427435

428436
ParallelLayerAddedResult input1_added =
429437
add_parallel_layer(pcg, make_layer_attrs(input_attrs), {}, {});
@@ -457,14 +465,17 @@ TEST_SUITE(FF_TEST_SUITE) {
457465
}
458466

459467
SUBCASE("not SP with or without weight nodes") {
460-
// parallel computation graph:
461-
//
462-
// input1
463-
// / \
464-
// op1 op2
465-
// | \ |
466-
// | \ |
467-
// op3 op4
468+
469+
/**
470+
* parallel computation graph:
471+
*
472+
* input1
473+
* / \
474+
* op1 op2
475+
* | \ |
476+
* | \ |
477+
* op3 op4
478+
*/
468479

469480
ParallelLayerAddedResult input1_added =
470481
add_parallel_layer(pcg, make_layer_attrs(input_attrs), {}, {});

lib/compiler/test/src/compiler/unity_algorithm.cc

Lines changed: 0 additions & 26 deletions
This file was deleted.

lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -16,46 +16,4 @@ std::unordered_map<symbolic_layer_guid_t, std::optional<DeviceSpecificPerDeviceO
1616
});
1717
}
1818

19-
20-
//
21-
//
22-
// TaskArgumentAccessor
23-
// get_task_arg_accessor(LocalParallelTensorBacking const &local_parallel_tensor_backing,
24-
// RuntimeArgConfig const &runtime_arg_config,
25-
// TaskInvocation const &invocation,
26-
// Allocator &allocator) {
27-
// std::unordered_map<training_tensor_slot_id_t, TensorSlotBacking>
28-
// tensor_slots_backing = construct_tensor_slots_backing_for_binding(
29-
// local_tensor_backing, invocation.binding);
30-
//
31-
// std::unordered_map<slot_id_t, ConcreteArgSpec> arg_slots_backing =
32-
// construct_arg_slots_backing(invocation.binding, runtime_arg_config);
33-
//
34-
// return TaskArgumentAccessor::create<LocalTaskArgumentAccessor>(
35-
// allocator, tensor_slots_backing, arg_slots_backing, );
36-
// }
37-
//
38-
// LocalPcgArgsBacking make_local_pcg_args_backing_for_parallel_computation_graph(
39-
// LocalTaskRegistry const &task_registry,
40-
// TrainingParallelComputationGraph const &training_pcg,
41-
// RuntimeArgConfig const &runtime_arg_config,
42-
// LocalParallelTensorBacking const &local_parallel_tensor_backing,
43-
// Allocator &allocator) {
44-
//
45-
// std::unordered_map<parallel_layer_instance_id, std::optional<DeviceSpecificPerDeviceOpState>>
46-
// per_device_op_states = generate_map(
47-
// get_parallel_layers(training_pcg.pcg),
48-
// [&](parallel_layer_instance_id const &parallel_layer_guid) {
49-
// return create_per_device_op_state(
50-
// task_registry,
51-
// local_tensor_backing,
52-
// runtime_arg_config,
53-
// allocator,
54-
// get_training_layer_plus_context(training_computation_graph,
55-
// layer_guid));
56-
// });
57-
//
58-
// }
59-
60-
6119
} // namespace FlexFlow

0 commit comments

Comments
 (0)