Skip to content

Commit f27d31b

Browse files
committed
Even more nonnegative_int updating
1 parent 3728251 commit f27d31b

File tree

102 files changed

+981
-975
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+981
-975
lines changed

lib/compiler/src/compiler/allowed_machine_views.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "utils/containers/unordered_multiset_of.h"
1818
#include "utils/containers/unordered_set_of.h"
1919
#include "utils/containers/zip.h"
20+
#include "utils/nonnegative_int/ceildiv.h"
2021
#include "utils/nonnegative_int/nonnegative_range.h"
2122
#include "utils/nonnegative_int/num_elements.h"
2223
#include "utils/overload.h"
@@ -52,9 +53,8 @@ static std::unordered_set<MachineView>
5253
auto get_max_stride_upper_bound = [](std::vector<nonnegative_int> const &tensor_dims,
5354
nonnegative_int total_devices) -> nonnegative_int {
5455
nonnegative_int min_num_devices_with_full_stride_volume = product(transform(
55-
tensor_dims, [](nonnegative_int num_devices) { return nonnegative_int{num_devices.value() - 1}; }));
56-
return nonnegative_int{TODO colin
57-
static_cast<int>(std::ceil(static_cast<float>(total_devices.value()) / min_num_devices_with_full_stride_volume.value()))};
56+
tensor_dims, [](nonnegative_int num_devices) { return nonnegative_int{num_devices.unwrap_nonnegative() - 1}; }));
57+
return ceildiv(total_devices, min_num_devices_with_full_stride_volume);
5858
};
5959

6060
auto candidate_strides = [&](std::vector<nonnegative_int> const &tensor_dims,

lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@ std::unordered_set<std::pair<MachineSpecification, MachineSpecification>>
1111
for (int i = 1; i < resource.num_nodes; i *= 2) {
1212
MachineSpecification sub_resource1 = resource;
1313
MachineSpecification sub_resource2 = resource;
14-
sub_resource1.num_nodes = i;
15-
sub_resource2.num_nodes = resource.num_nodes - i;
14+
sub_resource1.num_nodes = nonnegative_int{i};
15+
sub_resource2.num_nodes = nonnegative_int{resource.num_nodes.unwrap_nonnegative() - i};
1616
result.insert(std::make_pair(sub_resource1, sub_resource2));
1717
result.insert(std::make_pair(sub_resource2, sub_resource1));
1818
}
1919

2020
for (int i = 1; i < resource.num_gpus_per_node; i *= 2) {
2121
MachineSpecification sub_resource1 = resource;
2222
MachineSpecification sub_resource2 = resource;
23-
sub_resource1.num_gpus_per_node = i;
24-
sub_resource2.num_gpus_per_node = resource.num_gpus_per_node - i;
23+
sub_resource1.num_gpus_per_node = nonnegative_int{i};
24+
sub_resource2.num_gpus_per_node = nonnegative_int{resource.num_gpus_per_node.unwrap_nonnegative() - i};
2525
result.insert(std::make_pair(sub_resource1, sub_resource2));
2626
result.insert(std::make_pair(sub_resource2, sub_resource1));
2727
}

lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ TEST_SUITE(FF_TEST_SUITE) {
2828
ParallelTensorShape input_shape = ParallelTensorShape{
2929
ParallelTensorDims{
3030
FFOrdered<ShardParallelDim>{
31-
ShardParallelDim{10, 2},
32-
ShardParallelDim{12, 1},
31+
ShardParallelDim{10_n, 2_n},
32+
ShardParallelDim{12_n, 1_n},
3333
},
3434
ReplicaParallelDimSet{
35-
SumDegree{1},
36-
DiscardCopyDegree{1},
35+
SumDegree{1_n},
36+
DiscardCopyDegree{1_n},
3737
},
3838
},
3939
DataType::FLOAT,

lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc

Lines changed: 94 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,19 @@ using namespace ::FlexFlow;
88

99
TEST_SUITE(FF_TEST_SUITE) {
1010
TEST_CASE("get_machine_resource_splits") {
11-
auto make_machine_spec = [](int num_nodes, int num_gpus_per_node) {
11+
auto make_machine_spec = [](nonnegative_int num_nodes, nonnegative_int num_gpus_per_node) {
1212
return MachineSpecification{
1313
/*num_nodes=*/num_nodes,
14-
/*num_cpus_per_node=*/1,
14+
/*num_cpus_per_node=*/1_n,
1515
/*num_gpus_per_node=*/num_gpus_per_node,
1616
/*inter_node_bandwidth=*/1.0,
1717
/*intra_node_bandwidth=*/1.0,
1818
};
1919
};
2020

2121
SUBCASE("returns no splits if no splits are possible") {
22-
MachineSpecification input = make_machine_spec(/*num_nodes=*/1,
23-
/*num_gpus_per_node=*/1);
22+
MachineSpecification input = make_machine_spec(/*num_nodes=*/1_n,
23+
/*num_gpus_per_node=*/1_n);
2424

2525
std::unordered_set<std::pair<MachineSpecification, MachineSpecification>>
2626
result = get_machine_resource_splits(input);
@@ -32,25 +32,25 @@ TEST_SUITE(FF_TEST_SUITE) {
3232

3333
SUBCASE(
3434
"returns splits in gpu and node dimensions, but not at the same time") {
35-
MachineSpecification input = make_machine_spec(/*num_nodes=*/2,
36-
/*num_gpus_per_node=*/2);
35+
MachineSpecification input = make_machine_spec(/*num_nodes=*/2_n,
36+
/*num_gpus_per_node=*/2_n);
3737

3838
std::unordered_set<std::pair<MachineSpecification, MachineSpecification>>
3939
result = get_machine_resource_splits(input);
4040

4141
std::unordered_set<std::pair<MachineSpecification, MachineSpecification>>
4242
correct = {
4343
{
44-
make_machine_spec(/*num_nodes=*/2,
45-
/*num_gpus_per_node=*/1),
46-
make_machine_spec(/*num_nodes=*/2,
47-
/*num_gpus_per_node=*/1),
44+
make_machine_spec(/*num_nodes=*/2_n,
45+
/*num_gpus_per_node=*/1_n),
46+
make_machine_spec(/*num_nodes=*/2_n,
47+
/*num_gpus_per_node=*/1_n),
4848
},
4949
{
50-
make_machine_spec(/*num_nodes=*/1,
51-
/*num_gpus_per_node=*/2),
52-
make_machine_spec(/*num_nodes=*/1,
53-
/*num_gpus_per_node=*/2),
50+
make_machine_spec(/*num_nodes=*/1_n,
51+
/*num_gpus_per_node=*/2_n),
52+
make_machine_spec(/*num_nodes=*/1_n,
53+
/*num_gpus_per_node=*/2_n),
5454
},
5555

5656
};
@@ -60,8 +60,8 @@ TEST_SUITE(FF_TEST_SUITE) {
6060

6161
SUBCASE("returns splits in node dimension in powers of two") {
6262
SUBCASE("num_nodes is a power of 2") {
63-
MachineSpecification input = make_machine_spec(/*num_nodes=*/8,
64-
/*num_gpus_per_node=*/1);
63+
MachineSpecification input = make_machine_spec(/*num_nodes=*/8_n,
64+
/*num_gpus_per_node=*/1_n);
6565

6666
std::unordered_set<
6767
std::pair<MachineSpecification, MachineSpecification>>
@@ -71,43 +71,43 @@ TEST_SUITE(FF_TEST_SUITE) {
7171
std::pair<MachineSpecification, MachineSpecification>>
7272
correct = {
7373
{
74-
make_machine_spec(/*num_nodes=*/1,
75-
/*num_gpus_per_node=*/1),
76-
make_machine_spec(/*num_nodes=*/7,
77-
/*num_gpus_per_node=*/1),
74+
make_machine_spec(/*num_nodes=*/1_n,
75+
/*num_gpus_per_node=*/1_n),
76+
make_machine_spec(/*num_nodes=*/7_n,
77+
/*num_gpus_per_node=*/1_n),
7878
},
7979
{
80-
make_machine_spec(/*num_nodes=*/2,
81-
/*num_gpus_per_node=*/1),
82-
make_machine_spec(/*num_nodes=*/6,
83-
/*num_gpus_per_node=*/1),
80+
make_machine_spec(/*num_nodes=*/2_n,
81+
/*num_gpus_per_node=*/1_n),
82+
make_machine_spec(/*num_nodes=*/6_n,
83+
/*num_gpus_per_node=*/1_n),
8484
},
8585
{
86-
make_machine_spec(/*num_nodes=*/4,
87-
/*num_gpus_per_node=*/1),
88-
make_machine_spec(/*num_nodes=*/4,
89-
/*num_gpus_per_node=*/1),
86+
make_machine_spec(/*num_nodes=*/4_n,
87+
/*num_gpus_per_node=*/1_n),
88+
make_machine_spec(/*num_nodes=*/4_n,
89+
/*num_gpus_per_node=*/1_n),
9090
},
9191
{
92-
make_machine_spec(/*num_nodes=*/6,
93-
/*num_gpus_per_node=*/1),
94-
make_machine_spec(/*num_nodes=*/2,
95-
/*num_gpus_per_node=*/1),
92+
make_machine_spec(/*num_nodes=*/6_n,
93+
/*num_gpus_per_node=*/1_n),
94+
make_machine_spec(/*num_nodes=*/2_n,
95+
/*num_gpus_per_node=*/1_n),
9696
},
9797
{
98-
make_machine_spec(/*num_nodes=*/7,
99-
/*num_gpus_per_node=*/1),
100-
make_machine_spec(/*num_nodes=*/1,
101-
/*num_gpus_per_node=*/1),
98+
make_machine_spec(/*num_nodes=*/7_n,
99+
/*num_gpus_per_node=*/1_n),
100+
make_machine_spec(/*num_nodes=*/1_n,
101+
/*num_gpus_per_node=*/1_n),
102102
},
103103
};
104104

105105
CHECK(result == correct);
106106
}
107107

108108
SUBCASE("num_nodes is not a power of 2") {
109-
MachineSpecification input = make_machine_spec(/*num_nodes=*/6,
110-
/*num_gpus_per_node=*/1);
109+
MachineSpecification input = make_machine_spec(/*num_nodes=*/6_n,
110+
/*num_gpus_per_node=*/1_n);
111111

112112
std::unordered_set<
113113
std::pair<MachineSpecification, MachineSpecification>>
@@ -117,28 +117,28 @@ TEST_SUITE(FF_TEST_SUITE) {
117117
std::pair<MachineSpecification, MachineSpecification>>
118118
correct = {
119119
{
120-
make_machine_spec(/*num_nodes=*/1,
121-
/*num_gpus_per_node=*/1),
122-
make_machine_spec(/*num_nodes=*/5,
123-
/*num_gpus_per_node=*/1),
120+
make_machine_spec(/*num_nodes=*/1_n,
121+
/*num_gpus_per_node=*/1_n),
122+
make_machine_spec(/*num_nodes=*/5_n,
123+
/*num_gpus_per_node=*/1_n),
124124
},
125125
{
126-
make_machine_spec(/*num_nodes=*/2,
127-
/*num_gpus_per_node=*/1),
128-
make_machine_spec(/*num_nodes=*/4,
129-
/*num_gpus_per_node=*/1),
126+
make_machine_spec(/*num_nodes=*/2_n,
127+
/*num_gpus_per_node=*/1_n),
128+
make_machine_spec(/*num_nodes=*/4_n,
129+
/*num_gpus_per_node=*/1_n),
130130
},
131131
{
132-
make_machine_spec(/*num_nodes=*/4,
133-
/*num_gpus_per_node=*/1),
134-
make_machine_spec(/*num_nodes=*/2,
135-
/*num_gpus_per_node=*/1),
132+
make_machine_spec(/*num_nodes=*/4_n,
133+
/*num_gpus_per_node=*/1_n),
134+
make_machine_spec(/*num_nodes=*/2_n,
135+
/*num_gpus_per_node=*/1_n),
136136
},
137137
{
138-
make_machine_spec(/*num_nodes=*/5,
139-
/*num_gpus_per_node=*/1),
140-
make_machine_spec(/*num_nodes=*/1,
141-
/*num_gpus_per_node=*/1),
138+
make_machine_spec(/*num_nodes=*/5_n,
139+
/*num_gpus_per_node=*/1_n),
140+
make_machine_spec(/*num_nodes=*/1_n,
141+
/*num_gpus_per_node=*/1_n),
142142
},
143143
};
144144

@@ -148,8 +148,8 @@ TEST_SUITE(FF_TEST_SUITE) {
148148

149149
SUBCASE("returns splits in gpu dimension in powers of two") {
150150
SUBCASE("num_gpus_per_node is a power of 2") {
151-
MachineSpecification input = make_machine_spec(/*num_nodes=*/1,
152-
/*num_gpus_per_node=*/8);
151+
MachineSpecification input = make_machine_spec(/*num_nodes=*/1_n,
152+
/*num_gpus_per_node=*/8_n);
153153

154154
std::unordered_set<
155155
std::pair<MachineSpecification, MachineSpecification>>
@@ -159,43 +159,43 @@ TEST_SUITE(FF_TEST_SUITE) {
159159
std::pair<MachineSpecification, MachineSpecification>>
160160
correct = {
161161
{
162-
make_machine_spec(/*num_nodes=*/1,
163-
/*num_gpus_per_node=*/1),
164-
make_machine_spec(/*num_nodes=*/1,
165-
/*num_gpus_per_node=*/7),
162+
make_machine_spec(/*num_nodes=*/1_n,
163+
/*num_gpus_per_node=*/1_n),
164+
make_machine_spec(/*num_nodes=*/1_n,
165+
/*num_gpus_per_node=*/7_n),
166166
},
167167
{
168-
make_machine_spec(/*num_nodes=*/1,
169-
/*num_gpus_per_node=*/2),
170-
make_machine_spec(/*num_nodes=*/1,
171-
/*num_gpus_per_node=*/6),
168+
make_machine_spec(/*num_nodes=*/1_n,
169+
/*num_gpus_per_node=*/2_n),
170+
make_machine_spec(/*num_nodes=*/1_n,
171+
/*num_gpus_per_node=*/6_n),
172172
},
173173
{
174-
make_machine_spec(/*num_nodes=*/1,
175-
/*num_gpus_per_node=*/4),
176-
make_machine_spec(/*num_nodes=*/1,
177-
/*num_gpus_per_node=*/4),
174+
make_machine_spec(/*num_nodes=*/1_n,
175+
/*num_gpus_per_node=*/4_n),
176+
make_machine_spec(/*num_nodes=*/1_n,
177+
/*num_gpus_per_node=*/4_n),
178178
},
179179
{
180-
make_machine_spec(/*num_nodes=*/1,
181-
/*num_gpus_per_node=*/6),
182-
make_machine_spec(/*num_nodes=*/1,
183-
/*num_gpus_per_node=*/2),
180+
make_machine_spec(/*num_nodes=*/1_n,
181+
/*num_gpus_per_node=*/6_n),
182+
make_machine_spec(/*num_nodes=*/1_n,
183+
/*num_gpus_per_node=*/2_n),
184184
},
185185
{
186-
make_machine_spec(/*num_nodes=*/1,
187-
/*num_gpus_per_node=*/7),
188-
make_machine_spec(/*num_nodes=*/1,
189-
/*num_gpus_per_node=*/1),
186+
make_machine_spec(/*num_nodes=*/1_n,
187+
/*num_gpus_per_node=*/7_n),
188+
make_machine_spec(/*num_nodes=*/1_n,
189+
/*num_gpus_per_node=*/1_n),
190190
},
191191
};
192192

193193
CHECK(result == correct);
194194
}
195195

196196
SUBCASE("num_gpus_per_node is not a power of 2") {
197-
MachineSpecification input = make_machine_spec(/*num_nodes=*/1,
198-
/*num_gpus_per_node=*/6);
197+
MachineSpecification input = make_machine_spec(/*num_nodes=*/1_n,
198+
/*num_gpus_per_node=*/6_n);
199199

200200
std::unordered_set<
201201
std::pair<MachineSpecification, MachineSpecification>>
@@ -205,28 +205,28 @@ TEST_SUITE(FF_TEST_SUITE) {
205205
std::pair<MachineSpecification, MachineSpecification>>
206206
correct = {
207207
{
208-
make_machine_spec(/*num_nodes=*/1,
209-
/*num_gpus_per_node=*/1),
210-
make_machine_spec(/*num_nodes=*/1,
211-
/*num_gpus_per_node=*/5),
208+
make_machine_spec(/*num_nodes=*/1_n,
209+
/*num_gpus_per_node=*/1_n),
210+
make_machine_spec(/*num_nodes=*/1_n,
211+
/*num_gpus_per_node=*/5_n),
212212
},
213213
{
214-
make_machine_spec(/*num_nodes=*/1,
215-
/*num_gpus_per_node=*/2),
216-
make_machine_spec(/*num_nodes=*/1,
217-
/*num_gpus_per_node=*/4),
214+
make_machine_spec(/*num_nodes=*/1_n,
215+
/*num_gpus_per_node=*/2_n),
216+
make_machine_spec(/*num_nodes=*/1_n,
217+
/*num_gpus_per_node=*/4_n),
218218
},
219219
{
220-
make_machine_spec(/*num_nodes=*/1,
221-
/*num_gpus_per_node=*/4),
222-
make_machine_spec(/*num_nodes=*/1,
223-
/*num_gpus_per_node=*/2),
220+
make_machine_spec(/*num_nodes=*/1_n,
221+
/*num_gpus_per_node=*/4_n),
222+
make_machine_spec(/*num_nodes=*/1_n,
223+
/*num_gpus_per_node=*/2_n),
224224
},
225225
{
226-
make_machine_spec(/*num_nodes=*/1,
227-
/*num_gpus_per_node=*/5),
228-
make_machine_spec(/*num_nodes=*/1,
229-
/*num_gpus_per_node=*/1),
226+
make_machine_spec(/*num_nodes=*/1_n,
227+
/*num_gpus_per_node=*/5_n),
228+
make_machine_spec(/*num_nodes=*/1_n,
229+
/*num_gpus_per_node=*/1_n),
230230
},
231231
};
232232
}

0 commit comments

Comments
 (0)