Skip to content

Commit c553ea7

Browse files
committed
Revert "tuner: transform coordinates to log-log scale"
This reverts commit 75b1002. Reason: I should first merge aws#1082 (tuner region test CI), and then resubmit this change. That way, any region tuner decision change can be caught easily. E.g. such diff is found by running the tuner region test CI manually on this commmit (to be reverted here): One diff that caught my eyes: in tests/integration/test-output/tuner_ranks8_nodes256.out This line is missing: 256 2048 34359738368 ReduceScatter Ring Simple 0.0 P6 Which means on P6, with 256 nodes, the it will not switch eventually to Ring Simple. This is something I need to investigate more.
1 parent 8975b88 commit c553ea7

File tree

3 files changed

+34
-127
lines changed

3 files changed

+34
-127
lines changed

include/tuner/nccl_ofi_tuner_region.h

Lines changed: 2 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
#ifndef NCCL_OFI_TUNER_REGION_H_
66
#define NCCL_OFI_TUNER_REGION_H_
77

8-
#include <cassert>
9-
#include <cmath>
108
#include <stddef.h>
119
#include "tuner/nccl_ofi_tuner_common.h"
1210

@@ -59,47 +57,6 @@ typedef struct nccl_ofi_tuner_point
5957
{
6058
double x;
6159
double y;
62-
enum COORD_SCALE {
63-
UNSPECIFIED,
64-
ORIGINAL,
65-
LOG2
66-
67-
} coord_scale = UNSPECIFIED;
68-
69-
inline void transform_log2() {
70-
if (coord_scale == LOG2) {
71-
assert(false && "Coordinate already in LOG2 scale");
72-
return;
73-
}
74-
75-
if (x >= 0 && y >= 0) {
76-
// for 0, set to a small positive number for log2().
77-
const double eps = 1e-6;
78-
if (x == 0) {
79-
x = eps;
80-
}
81-
if (y == 0) {
82-
y = eps;
83-
}
84-
85-
x = std::log2(x);
86-
y = std::log2(y);
87-
coord_scale = LOG2;
88-
} else {
89-
assert(false && "Invalid coordinates for LOG2 transformation");
90-
}
91-
}
92-
93-
inline void transform_pow2() {
94-
if (coord_scale != LOG2) {
95-
assert(false && "Coordinate not in LOG2 scale for POW2 transformation");
96-
return;
97-
}
98-
99-
x = std::pow(2.0, x);
100-
y = std::pow(2.0, y);
101-
coord_scale = ORIGINAL;
102-
}
10360
} nccl_ofi_tuner_point_t;
10461

10562
typedef struct nccl_ofi_tuner_region {
@@ -113,8 +70,7 @@ nccl_ofi_tuner_point_t extend_region(nccl_ofi_tuner_point_t a,
11370
nccl_ofi_tuner_point_t b,
11471
nccl_ofi_tuner_point_t z);
11572

116-
int is_inside_region(
117-
nccl_ofi_tuner_point_t point,
118-
const nccl_ofi_tuner_region_t *region);
73+
int is_inside_region(nccl_ofi_tuner_point_t point,
74+
nccl_ofi_tuner_region_t *region);
11975

12076
#endif /* NCCL_OFI_TUNER_REGION_H_ */

src/tuner/nccl_ofi_regions.cpp

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ static inline double distance(nccl_ofi_tuner_point_t x,
127127
double eps)
128128
{
129129
nccl_ofi_tuner_point_t dy = vsub(y1, y0);
130-
nccl_ofi_tuner_point_t x1, s = {0, 0};
130+
nccl_ofi_tuner_point_t x1, s;
131131
int r;
132132

133133
x1.x = x.x + dy.y;
@@ -155,12 +155,12 @@ static inline double distance(nccl_ofi_tuner_point_t x,
155155
* -1 for outside
156156
* 0 for on edge.
157157
*/
158-
int is_inside_region(nccl_ofi_tuner_point_t point, const nccl_ofi_tuner_region_t *region)
158+
int is_inside_region(nccl_ofi_tuner_point_t point, nccl_ofi_tuner_region_t *region)
159159
{
160160
assert(region->num_vertices > 1);
161161

162162
size_t i, k;
163-
const nccl_ofi_tuner_point_t *pv;
163+
nccl_ofi_tuner_point_t *pv;
164164
double min_x, max_x, min_y, max_y;
165165
const double eps = 1e-10;
166166

@@ -201,7 +201,6 @@ int is_inside_region(nccl_ofi_tuner_point_t point, const nccl_ofi_tuner_region_t
201201

202202
/* Pick a point far enough to be outside any polygon */
203203
nccl_ofi_tuner_point_t e = {.x = 2.0 * TUNER_MAX_SIZE, .y = 2.0 * TUNER_MAX_RANKS};
204-
e.transform_log2();
205204

206205
int crosses = 0;
207206
int intersectResult = -1;
@@ -234,14 +233,6 @@ static ncclResult_t set_regions(nccl_ofi_tuner_region_context_t *region_ctx,
234233
}
235234

236235
memcpy(region_ctx->regions[collType], &regions[0], num_regions * sizeof(nccl_ofi_tuner_region_t));
237-
238-
for (size_t i = 0; i < num_regions; i++) {
239-
nccl_ofi_tuner_region& region = region_ctx->regions[collType][i];
240-
for (size_t j = 0; j < region.num_vertices; j++) {
241-
region.vertices[j].transform_log2();
242-
}
243-
}
244-
245236
return ncclSuccess;
246237
}
247238

@@ -254,22 +245,15 @@ nccl_ofi_tuner_point_t extend_region(nccl_ofi_tuner_point_t a, nccl_ofi_tuner_po
254245
{
255246
nccl_ofi_tuner_point_t ret;
256247

257-
a.transform_log2();
258-
b.transform_log2();
259-
z.transform_log2();
260-
ret.coord_scale = nccl_ofi_tuner_point_t::LOG2;
261-
262248
if (a.x == b.x) {
263249
/* a and b are on the same vertical line */
264-
ret.x = a.x, ret.y = z.y;
265-
ret.transform_pow2();
250+
ret = (nccl_ofi_tuner_point_t){.x = a.x, .y = z.y};
266251
return ret;
267252
}
268253

269254
if (a.y == b.y) {
270255
/* a and b are on the same horizontal line */
271-
ret.x = z.x, ret.y = a.y;
272-
ret.transform_pow2();
256+
ret = (nccl_ofi_tuner_point_t){.x = z.x, .y = a.y};
273257
return ret;
274258
}
275259

@@ -278,12 +262,11 @@ nccl_ofi_tuner_point_t extend_region(nccl_ofi_tuner_point_t a, nccl_ofi_tuner_po
278262
double projected_zy = m * z.x + c;
279263

280264
if (projected_zy < z.y) {
281-
ret.x = z.x, ret.y = projected_zy;
265+
ret = (nccl_ofi_tuner_point_t){.x = z.x, .y = projected_zy};
282266
} else {
283-
ret.x = (z.y - c) / m, ret.y = z.y;
267+
ret = (nccl_ofi_tuner_point_t){.x = (z.y - c) / m, .y = z.y};
284268
}
285269

286-
ret.transform_pow2();
287270
return ret;
288271
}
289272

@@ -1999,7 +1982,6 @@ ncclResult_t region_get_coll_info_internal_v2(nccl_ofi_tuner_context_t *ctx,
19991982

20001983
p.x = (double)nBytes;
20011984
p.y = (double)region_ctx->dims.num_ranks;
2002-
p.transform_log2();
20031985

20041986
/* Check all regions */
20051987
for (size_t i = 0; i < region_ctx->num_regions[collType] && in_out < 0; i++) {
@@ -2067,7 +2049,6 @@ ncclResult_t region_get_coll_info_internal_v3(nccl_ofi_tuner_context_t *ctx,
20672049

20682050
p.x = (double)nBytes;
20692051
p.y = (double)region_ctx->dims.num_ranks;
2070-
p.transform_log2();
20712052

20722053
/* Check all regions */
20732054
for (size_t i = 0; i < region_ctx->num_regions[collType] && in_out < 0; i++) {

tests/unit/region_based_tuner.cpp

Lines changed: 25 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,6 @@
1111
#include "tuner/nccl_ofi_tuner_region.h"
1212
#include "nccl_ofi_param.h"
1313

14-
using std::abs;
15-
using std::log2;
16-
using std::pow;
17-
const double eps = 1e-4;
18-
1914
static int test_extend_region(void)
2015
{
2116
nccl_ofi_tuner_point_t extended_point;
@@ -26,9 +21,8 @@ static int test_extend_region(void)
2621
extended_point = extend_region((nccl_ofi_tuner_point_t){2, 8},
2722
(nccl_ofi_tuner_point_t){4, 8},
2823
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
29-
if (abs(extended_point.x - TUNER_MAX_SIZE) > eps || extended_point.y != 8) {
30-
printf("X-Axis Extend Test Failed : Extended Points : x = %f (diff = %f) y = %f\n", extended_point.x,
31-
extended_point.x - TUNER_MAX_SIZE, extended_point.y);
24+
if (extended_point.x != TUNER_MAX_SIZE || extended_point.y != 8) {
25+
printf("X-Axis Extend Test Failed : Extended Points : x = %f y = %f\n", extended_point.x, extended_point.y);
3226
return -1;
3327
}
3428

@@ -45,26 +39,22 @@ static int test_extend_region(void)
4539
extended_point = extend_region((nccl_ofi_tuner_point_t){8, 64},
4640
(nccl_ofi_tuner_point_t){8290304, 72},
4741
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
48-
slope = (log2(72.0) - log2(64.0)) / (log2(8290304.0) - log2(8.0)); // slope = (y2 - y1)/(x2 - x1)
42+
slope = (72.0 - 64.0) / (8290304.0 - 8.0); // slope = (y2 - y1)/(x2 - x1)
4943
// y3 = mx3 + c and substitute for m=(y2-y1)/(x2-x1) and c = y2 - mx2
50-
projected_y = pow(2.0, log2(72.0) + slope * (log2(TUNER_MAX_SIZE) - log2(8290304.0))); // y3 = y2 + mx3 - mx2
51-
if (abs(extended_point.x - TUNER_MAX_SIZE) > eps || extended_point.y != projected_y) {
52-
printf("X-Axis Upper Bound Test Failed : Extended Points : x = %f (diff = %f) y = %f (diff = %f) \n",
53-
extended_point.x, extended_point.x - TUNER_MAX_SIZE,
54-
extended_point.y, extended_point.y - projected_y);
44+
projected_y = 72.0 + slope * (TUNER_MAX_SIZE - 8290304.0); // y3 = y2 + mx3 - mx2
45+
if (extended_point.x != TUNER_MAX_SIZE || extended_point.y != projected_y) {
46+
printf("X-Axis Upper Bound Test Failed : Extended Points : x = %f y = %f\n", extended_point.x, extended_point.y);
5547
return -1;
5648
}
5749

5850
/* Extend the line to TUNER_MAX_RANKS (y-axis) */
5951
extended_point = extend_region((nccl_ofi_tuner_point_t){8, 64},
6052
(nccl_ofi_tuner_point_t){16, 1024},
6153
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
62-
slope = (log2(1024.0) - log2(64.0)) / (log2(16) - log2(8.0));
63-
projected_x = pow(2, ((log2(TUNER_MAX_RANKS) - log2(1024.0)) / slope) + log2(16));
64-
if (abs(extended_point.x - projected_x) > eps || extended_point.y != TUNER_MAX_RANKS) {
65-
printf("X-Axis Upper Bound Test 2 Failed : Extended Points : x = %f (diff = %f) y = %f (diff = %f) \n",
66-
extended_point.x, extended_point.x - projected_x,
67-
extended_point.y, extended_point.y - TUNER_MAX_RANKS);
54+
slope = (1024.0 - 64.0) / (16.0 - 8.0);
55+
projected_x = ((TUNER_MAX_RANKS - 1024.0) / slope) + 16;
56+
if (extended_point.x != projected_x || extended_point.y != TUNER_MAX_RANKS) {
57+
printf("X-Axis Upper Bound Test Failed : Extended Points : x = %f y = %f\n", extended_point.x, extended_point.y);
6858
return -1;
6959
}
7060

@@ -85,7 +75,7 @@ static int test_extend_region(void)
8575
| . |
8676
| . |
8777
|--------*------*----------*----------*---------*--------*---
88-
| p3(4M, 2) |p5(TUNER_MAX_SIZE, 2))
78+
| p3(4M, 2) |p4(TUNER_MAX_SIZE, 2))
8979
| |
9080
*/
9181
static int test_is_inside_region(void) {
@@ -97,14 +87,6 @@ static int test_is_inside_region(void) {
9787
(nccl_ofi_tuner_point_t){(double)48.0 * 1024 * 1024, 16},
9888
(nccl_ofi_tuner_point_t){(double)288.0 * 1024 * 1024, 128},
9989
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
100-
printf("INFO extended point: %f %f \n", e_48M_16_288M_128.x, e_48M_16_288M_128.y );
101-
102-
p1_288M_128.transform_log2();
103-
p2_38M_16.transform_log2();
104-
p3_4M_2.transform_log2();
105-
p5_maxM_2.transform_log2();
106-
e_48M_16_288M_128.transform_log2();
107-
printf("INFO extended point after transform_log2: %f %f \n", e_48M_16_288M_128.x, e_48M_16_288M_128.y );
10890

10991
nccl_ofi_tuner_region_t region = {
11092
.algorithm = NCCL_ALGO_RING,
@@ -134,17 +116,15 @@ static int test_is_inside_region(void) {
134116
To find the points on the edge of the polygons:
135117
1. Consider two vertices of the polygon
136118
2. Calculate the slope and y-intercept of the line.
137-
3. Using the equation y = m * x + c, get multiple points on the line in powers of 2.
119+
3. Using the equation y = mx + c, get multiple points on the line in powers of 2.
138120
*/
139121
for (size_t i = 0; i < region.num_vertices; i++) {
140122
size_t k = (i + 1) % region.num_vertices;
141123
double slope = (region.vertices[k].y - region.vertices[i].y) / (region.vertices[k].x - region.vertices[i].x);
142124
double c = region.vertices[k].y - (slope * (region.vertices[i].x));
143125
for (double x = region.vertices[i].x; x < region.vertices[k].x; x = x * 2) {
144126
double y = (slope * x) + c;
145-
nccl_ofi_tuner_point_t test_point {x, y, nccl_ofi_tuner_point_t::LOG2};
146-
147-
if (is_inside_region(test_point, &region) != 0)
127+
if (is_inside_region((nccl_ofi_tuner_point_t){x, y}, &region) != 0)
148128
return -1;
149129
// printf(" Is (%.10f, %.10f) inside the region : %d\n", x, y, is_inside_region(
150130
// (nccl_ofi_tuner_point_t){x, y}, &region));
@@ -153,8 +133,8 @@ static int test_is_inside_region(void) {
153133

154134
printf("All points on the edges of the polygon are detected correcltly\n");
155135

156-
const size_t num_points = 20;
157-
nccl_ofi_tuner_point_t inside_vertices[] = {{16.0 * 1024 * 1024, 4},
136+
size_t num_points = 20;
137+
const nccl_ofi_tuner_point_t inside_vertices[] = {{16.0 * 1024 * 1024, 4},
158138
{128.0 * 1024 * 1024, 4},
159139
{1.0 * 1024 * 1024 * 1024, 4},
160140
{4.0 * 1024 * 1024 * 1024, 4},
@@ -172,24 +152,19 @@ static int test_is_inside_region(void) {
172152
{32.0 * 1024 * 1024 * 1024, 128},
173153
{64.0 * 1024 * 1024 * 1024, 128},
174154
{64.0 * 1024 * 1024 * 1024, 256},
175-
// Note, set a big enough diff (10.0) below, otherwise
176-
// the delta after log2 is within floating error (eps).
177-
{TUNER_MAX_SIZE - 10.0, 128},
178-
{e_48M_16_288M_128.x - 1.0, e_48M_16_288M_128.y - 10.0, nccl_ofi_tuner_point_t::LOG2}};
155+
{TUNER_MAX_SIZE - 1.0, 128},
156+
{e_48M_16_288M_128.x - 1.0, e_48M_16_288M_128.y - 1.0}};
179157

180158
/* These points should be inside the polygon */
181159
for (size_t i = 0; i < num_points; i++) {
182-
inside_vertices[i].transform_log2();
183-
int d = is_inside_region(inside_vertices[i], &region);
184-
if (d != 1) {
185-
printf("%ld: %.10f, %.10f is_inside_region: %d\n", i, inside_vertices[i].x, inside_vertices[i].y, d);
160+
if (is_inside_region(inside_vertices[i], &region) != 1) {
161+
printf("%.10f, %.10f\n", inside_vertices[i].x, inside_vertices[i].y);
186162
return -1;
187163
};
188164
}
189165

190166
printf("All points inside the polygon are detected correcltly\n");
191167

192-
const size_t outside_num_points = 24;
193168
const nccl_ofi_tuner_point_t outside_vertices[] = {{8.0 * 1024 * 1024, 4},
194169
{8.0 * 1024 * 1024, 32},
195170
{8.0 * 1024 * 1024, 128},
@@ -216,10 +191,9 @@ static int test_is_inside_region(void) {
216191
{e_48M_16_288M_128.x + 1.0, e_48M_16_288M_128.y + 1.0}};
217192

218193
/* These points should be outside the polygons */
219-
for (size_t i = 0; i < outside_num_points; i++) {
220-
int d = is_inside_region(outside_vertices[i], &region);
221-
if ( d != -1) {
222-
printf("%ld: %.10f, %.10f is_inside_region: %d\n", i, outside_vertices[i].x, outside_vertices[i].y, d);
194+
for (size_t i = 0; i < num_points; i++) {
195+
if (is_inside_region(outside_vertices[i], &region) != -1) {
196+
printf("%.10f, %.10f\n", outside_vertices[i].x, outside_vertices[i].y);
223197
return -1;
224198
};
225199
}
@@ -240,17 +214,13 @@ static int test_is_inside_region(void) {
240214
}
241215

242216
int main(int argc, const char **argv) {
243-
int ret = 0;
244-
if ((ret |= test_extend_region()) < 0) {
217+
if (test_extend_region() < 0) {
245218
printf("Extend Region test failed\n");
246219
};
247220

248-
if ((ret |= test_is_inside_region()) < 0) {
221+
if (test_is_inside_region() < 0) {
249222
printf("Is inside region function failed\n");
250223
}
251224

252-
if (ret == 0) {
253-
printf("All tests passed.\n");
254-
}
255-
return ret;
225+
return 0;
256226
}

0 commit comments

Comments
 (0)