Skip to content

Commit 1917cea

Browse files
committed
tuner: transform x coordinates to log2 scale.
This makes the tuner use the same x-coordinate scale as the tuning scripts. Signed-off-by: fengnji <[email protected]>
1 parent 4decd69 commit 1917cea

File tree

3 files changed

+104
-35
lines changed

3 files changed

+104
-35
lines changed

include/tuner/nccl_ofi_tuner_region.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#ifndef NCCL_OFI_TUNER_REGION_H_
66
#define NCCL_OFI_TUNER_REGION_H_
77

8+
#include <cmath>
89
#include <stddef.h>
910
#include "tuner/nccl_ofi_tuner_common.h"
1011

@@ -57,6 +58,28 @@ typedef struct nccl_ofi_tuner_point
5758
{
5859
double x;
5960
double y;
61+
/*
 * Scale in which the x coordinate of this point is currently expressed.
 * A point starts out UNSPECIFIED; transform_log2_x() tags it X_LOG2 and
 * transform_pow2_x() tags it ORIGINAL.
 */
enum COORD_SCALE { UNSPECIFIED = 0, ORIGINAL = 1, X_LOG2 = 2 };
COORD_SCALE coord_scale = UNSPECIFIED;
67+
68+
inline void transform_log2_x() {
69+
if (coord_scale == X_LOG2) return;
70+
71+
if (x > 0) {
72+
x = std::log2(x);
73+
coord_scale = X_LOG2;
74+
}
75+
}
76+
77+
inline void transform_pow2_x() {
78+
if (coord_scale != X_LOG2) return;
79+
80+
x = std::pow(2.0, x);
81+
coord_scale = ORIGINAL;
82+
}
6083
} nccl_ofi_tuner_point_t;
6184

6285
typedef struct nccl_ofi_tuner_region {
@@ -70,7 +93,8 @@ nccl_ofi_tuner_point_t extend_region(nccl_ofi_tuner_point_t a,
7093
nccl_ofi_tuner_point_t b,
7194
nccl_ofi_tuner_point_t z);
7295

73-
int is_inside_region(nccl_ofi_tuner_point_t point,
74-
nccl_ofi_tuner_region_t *region);
96+
/*
 * Classify `point` against the polygon described by `region`.
 *
 * Returns 1 when the point is inside, -1 when it is outside and 0 when
 * it lies on an edge. The region must have more than one vertex.
 * NOTE(review): point and region vertices presumably need to be in the
 * same x scale (callers log2-transform both) - confirm.
 */
int is_inside_region(nccl_ofi_tuner_point_t point, const nccl_ofi_tuner_region_t *region);
7599

76100
#endif /* NCCL_OFI_TUNER_REGION_H_ */

src/tuner/nccl_ofi_regions.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ static inline double distance(nccl_ofi_tuner_point_t x,
127127
double eps)
128128
{
129129
nccl_ofi_tuner_point_t dy = vsub(y1, y0);
130-
nccl_ofi_tuner_point_t x1, s;
130+
nccl_ofi_tuner_point_t x1, s = {0, 0};
131131
int r;
132132

133133
x1.x = x.x + dy.y;
@@ -155,12 +155,12 @@ static inline double distance(nccl_ofi_tuner_point_t x,
155155
* -1 for outside
156156
* 0 for on edge.
157157
*/
158-
int is_inside_region(nccl_ofi_tuner_point_t point, nccl_ofi_tuner_region_t *region)
158+
int is_inside_region(nccl_ofi_tuner_point_t point, const nccl_ofi_tuner_region_t *region)
159159
{
160160
assert(region->num_vertices > 1);
161161

162162
size_t i, k;
163-
nccl_ofi_tuner_point_t *pv;
163+
const nccl_ofi_tuner_point_t *pv;
164164
double min_x, max_x, min_y, max_y;
165165
const double eps = 1e-10;
166166

@@ -233,6 +233,14 @@ static ncclResult_t set_regions(nccl_ofi_tuner_region_context_t *region_ctx,
233233
}
234234

235235
memcpy(region_ctx->regions[collType], &regions[0], num_regions * sizeof(nccl_ofi_tuner_region_t));
236+
237+
for (size_t i = 0; i < num_regions; i++) {
238+
nccl_ofi_tuner_region& region = region_ctx->regions[collType][i];
239+
for (size_t j = 0; j < region.num_vertices; j++) {
240+
region.vertices[j].transform_log2_x();
241+
}
242+
}
243+
236244
return ncclSuccess;
237245
}
238246

@@ -245,15 +253,22 @@ nccl_ofi_tuner_point_t extend_region(nccl_ofi_tuner_point_t a, nccl_ofi_tuner_po
245253
{
246254
nccl_ofi_tuner_point_t ret;
247255

256+
a.transform_log2_x();
257+
b.transform_log2_x();
258+
z.transform_log2_x();
259+
ret.coord_scale = nccl_ofi_tuner_point_t::X_LOG2;
260+
248261
if (a.x == b.x) {
249262
/* a and b are on the same vertical line */
250-
ret = (nccl_ofi_tuner_point_t){.x = a.x, .y = z.y};
263+
ret.x = a.x, ret.y = z.y;
264+
ret.transform_pow2_x();
251265
return ret;
252266
}
253267

254268
if (a.y == b.y) {
255269
/* a and b are on the same horizontal line */
256-
ret = (nccl_ofi_tuner_point_t){.x = z.x, .y = a.y};
270+
ret.x = z.x, ret.y = a.y;
271+
ret.transform_pow2_x();
257272
return ret;
258273
}
259274

@@ -262,11 +277,12 @@ nccl_ofi_tuner_point_t extend_region(nccl_ofi_tuner_point_t a, nccl_ofi_tuner_po
262277
double projected_zy = m * z.x + c;
263278

264279
if (projected_zy < z.y) {
265-
ret = (nccl_ofi_tuner_point_t){.x = z.x, .y = projected_zy};
280+
ret.x = z.x, ret.y = projected_zy;
266281
} else {
267-
ret = (nccl_ofi_tuner_point_t){.x = (z.y - c) / m, .y = z.y};
282+
ret.x = (z.y - c) / m, ret.y = z.y;
268283
}
269284

285+
ret.transform_pow2_x();
270286
return ret;
271287
}
272288

@@ -1419,6 +1435,7 @@ ncclResult_t region_get_coll_info_internal_v2(nccl_ofi_tuner_context_t *ctx,
14191435

14201436
p.x = (double)nBytes;
14211437
p.y = (double)region_ctx->dims.num_ranks;
1438+
p.transform_log2_x();
14221439

14231440
/* Check all regions */
14241441
for (size_t i = 0; i < region_ctx->num_regions[collType] && in_out < 0; i++) {
@@ -1486,6 +1503,7 @@ ncclResult_t region_get_coll_info_internal_v3(nccl_ofi_tuner_context_t *ctx,
14861503

14871504
p.x = (double)nBytes;
14881505
p.y = (double)region_ctx->dims.num_ranks;
1506+
p.transform_log2_x();
14891507

14901508
/* Check all regions */
14911509
for (size_t i = 0; i < region_ctx->num_regions[collType] && in_out < 0; i++) {

tests/unit/region_based_tuner.cpp

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
#include "tuner/nccl_ofi_tuner_region.h"
1212
#include "nccl_ofi_param.h"
1313

14+
using std::abs;
15+
using std::log2;
16+
using std::pow;
17+
const double eps = 1e-4;
18+
1419
static int test_extend_region(void)
1520
{
1621
nccl_ofi_tuner_point_t extended_point;
@@ -21,8 +26,9 @@ static int test_extend_region(void)
2126
extended_point = extend_region((nccl_ofi_tuner_point_t){2, 8},
2227
(nccl_ofi_tuner_point_t){4, 8},
2328
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
24-
if (extended_point.x != TUNER_MAX_SIZE || extended_point.y != 8) {
25-
printf("X-Axis Extend Test Failed : Extended Points : x = %f y = %f\n", extended_point.x, extended_point.y);
29+
if (abs(extended_point.x - TUNER_MAX_SIZE) > eps || extended_point.y != 8) {
30+
printf("X-Axis Extend Test Failed : Extended Points : x = %f (diff = %f) y = %f\n", extended_point.x,
31+
extended_point.x - TUNER_MAX_SIZE, extended_point.y);
2632
return -1;
2733
}
2834

@@ -39,22 +45,26 @@ static int test_extend_region(void)
3945
extended_point = extend_region((nccl_ofi_tuner_point_t){8, 64},
4046
(nccl_ofi_tuner_point_t){8290304, 72},
4147
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
42-
slope = (72.0 - 64.0) / (8290304.0 - 8.0); // slope = (y2 - y1)/(x2 - x1)
48+
slope = (72.0 - 64.0) / (log2(8290304.0) - log2(8.0)); // slope = (y2 - y1)/(x2 - x1)
4349
// y3 = mx3 + c and substitute for m=(y2-y1)/(x2-x1) and c = y2 - mx2
44-
projected_y = 72.0 + slope * (TUNER_MAX_SIZE - 8290304.0); // y3 = y2 + mx3 - mx2
45-
if (extended_point.x != TUNER_MAX_SIZE || extended_point.y != projected_y) {
46-
printf("X-Axis Upper Bound Test Failed : Extended Points : x = %f y = %f\n", extended_point.x, extended_point.y);
50+
projected_y = 72.0 + slope * (log2(TUNER_MAX_SIZE) - log2(8290304.0)); // y3 = y2 + mx3 - mx2
51+
if (abs(extended_point.x - TUNER_MAX_SIZE) > eps || extended_point.y != projected_y) {
52+
printf("X-Axis Upper Bound Test Failed : Extended Points : x = %f (diff = %f) y = %f (diff = %f) \n",
53+
extended_point.x, extended_point.x - TUNER_MAX_SIZE,
54+
extended_point.y, extended_point.y - projected_y);
4755
return -1;
4856
}
4957

5058
/* Extend the line to TUNER_MAX_RANKS (y-axis) */
5159
extended_point = extend_region((nccl_ofi_tuner_point_t){8, 64},
52-
(nccl_ofi_tuner_point_t){16, 1024},
60+
(nccl_ofi_tuner_point_t){8.01, 1024},
5361
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
54-
slope = (1024.0 - 64.0) / (16.0 - 8.0);
55-
projected_x = ((TUNER_MAX_RANKS - 1024.0) / slope) + 16;
56-
if (extended_point.x != projected_x || extended_point.y != TUNER_MAX_RANKS) {
57-
printf("X-Axis Upper Bound Test Failed : Extended Points : x = %f y = %f\n", extended_point.x, extended_point.y);
62+
slope = (1024.0 - 64.0) / (log2(8.01) - log2(8.0));
63+
projected_x = pow(2, ((TUNER_MAX_RANKS - 1024.0) / slope) + log2(8.01));
64+
if (abs(extended_point.x - projected_x) > eps || extended_point.y != TUNER_MAX_RANKS) {
65+
printf("X-Axis Upper Bound Test 2 Failed : Extended Points : x = %f (diff = %f) y = %f (diff = %f) \n",
66+
extended_point.x, extended_point.x - projected_x,
67+
extended_point.y, extended_point.y - TUNER_MAX_RANKS);
5868
return -1;
5969
}
6070

@@ -75,7 +85,7 @@ static int test_extend_region(void)
7585
| . |
7686
| . |
7787
|--------*------*----------*----------*---------*--------*---
78-
| p3(4M, 2) |p4(TUNER_MAX_SIZE, 2))
88+
| p3(4M, 2) |p5(TUNER_MAX_SIZE, 2))
7989
| |
8090
*/
8191
static int test_is_inside_region(void) {
@@ -87,6 +97,14 @@ static int test_is_inside_region(void) {
8797
(nccl_ofi_tuner_point_t){(double)48.0 * 1024 * 1024, 16},
8898
(nccl_ofi_tuner_point_t){(double)288.0 * 1024 * 1024, 128},
8999
(nccl_ofi_tuner_point_t){TUNER_MAX_SIZE, TUNER_MAX_RANKS});
100+
printf("INFO extended point: %f %f \n", e_48M_16_288M_128.x, e_48M_16_288M_128.y );
101+
102+
p1_288M_128.transform_log2_x();
103+
p2_38M_16.transform_log2_x();
104+
p3_4M_2.transform_log2_x();
105+
p5_maxM_2.transform_log2_x();
106+
e_48M_16_288M_128.transform_log2_x();
107+
printf("INFO extended point after transform_log2_x: %f %f \n", e_48M_16_288M_128.x, e_48M_16_288M_128.y );
90108

91109
nccl_ofi_tuner_region_t region = {
92110
.algorithm = NCCL_ALGO_RING,
@@ -98,6 +116,7 @@ static int test_is_inside_region(void) {
98116
p3_4M_2,
99117
p5_maxM_2}};
100118

119+
101120
/* Points on the vertices of the polygon should be classified to be on the edge of the region */
102121
if (is_inside_region(e_48M_16_288M_128, &region) != 0)
103122
return -1;
@@ -116,15 +135,17 @@ static int test_is_inside_region(void) {
116135
To find the points on the edge of the polygons:
117136
1. Consider two vertices of the polygon
118137
2. Calculate the slope and y-intercept of the line.
119-
3. Using the equation y = mx + c, get multiple points on the line in powers of 2.
138+
3. Using the equation y = m * x + c, get multiple points on the line in powers of 2.
120139
*/
121140
for (size_t i = 0; i < region.num_vertices; i++) {
122141
size_t k = (i + 1) % region.num_vertices;
123142
double slope = (region.vertices[k].y - region.vertices[i].y) / (region.vertices[k].x - region.vertices[i].x);
124143
double c = region.vertices[k].y - (slope * (region.vertices[i].x));
125144
for (double x = region.vertices[i].x; x < region.vertices[k].x; x = x * 2) {
126145
double y = (slope * x) + c;
127-
if (is_inside_region((nccl_ofi_tuner_point_t){x, y}, &region) != 0)
146+
nccl_ofi_tuner_point_t test_point {x, y, nccl_ofi_tuner_point_t::X_LOG2};
147+
148+
if (is_inside_region(test_point, &region) != 0)
128149
return -1;
129150
// printf(" Is (%.10f, %.10f) inside the region : %d\n", x, y, is_inside_region(
130151
// (nccl_ofi_tuner_point_t){x, y}, &region));
@@ -133,8 +154,8 @@ static int test_is_inside_region(void) {
133154

134155
printf("All points on the edges of the polygon are detected correcltly\n");
135156

136-
size_t num_points = 20;
137-
const nccl_ofi_tuner_point_t inside_vertices[] = {{16.0 * 1024 * 1024, 4},
157+
const size_t num_points = 20;
158+
nccl_ofi_tuner_point_t inside_vertices[] = {{16.0 * 1024 * 1024, 4},
138159
{128.0 * 1024 * 1024, 4},
139160
{1.0 * 1024 * 1024 * 1024, 4},
140161
{4.0 * 1024 * 1024 * 1024, 4},
@@ -152,25 +173,30 @@ static int test_is_inside_region(void) {
152173
{32.0 * 1024 * 1024 * 1024, 128},
153174
{64.0 * 1024 * 1024 * 1024, 128},
154175
{64.0 * 1024 * 1024 * 1024, 256},
155-
{TUNER_MAX_SIZE - 1.0, 128},
156-
{e_48M_16_288M_128.x - 1.0, e_48M_16_288M_128.y - 1.0}};
176+
// Note, set a big enough diff (10.0) below, otherwise
177+
// the delta after log2 is within floating error (eps).
178+
{TUNER_MAX_SIZE - 10.0, 128},
179+
{e_48M_16_288M_128.x - 0.1, e_48M_16_288M_128.y - 10.0, nccl_ofi_tuner_point_t::X_LOG2}};
157180

158181
/* These points should be inside the polygon */
159182
for (size_t i = 0; i < num_points; i++) {
160-
if (is_inside_region(inside_vertices[i], &region) != 1) {
161-
printf("%.10f, %.10f\n", inside_vertices[i].x, inside_vertices[i].y);
183+
inside_vertices[i].transform_log2_x();
184+
int d = is_inside_region(inside_vertices[i], &region);
185+
if (d != 1) {
186+
printf("%ld: %.10f, %.10f is_inside_region: %d\n", i, inside_vertices[i].x, inside_vertices[i].y, d);
162187
return -1;
163188
};
164189
}
165190

166191
printf("All points inside the polygon are detected correcltly\n");
167192

168-
const nccl_ofi_tuner_point_t outside_vertices[] = {{8.0 * 1024 * 1024, 4},
193+
const size_t outside_num_points = 24;
194+
const nccl_ofi_tuner_point_t outside_vertices[] = {{8.0 * 1024 * 1024, 6},
169195
{8.0 * 1024 * 1024, 32},
170196
{8.0 * 1024 * 1024, 128},
171197
{8.0 * 1024 * 1024, 512},
172198
{8.0 * 1024 * 1024, TUNER_MAX_RANKS},
173-
{16.0 * 1024 * 1024, 8},
199+
{16.0 * 1024 * 1024, 10},
174200
{16.0 * 1024 * 1024, 32},
175201
{16.0 * 1024 * 1024, 128},
176202
{16.0 * 1024 * 1024, 512},
@@ -180,7 +206,7 @@ static int test_is_inside_region(void) {
180206
{32.0 * 1024 * 1024, 64},
181207
{32.0 * 1024 * 1024, 128},
182208
{32.0 * 1024 * 1024, 256},
183-
{64 * 1024 * 1024, 32},
209+
{64 * 1024 * 1024, 35},
184210
{64.0 * 1024 * 1024, 64},
185211
{64.0 * 1024 * 1024, 256},
186212
{64.0 * 1024 * 1024, 1024},
@@ -191,9 +217,10 @@ static int test_is_inside_region(void) {
191217
{e_48M_16_288M_128.x + 1.0, e_48M_16_288M_128.y + 1.0}};
192218

193219
/* These points should be outside the polygons */
194-
for (size_t i = 0; i < num_points; i++) {
195-
if (is_inside_region(outside_vertices[i], &region) != -1) {
196-
printf("%.10f, %.10f\n", outside_vertices[i].x, outside_vertices[i].y);
220+
for (size_t i = 0; i < outside_num_points; i++) {
221+
int d = is_inside_region(outside_vertices[i], &region);
222+
if ( d != -1) {
223+
printf("%ld: %.10f, %.10f is_inside_region: %d\n", i, outside_vertices[i].x, outside_vertices[i].y, d);
197224
return -1;
198225
};
199226
}

0 commit comments

Comments
 (0)