Skip to content

Commit 84926f2

Browse files
committed
multi_scalar.c: cap stack utilization in ptype##s_precompute_wbits.
Fixes #278.
1 parent: fe85dc3; commit: 84926f2

File tree

1 file changed

+37 additions, -9 deletions (lines changed)

1 file changed

+37
-9
lines changed

src/multi_scalar.c

Lines changed: 37 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -83,10 +83,10 @@ POINTS_TO_AFFINE_IMPL(blst_p2, POINTonE2, 384x, fp2)
8383
/* The intermediate infinity points are encoded as [0, 0, 1]. */
8484

8585
#define PRECOMPUTE_WBITS_IMPL(prefix, ptype, bits, field, one) \
86-
static void ptype##_precompute_row_wbits(ptype row[], size_t wbits, \
87-
const ptype##_affine *point) \
86+
static void ptype##_precompute_row(ptype row[], size_t n, \
87+
const ptype##_affine *point) \
8888
{ \
89-
size_t i, j, n = (size_t)1 << (wbits-1); \
89+
size_t i, j; \
9090
bool_t inf = vec_is_zero(point, sizeof(*point)); \
9191
/* row[-1] is implicit infinity */\
9292
vec_copy(&row[0], point, sizeof(*point)); /* row[0]=p*1 */\
@@ -153,16 +153,44 @@ static void ptype##s_precompute_wbits(ptype##_affine table[], size_t wbits, \
153153
rows = row = (ptype *)(&table[top]); \
154154
for (i = 0; i < stride; i++, row += nwin) \
155155
point = *points ? *points++ : point+1, \
156-
ptype##_precompute_row_wbits(row, wbits, point); \
156+
ptype##_precompute_row(row, nwin, point); \
157157
ptype##s_to_affine_row_wbits(&table[top], rows, wbits, stride); \
158158
top += stride << (wbits-1); \
159159
npoints -= stride; \
160160
} \
161-
rows = row = alloca(2*sizeof(ptype##_affine) * npoints * nwin); \
162-
for (i = 0; i < npoints; i++, row += nwin) \
163-
point = *points ? *points++ : point+1, \
164-
ptype##_precompute_row_wbits(row, wbits, point); \
165-
ptype##s_to_affine_row_wbits(&table[top], rows, wbits, npoints); \
161+
if ((i = 2*sizeof(ptype##_affine)*npoints*nwin) <= SCRATCH_LIMIT) { \
162+
rows = row = alloca(i); \
163+
for (i = 0; i < npoints; i++, row += nwin) \
164+
point = *points ? *points++ : point+1, \
165+
ptype##_precompute_row(row, nwin, point); \
166+
ptype##s_to_affine_row_wbits(&table[top], rows, wbits, npoints); \
167+
} else { \
168+
const ptype *pp[2]; \
169+
\
170+
stride = SCRATCH_LIMIT / sizeof(ptype); \
171+
stride -= stride % 2; \
172+
if (stride > nwin) stride = nwin; \
173+
\
174+
pp[0] = row = alloca(stride * sizeof(ptype)); \
175+
pp[1] = NULL; \
176+
for (i = 0; i < npoints; i++, top += nwin) { \
177+
size_t j, k, n; \
178+
\
179+
point = *points ? *points++ : point+1; \
180+
ptype##_precompute_row(row, stride, point); \
181+
ptype##s_to_affine(&table[top], pp, stride); \
182+
for (j = stride; j < nwin; j += stride) { \
183+
n = (j+stride) <= nwin ? stride : nwin-j; \
184+
for (k = 0; k < n-1; k++) \
185+
ptype##_add_affine(&row[k], &row[stride-1], &table[top+k]); \
186+
if (j == stride) \
187+
ptype##_double(&row[k], &row[stride-1]); \
188+
else \
189+
ptype##_add_affine(&row[k], &row[stride-1], &table[top+k]); \
190+
ptype##s_to_affine(&table[top+j], pp, n); \
191+
} \
192+
} \
193+
} \
166194
} \
167195
\
168196
size_t prefix##s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints) \

0 commit comments

Comments
 (0)