Skip to content

Commit b2cc6d1

Browse files
committed
bench: Tooling for ecmult algo selection abcd calibration
1 parent 4b05991 commit b2cc6d1

File tree

2 files changed

+202
-17
lines changed

2 files changed

+202
-17
lines changed

src/bench_ecmult.c

Lines changed: 160 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "scalar_impl.h"
1717
#include "ecmult_impl.h"
1818
#include "bench.h"
19+
#include "tests_common.h"
1920

2021
#define POINTS 32768
2122

@@ -69,6 +70,142 @@ typedef struct {
6970
secp256k1_fe* output_xonly;
7071
} bench_data;
7172

73+
/*
74+
* ABCD Calibration Benchmarks
75+
*
76+
* Measures the performance of each algorithm at various batch sizes and
77+
* outputs. Use tools/ecmult_multi_calib.py to calculate optimal C and D
78+
* values from the output.
79+
*
80+
* Each algorithm is only calibrated within its optimal batch size range
81+
* to avoid skewing results with uninteresting results far away from that
82+
* range.
83+
*/
84+
static void run_ecmult_multi_calib(bench_data* data) {
85+
static const size_t batch_sizes[] = {
86+
/* Small numbers should help stabilize Strauss intercept */
87+
2, 3, 5, 7, 10, 15, 20, 30, 50, 70,
88+
/* Crossover region between Strauss and Pippenger */
89+
85, 88, 90, 100, 120, 150, 175,
90+
/* Pippenger windows, getting progressively larger */
91+
200, 300, 500, 750, 1000, 1200, 1500, 2000, 3000, 5000, 7500, 10000, 15000, 20000, 30000
92+
};
93+
static const size_t n_batch_sizes = sizeof(batch_sizes) / sizeof(batch_sizes[0]);
94+
95+
static const char* algo_names[] = {
96+
"TRIVIAL", "STRAUSS", "PIPPENGER_1", "PIPPENGER_2", "PIPPENGER_3", "PIPPENGER_4", "PIPPENGER_5", "PIPPENGER_6", "PIPPENGER_7", "PIPPENGER_8", "PIPPENGER_9", "PIPPENGER_10", "PIPPENGER_11", "PIPPENGER_12"
97+
};
98+
99+
/* Maximum batch size for Strauss calibration. */
100+
static const size_t STRAUSS_MAX_CALIB_BATCH = 500;
101+
102+
/* Per-window min/max batch sizes for Pippenger calibration. */
103+
static const size_t pippenger_min_calib_batch[12] = {
104+
/*w=1 2 3 4 5 6 7 8 9 10 11 12 */
105+
5, 5, 10, 30, 70, 150, 300, 750, 1500, 3000, 7500, 15000
106+
};
107+
static const size_t pippenger_max_calib_batch[12] = {
108+
/*w=1 2 3 4 5 6 7 8 9 10 11 12 */
109+
100, 200, 500, 1000, 2000, 5000, 10000, 20000, 30000, 30000, 30000, 30000
110+
};
111+
112+
secp256k1_ge *points = NULL;
113+
secp256k1_scalar *scalars = NULL;
114+
secp256k1_gej result;
115+
size_t max_points = batch_sizes[n_batch_sizes - 1];
116+
int algo;
117+
size_t i, j;
118+
int base_iters = 1000;
119+
120+
points = (secp256k1_ge *)malloc(max_points * sizeof(secp256k1_ge));
121+
scalars = (secp256k1_scalar *)malloc(max_points * sizeof(secp256k1_scalar));
122+
CHECK(points != NULL);
123+
CHECK(scalars != NULL);
124+
125+
for (i = 0; i < max_points; i++) {
126+
points[i] = data->pubkeys[i % POINTS];
127+
scalars[i] = data->scalars[i % POINTS];
128+
}
129+
130+
printf("# ECMULT_MULTI Calibration Data\n");
131+
printf("# Format: ALGO,N,TIME_US (microseconds per batch)\n");
132+
printf("# Copy the DATA section below into the Python script\n");
133+
printf("#\n");
134+
printf("# BEGIN DATA\n");
135+
136+
/* Measure STRAUSS */
137+
algo = SECP256K1_ECMULT_MULTI_ALGO_STRAUSS;
138+
for (i = 0; i < n_batch_sizes; i++) {
139+
size_t n = batch_sizes[i];
140+
int64_t t_start, t_end;
141+
double time_us;
142+
int iters = base_iters;
143+
int iter;
144+
145+
/* Only run up to the max to not skew result */
146+
if (n > STRAUSS_MAX_CALIB_BATCH) continue;
147+
148+
/* Using many iterations in Strauss since batch sizes are small */
149+
if (n >= 300) iters = base_iters / 2;
150+
if (iters < 100) iters = 100;
151+
152+
t_start = gettime_i64();
153+
for (iter = 0; iter < iters; iter++) {
154+
secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo,
155+
&result, n, points, scalars, NULL);
156+
}
157+
t_end = gettime_i64();
158+
159+
time_us = (double)(t_end - t_start) / iters;
160+
printf("%s,%lu,%.3f\n", algo_names[algo], (unsigned long)n, time_us);
161+
}
162+
163+
/* Measure PIPPENGER variants */
164+
for (algo = SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1;
165+
algo <= SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12;
166+
algo++) {
167+
int window = algo - SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1;
168+
size_t min_batch = pippenger_min_calib_batch[window];
169+
size_t max_batch = pippenger_max_calib_batch[window];
170+
171+
for (j = 0; j < n_batch_sizes; j++) {
172+
size_t n = batch_sizes[j];
173+
int64_t t_start, t_end;
174+
double time_us;
175+
int iters = base_iters;
176+
int iter;
177+
178+
/* Only run for the selected range of each algo */
179+
if (n < min_batch || n > max_batch) continue;
180+
181+
/* Limiting iterations to keep run-time managable */
182+
if (n >= 1000) iters = base_iters / 10;
183+
if (n >= 5000) iters = base_iters / 50;
184+
if (n >= 15000) iters = base_iters / 100;
185+
if (iters < 3) iters = 3;
186+
187+
t_start = gettime_i64();
188+
for (iter = 0; iter < iters; iter++) {
189+
secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo,
190+
&result, n, points, scalars, NULL);
191+
}
192+
t_end = gettime_i64();
193+
194+
time_us = (double)(t_end - t_start) / iters;
195+
printf("%s,%lu,%.3f\n", algo_names[algo], (unsigned long)n, time_us);
196+
}
197+
}
198+
199+
printf("# END DATA\n");
200+
printf("#\n");
201+
printf("# To calculate ABCD constants, run:\n");
202+
printf("# ./bench_ecmult calib 2>&1 | python3 tools/ecmult_multi_calib.py\n");
203+
printf("#\n");
204+
205+
free(points);
206+
free(scalars);
207+
}
208+
72209
/* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
73210
static void hash_into_offset(bench_data* data, size_t x) {
74211
data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS;
@@ -338,6 +475,7 @@ static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_
338475
int main(int argc, char **argv) {
339476
bench_data data;
340477
int i, p;
478+
int run_calib = 0;
341479

342480
int default_iters = 10000;
343481
int iters = get_iters(default_iters);
@@ -368,6 +506,8 @@ int main(int argc, char **argv) {
368506
} else if(have_flag(argc, argv, "auto")) {
369507
printf("Using automatic algorithm selection:\n");
370508
data.forced_algo = BENCH_ALGO_AUTO;
509+
} else if(have_flag(argc, argv, "calib")) {
510+
run_calib = 1;
371511
} else {
372512
fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]);
373513
help(argv, default_iters);
@@ -398,27 +538,30 @@ int main(int argc, char **argv) {
398538
}
399539
secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS);
400540

541+
if (run_calib) {
542+
run_ecmult_multi_calib(&data);
543+
} else {
544+
print_output_table_header_row();
545+
/* Initialize offset1 and offset2 */
546+
hash_into_offset(&data, 0);
547+
run_ecmult_bench(&data, iters);
401548

402-
print_output_table_header_row();
403-
/* Initialize offset1 and offset2 */
404-
hash_into_offset(&data, 0);
405-
run_ecmult_bench(&data, iters);
406-
407-
for (i = 1; i <= 8; ++i) {
408-
run_ecmult_multi_bench(&data, i, 1, iters);
409-
}
549+
for (i = 1; i <= 8; ++i) {
550+
run_ecmult_multi_bench(&data, i, 1, iters);
551+
}
410552

411-
/* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
412-
* and the higher it goes the longer the computation takes(more points)
413-
* So we don't run this benchmark with low iterations to prevent slow down */
414-
if (iters > 2) {
415-
for (p = 0; p <= 11; ++p) {
416-
for (i = 9; i <= 16; ++i) {
417-
run_ecmult_multi_bench(&data, i << p, 1, iters);
553+
/* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
554+
* and the higher it goes the longer the computation takes(more points)
555+
* So we don't run this benchmark with low iterations to prevent slow down */
556+
if (iters > 2) {
557+
for (p = 0; p <= 11; ++p) {
558+
for (i = 9; i <= 16; ++i) {
559+
run_ecmult_multi_bench(&data, i << p, 1, iters);
560+
}
418561
}
562+
} else {
563+
printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n");
419564
}
420-
} else {
421-
printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n");
422565
}
423566

424567
secp256k1_context_destroy(data.ctx);

tools/ecmult_multi_calib.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
from collections import defaultdict
4+
5+
def linreg(x, y):
    """Ordinary least-squares fit of y = a + b*x.

    x and y are equal-length, non-empty sequences of numbers.
    Returns the tuple (a, b): intercept first, slope second.

    When all x values are (numerically) identical no slope can be
    determined; the mean of y is then returned as the intercept together
    with a slope of 0.
    """
    n = len(x)
    sx, sy = sum(x), sum(y)
    sxy = sum(a * b for a, b in zip(x, y))
    sx2 = sum(a * a for a in x)
    d = n * sx2 - sx * sx
    # d scales with x**2, so judge degeneracy relative to its own magnitude.
    # An absolute cut-off would misclassify well-spread but small x values
    # (e.g. x = 1/n for large batch sizes) and silently drop the slope.
    if abs(d) <= 1e-12 * n * sx2:
        return sy / n, 0
    slope = (n * sxy - sx * sy) / d
    return (sy - slope * sx) / n, slope
11+
12+
# Collect "ALGO,N,TIME_US" rows from stdin, grouped by algorithm name.
data = defaultdict(list)
for raw in sys.stdin:
    row = raw.strip()
    if row == '' or row.startswith('#'):
        # Blank lines and '#' banner lines carry no measurements.
        continue
    fields = row.split(',')
    if len(fields) != 3:
        continue
    name, count, micros = fields
    data[name].append((int(count), float(micros)))

# Fit t/n = C + D*(1/n) per algorithm; at least two samples are needed.
res = {}
for name, samples in data.items():
    if len(samples) < 2:
        continue
    inv_n = [1.0 / n for n, _ in samples]
    per_point = [t / n for n, t in samples]
    intercept, slope = linreg(inv_n, per_point)
    res[name] = (intercept, slope)

# Normalise so that PIPPENGER_4's C becomes 100 when it was measured.
if 'PIPPENGER_4' in res:
    scale = 100.0 / res['PIPPENGER_4'][0]
else:
    scale = 1.0

# (result key, leading columns of the C initializer row), in output order.
rows = [('STRAUSS', "SECP256K1_STRAUSS_POINT_SIZE, 0,")]
for w in range(1, 13):
    point_size = f"SECP256K1_PIPPENGER_POINT_SIZE({w}),"
    fixed_size = f"SECP256K1_PIPPENGER_FIXED_SIZE({w}),"
    rows.append((f'PIPPENGER_{w}', point_size.ljust(35) + " " + fixed_size.ljust(35)))

print("static const struct secp256k1_ecmult_multi_abcd secp256k1_ecmult_multi_abcds[SECP256K1_ECMULT_MULTI_NUM_ALGOS] = {")
print(" {0, 0, 1000, 0 },")
for key, prefix in rows:
    if key not in res:
        continue
    intercept, slope = res[key]
    # C is clamped to at least 1; D is clamped to at least 0.
    c_scaled = max(1, int(intercept * scale))
    d_scaled = max(0, int(slope * scale)) if slope > 0 else 0
    c_text = f"{c_scaled},".ljust(6)
    d_text = str(d_scaled).ljust(5)
    print(" {" + prefix + " " + c_text + " " + d_text + "},")
print("};")

0 commit comments

Comments
 (0)