Skip to content

Commit c2c9bc2

Browse files
committed
WIP: Tooling for algo selection abcd calibration
1 parent dd3eaa7 commit c2c9bc2

File tree

2 files changed

+170
-17
lines changed

2 files changed

+170
-17
lines changed

src/bench_ecmult.c

Lines changed: 128 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "scalar_impl.h"
1717
#include "ecmult_impl.h"
1818
#include "bench.h"
19+
#include "tests_common.h"
1920

2021
#define POINTS 32768
2122

@@ -69,6 +70,110 @@ typedef struct {
6970
secp256k1_fe* output_xonly;
7071
} bench_data;
7172

73+
/*
74+
* ABCD Calibration Benchmarks
75+
*
76+
* Measures the performance of each algorithm at various batch sizes and
77+
* outputs. Use tools/ecmult_multi_calib.py to calculate optimal C and D
78+
* values from the output.
79+
*/
80+
static void run_ecmult_multi_calib(bench_data* data) {
81+
static const size_t batch_sizes[] = {10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000, 1500, 2000, 3000, 5000, 7500, 10000, 15000, 20000, 30000};
82+
static const size_t n_batch_sizes = sizeof(batch_sizes) / sizeof(batch_sizes[0]);
83+
84+
static const char* algo_names[] = {
85+
"TRIVIAL", "STRAUSS", "PIPPENGER_1", "PIPPENGER_2", "PIPPENGER_3", "PIPPENGER_4", "PIPPENGER_5", "PIPPENGER_6", "PIPPENGER_7", "PIPPENGER_8", "PIPPENGER_9", "PIPPENGER_10", "PIPPENGER_11", "PIPPENGER_12"
86+
};
87+
88+
secp256k1_ge *points = NULL;
89+
secp256k1_scalar *scalars = NULL;
90+
secp256k1_gej result;
91+
size_t max_points = batch_sizes[n_batch_sizes - 1];
92+
int algo;
93+
size_t i, j;
94+
int base_iters = 1000;
95+
96+
points = (secp256k1_ge *)malloc(max_points * sizeof(secp256k1_ge));
97+
scalars = (secp256k1_scalar *)malloc(max_points * sizeof(secp256k1_scalar));
98+
CHECK(points != NULL);
99+
CHECK(scalars != NULL);
100+
101+
for (i = 0; i < max_points; i++) {
102+
points[i] = data->pubkeys[i % POINTS];
103+
scalars[i] = data->scalars[i % POINTS];
104+
}
105+
106+
printf("# ECMULT_MULTI Calibration Data\n");
107+
printf("# Format: ALGO,N,TIME_US (microseconds per batch)\n");
108+
printf("# Copy the DATA section below into the Python script\n");
109+
printf("#\n");
110+
printf("# BEGIN DATA\n");
111+
112+
/* Measure STRAUSS */
113+
algo = SECP256K1_ECMULT_MULTI_ALGO_STRAUSS;
114+
for (i = 0; i < n_batch_sizes; i++) {
115+
size_t n = batch_sizes[i];
116+
int64_t t_start, t_end;
117+
double time_us;
118+
int iters = base_iters;
119+
int iter;
120+
121+
/* Fewer iterations for large batches to keep runtime managable */
122+
if (n >= 1000) iters = base_iters / 10;
123+
if (n >= 5000) iters = base_iters / 50;
124+
if (n >= 15000) iters = base_iters / 100;
125+
if (iters < 3) iters = 3;
126+
127+
t_start = gettime_i64();
128+
for (iter = 0; iter < iters; iter++) {
129+
secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo,
130+
&result, n, points, scalars, NULL);
131+
}
132+
t_end = gettime_i64();
133+
134+
time_us = (double)(t_end - t_start) / iters;
135+
printf("%s,%lu,%.3f\n", algo_names[algo], (unsigned long)n, time_us);
136+
}
137+
138+
/* Measure PIPPENGER variants */
139+
for (algo = SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1;
140+
algo <= SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12;
141+
algo++) {
142+
for (j = 0; j < n_batch_sizes; j++) {
143+
size_t n = batch_sizes[j];
144+
int64_t t_start, t_end;
145+
double time_us;
146+
int iters = base_iters;
147+
int iter;
148+
149+
/* Fewer iterations for large batches to keep runtime managable */
150+
if (n >= 1000) iters = base_iters / 10;
151+
if (n >= 5000) iters = base_iters / 50;
152+
if (n >= 15000) iters = base_iters / 100;
153+
if (iters < 3) iters = 3;
154+
155+
t_start = gettime_i64();
156+
for (iter = 0; iter < iters; iter++) {
157+
secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo,
158+
&result, n, points, scalars, NULL);
159+
}
160+
t_end = gettime_i64();
161+
162+
time_us = (double)(t_end - t_start) / iters;
163+
printf("%s,%lu,%.3f\n", algo_names[algo], (unsigned long)n, time_us);
164+
}
165+
}
166+
167+
printf("# END DATA\n");
168+
printf("#\n");
169+
printf("# To calculate ABCD constants, run:\n");
170+
printf("# ./bench_ecmult calib 2>&1 | python3 tools/ecmult_multi_calib.py\n");
171+
printf("#\n");
172+
173+
free(points);
174+
free(scalars);
175+
}
176+
72177
/* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
73178
static void hash_into_offset(bench_data* data, size_t x) {
74179
data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS;
@@ -338,6 +443,7 @@ static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_
338443
int main(int argc, char **argv) {
339444
bench_data data;
340445
int i, p;
446+
int run_calib = 0;
341447

342448
int default_iters = 10000;
343449
int iters = get_iters(default_iters);
@@ -364,6 +470,8 @@ int main(int argc, char **argv) {
364470
} else if(have_flag(argc, argv, "auto")) {
365471
printf("Using automatic algorithm selection:\n");
366472
data.forced_algo = BENCH_ALGO_AUTO;
473+
} else if(have_flag(argc, argv, "calib")) {
474+
run_calib = 1;
367475
} else {
368476
fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]);
369477
help(argv, default_iters);
@@ -394,27 +502,30 @@ int main(int argc, char **argv) {
394502
}
395503
secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS);
396504

505+
if (run_calib) {
506+
run_ecmult_multi_calib(&data);
507+
} else {
508+
print_output_table_header_row();
509+
/* Initialize offset1 and offset2 */
510+
hash_into_offset(&data, 0);
511+
run_ecmult_bench(&data, iters);
397512

398-
print_output_table_header_row();
399-
/* Initialize offset1 and offset2 */
400-
hash_into_offset(&data, 0);
401-
run_ecmult_bench(&data, iters);
402-
403-
for (i = 1; i <= 8; ++i) {
404-
run_ecmult_multi_bench(&data, i, 1, iters);
405-
}
513+
for (i = 1; i <= 8; ++i) {
514+
run_ecmult_multi_bench(&data, i, 1, iters);
515+
}
406516

407-
/* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
408-
* and the higher it goes the longer the computation takes(more points)
409-
* So we don't run this benchmark with low iterations to prevent slow down */
410-
if (iters > 2) {
411-
for (p = 0; p <= 11; ++p) {
412-
for (i = 9; i <= 16; ++i) {
413-
run_ecmult_multi_bench(&data, i << p, 1, iters);
517+
/* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
518+
* and the higher it goes the longer the computation takes(more points)
519+
* So we don't run this benchmark with low iterations to prevent slow down */
520+
if (iters > 2) {
521+
for (p = 0; p <= 11; ++p) {
522+
for (i = 9; i <= 16; ++i) {
523+
run_ecmult_multi_bench(&data, i << p, 1, iters);
524+
}
414525
}
526+
} else {
527+
printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n");
415528
}
416-
} else {
417-
printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n");
418529
}
419530

420531
secp256k1_context_destroy(data.ctx);

tools/ecmult_multi_calib.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
from collections import defaultdict
4+
5+
def linreg(x, y):
    """Ordinary least-squares fit of ``y = intercept + slope * x``.

    Args:
        x: sequence of sample abscissae.
        y: sequence of sample ordinates, same length as ``x``.

    Returns:
        ``(intercept, slope)``. When all ``x`` values are (numerically)
        identical the slope is undefined; ``(mean(y), 0)`` is returned.

    Raises:
        ValueError: if the inputs are empty or differ in length.
    """
    n = len(x)
    if n == 0 or n != len(y):
        raise ValueError("linreg needs two non-empty sequences of equal length")
    sx, sy = sum(x), sum(y)
    sxy = sum(a * b for a, b in zip(x, y))
    sx2 = sum(a * a for a in x)
    d = n * sx2 - sx * sx
    # Degeneracy is judged relative to the magnitude of the data. An absolute
    # threshold would misclassify well-conditioned but small x values (such as
    # 1/n for large batch sizes) and silently return a zero slope.
    if abs(d) <= 1e-12 * n * sx2:
        return sy / n, 0
    slope = (n * sxy - sx * sy) / d
    return (sy - slope * sx) / n, slope
11+
12+
# Collect (batch_size, time) samples per algorithm from "ALGO,N,TIME" lines
# read on stdin. Blank lines and '#' comment lines are ignored.
data = defaultdict(list)
for line in sys.stdin:
    line = line.strip()
    if not line or line.startswith('#'):
        continue
    p = line.split(',')
    if len(p) != 3:
        continue
    # The documented invocation merges stderr into the pipe, so tolerate
    # three-field lines that are not numeric data instead of crashing.
    try:
        n, t = int(p[1]), float(p[2])
    except ValueError:
        continue
    if n <= 0:
        # 1/n below would divide by zero; such a line is not a valid sample.
        continue
    data[p[0]].append((n, t))

# Fit t = C*n + D per algorithm by regressing t/n on 1/n: the intercept of
# that fit is C and the slope is D. At least two samples are required.
res = {}
for algo, m in data.items():
    if len(m) >= 2:
        C, D = linreg([1.0 / n for n, _ in m], [t / n for n, t in m])
        res[algo] = (C, D)

# Normalise so that PIPPENGER_4 gets C == 100. Fall back to no scaling when
# that reference is missing or its fitted C is not positive (which would
# otherwise divide by zero or flip the sign of every constant).
ref = res.get('PIPPENGER_4')
scale = 100.0 / ref[0] if ref is not None and ref[0] > 0 else 1.0


def _scaled(C, D):
    """Return the integer (C, D) pair after scaling; C >= 1 and D >= 0."""
    return max(1, int(C * scale)), (max(0, int(D * scale)) if D > 0 else 0)


print("static const struct secp256k1_ecmult_multi_abcd secp256k1_ecmult_multi_abcds[SECP256K1_ECMULT_MULTI_NUM_ALGOS] = {")
print(" {0, 0, 1000, 0 },")
if 'STRAUSS' in res:
    Cs, Ds = _scaled(*res['STRAUSS'])
    Cstr = f"{Cs},"
    print(f" {{SECP256K1_STRAUSS_POINT_SIZE, 0, {Cstr:<6} {Ds:<5}}},")
for i in range(1, 13):
    if f'PIPPENGER_{i}' in res:
        Cs, Ds = _scaled(*res[f'PIPPENGER_{i}'])
        ps = f"SECP256K1_PIPPENGER_POINT_SIZE({i}),"
        fs = f"SECP256K1_PIPPENGER_FIXED_SIZE({i}),"
        Cstr = f"{Cs},"
        print(f" {{{ps:<35} {fs:<35} {Cstr:<6} {Ds:<5}}},")
print("};")

0 commit comments

Comments
 (0)