1616#include "scalar_impl.h"
1717#include "ecmult_impl.h"
1818#include "bench.h"
19+ #include "tests_common.h"
1920
2021#define POINTS 32768
2122
@@ -69,6 +70,110 @@ typedef struct {
6970 secp256k1_fe * output_xonly ;
7071} bench_data ;
7172
73+ /*
74+ * ABCD Calibration Benchmarks
75+ *
76+ * Measures the performance of each algorithm at various batch sizes and
77+ * outputs. Use tools/ecmult_multi_calib.py to calculate optimal C and D
78+ * values from the output.
79+ */
80+ static void run_ecmult_multi_calib (bench_data * data ) {
81+ static const size_t batch_sizes [] = {10 , 20 , 30 , 50 , 75 , 100 , 150 , 200 , 300 , 500 , 750 , 1000 , 1500 , 2000 , 3000 , 5000 , 7500 , 10000 , 15000 , 20000 , 30000 };
82+ static const size_t n_batch_sizes = sizeof (batch_sizes ) / sizeof (batch_sizes [0 ]);
83+
84+ static const char * algo_names [] = {
85+ "TRIVIAL" , "STRAUSS" , "PIPPENGER_1" , "PIPPENGER_2" , "PIPPENGER_3" , "PIPPENGER_4" , "PIPPENGER_5" , "PIPPENGER_6" , "PIPPENGER_7" , "PIPPENGER_8" , "PIPPENGER_9" , "PIPPENGER_10" , "PIPPENGER_11" , "PIPPENGER_12"
86+ };
87+
88+ secp256k1_ge * points = NULL ;
89+ secp256k1_scalar * scalars = NULL ;
90+ secp256k1_gej result ;
91+ size_t max_points = batch_sizes [n_batch_sizes - 1 ];
92+ int algo ;
93+ size_t i , j ;
94+ int base_iters = 1000 ;
95+
96+ points = (secp256k1_ge * )malloc (max_points * sizeof (secp256k1_ge ));
97+ scalars = (secp256k1_scalar * )malloc (max_points * sizeof (secp256k1_scalar ));
98+ CHECK (points != NULL );
99+ CHECK (scalars != NULL );
100+
101+ for (i = 0 ; i < max_points ; i ++ ) {
102+ points [i ] = data -> pubkeys [i % POINTS ];
103+ scalars [i ] = data -> scalars [i % POINTS ];
104+ }
105+
106+ printf ("# ECMULT_MULTI Calibration Data\n" );
107+ printf ("# Format: ALGO,N,TIME_US (microseconds per batch)\n" );
108+ printf ("# Copy the DATA section below into the Python script\n" );
109+ printf ("#\n" );
110+ printf ("# BEGIN DATA\n" );
111+
112+ /* Measure STRAUSS */
113+ algo = SECP256K1_ECMULT_MULTI_ALGO_STRAUSS ;
114+ for (i = 0 ; i < n_batch_sizes ; i ++ ) {
115+ size_t n = batch_sizes [i ];
116+ int64_t t_start , t_end ;
117+ double time_us ;
118+ int iters = base_iters ;
119+ int iter ;
120+
121+ /* Fewer iterations for large batches to keep runtime managable */
122+ if (n >= 1000 ) iters = base_iters / 10 ;
123+ if (n >= 5000 ) iters = base_iters / 50 ;
124+ if (n >= 15000 ) iters = base_iters / 100 ;
125+ if (iters < 3 ) iters = 3 ;
126+
127+ t_start = gettime_i64 ();
128+ for (iter = 0 ; iter < iters ; iter ++ ) {
129+ secp256k1_ecmult_multi_internal (& data -> ctx -> error_callback , algo ,
130+ & result , n , points , scalars , NULL );
131+ }
132+ t_end = gettime_i64 ();
133+
134+ time_us = (double )(t_end - t_start ) / iters ;
135+ printf ("%s,%lu,%.3f\n" , algo_names [algo ], (unsigned long )n , time_us );
136+ }
137+
138+ /* Measure PIPPENGER variants */
139+ for (algo = SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1 ;
140+ algo <= SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12 ;
141+ algo ++ ) {
142+ for (j = 0 ; j < n_batch_sizes ; j ++ ) {
143+ size_t n = batch_sizes [j ];
144+ int64_t t_start , t_end ;
145+ double time_us ;
146+ int iters = base_iters ;
147+ int iter ;
148+
149+ /* Fewer iterations for large batches to keep runtime managable */
150+ if (n >= 1000 ) iters = base_iters / 10 ;
151+ if (n >= 5000 ) iters = base_iters / 50 ;
152+ if (n >= 15000 ) iters = base_iters / 100 ;
153+ if (iters < 3 ) iters = 3 ;
154+
155+ t_start = gettime_i64 ();
156+ for (iter = 0 ; iter < iters ; iter ++ ) {
157+ secp256k1_ecmult_multi_internal (& data -> ctx -> error_callback , algo ,
158+ & result , n , points , scalars , NULL );
159+ }
160+ t_end = gettime_i64 ();
161+
162+ time_us = (double )(t_end - t_start ) / iters ;
163+ printf ("%s,%lu,%.3f\n" , algo_names [algo ], (unsigned long )n , time_us );
164+ }
165+ }
166+
167+ printf ("# END DATA\n" );
168+ printf ("#\n" );
169+ printf ("# To calculate ABCD constants, run:\n" );
170+ printf ("# ./bench_ecmult calib 2>&1 | python3 tools/ecmult_multi_calib.py\n" );
171+ printf ("#\n" );
172+
173+ free (points );
174+ free (scalars );
175+ }
176+
72177/* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
73178static void hash_into_offset (bench_data * data , size_t x ) {
74179 data -> offset1 = (x * 0x537b7f6f + 0x8f66a481 ) % POINTS ;
@@ -338,6 +443,7 @@ static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_
338443int main (int argc , char * * argv ) {
339444 bench_data data ;
340445 int i , p ;
446+ int run_calib = 0 ;
341447
342448 int default_iters = 10000 ;
343449 int iters = get_iters (default_iters );
@@ -364,6 +470,8 @@ int main(int argc, char **argv) {
364470 } else if (have_flag (argc , argv , "auto" )) {
365471 printf ("Using automatic algorithm selection:\n" );
366472 data .forced_algo = BENCH_ALGO_AUTO ;
473+ } else if (have_flag (argc , argv , "calib" )) {
474+ run_calib = 1 ;
367475 } else {
368476 fprintf (stderr , "%s: unrecognized argument '%s'.\n\n" , argv [0 ], argv [1 ]);
369477 help (argv , default_iters );
@@ -394,27 +502,30 @@ int main(int argc, char **argv) {
394502 }
395503 secp256k1_ge_set_all_gej_var (data .pubkeys , data .pubkeys_gej , POINTS );
396504
505+ if (run_calib ) {
506+ run_ecmult_multi_calib (& data );
507+ } else {
508+ print_output_table_header_row ();
509+ /* Initialize offset1 and offset2 */
510+ hash_into_offset (& data , 0 );
511+ run_ecmult_bench (& data , iters );
397512
398- print_output_table_header_row ();
399- /* Initialize offset1 and offset2 */
400- hash_into_offset (& data , 0 );
401- run_ecmult_bench (& data , iters );
402-
403- for (i = 1 ; i <= 8 ; ++ i ) {
404- run_ecmult_multi_bench (& data , i , 1 , iters );
405- }
513+ for (i = 1 ; i <= 8 ; ++ i ) {
514+ run_ecmult_multi_bench (& data , i , 1 , iters );
515+ }
406516
407- /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
408- * and the higher it goes the longer the computation takes(more points)
409- * So we don't run this benchmark with low iterations to prevent slow down */
410- if (iters > 2 ) {
411- for (p = 0 ; p <= 11 ; ++ p ) {
412- for (i = 9 ; i <= 16 ; ++ i ) {
413- run_ecmult_multi_bench (& data , i << p , 1 , iters );
517+ /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
518+ * and the higher it goes the longer the computation takes(more points)
519+ * So we don't run this benchmark with low iterations to prevent slow down */
520+ if (iters > 2 ) {
521+ for (p = 0 ; p <= 11 ; ++ p ) {
522+ for (i = 9 ; i <= 16 ; ++ i ) {
523+ run_ecmult_multi_bench (& data , i << p , 1 , iters );
524+ }
414525 }
526+ } else {
527+ printf ("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n" );
415528 }
416- } else {
417- printf ("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n" );
418529 }
419530
420531 secp256k1_context_destroy (data .ctx );
0 commit comments