1818
1919#define POINTS 32768
2020
/* Print a short usage summary for this benchmark program to stdout.
 *
 * argv: the argument vector received by main. Only argv[0] is read; it is
 *       shown as the program name in the "Usage:" line. If argv or argv[0]
 *       is NULL a placeholder is printed instead, because passing a null
 *       pointer to a %s conversion is undefined behaviour.
 *
 * The function is file-local (static) like every other function in this
 * file; it is only meant to be called from main. */
static void help(char **argv) {
    const char *prog = "<program>";

    if (argv != NULL && argv[0] != NULL) {
        prog = argv[0];
    }

    printf("Benchmark EC multiplication algorithms\n");
    printf("\n");
    printf("Usage: %s <help|pippenger_wnaf|strauss_wnaf|simple>\n", prog);
    printf("The output shows the number of multiplied and summed points right after the\n");
    printf("function name. The letter 'g' indicates that one of the points is the generator.\n");
    printf("The benchmarks are divided by the number of points.\n");
    printf("\n");
    printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n");
    printf(" batch size\n");
    printf("pippenger_wnaf: for all batch sizes\n");
    printf("strauss_wnaf: for all batch sizes\n");
    printf("simple: multiply and sum each point individually\n");
}
35+
2136typedef struct {
2237 /* Setup once in advance */
2338 secp256k1_context * ctx ;
2439 secp256k1_scratch_space * scratch ;
2540 secp256k1_scalar * scalars ;
2641 secp256k1_ge * pubkeys ;
42+ secp256k1_gej * pubkeys_gej ;
2743 secp256k1_scalar * seckeys ;
2844 secp256k1_gej * expected_output ;
2945 secp256k1_ecmult_multi_func ecmult_multi ;
3046
31- /* Changes per test */
47+ /* Changes per benchmark */
3248 size_t count ;
3349 int includes_g ;
3450
35- /* Changes per test iteration */
51+ /* Changes per benchmark iteration, used to pick different scalars and pubkeys
52+ * in each run. */
3653 size_t offset1 ;
3754 size_t offset2 ;
3855
39- /* Test output. */
56+ /* Benchmark output. */
4057 secp256k1_gej * output ;
4158} bench_data ;
4259
43- static int bench_callback (secp256k1_scalar * sc , secp256k1_ge * ge , size_t idx , void * arg ) {
60+ /* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
61+ static void hash_into_offset (bench_data * data , size_t x ) {
62+ data -> offset1 = (x * 0x537b7f6f + 0x8f66a481 ) % POINTS ;
63+ data -> offset2 = (x * 0x7f6f537b + 0x6a1a8f49 ) % POINTS ;
64+ }
65+
66+ /* Check correctness of the benchmark by computing
67+ * sum(outputs) ?= (sum(scalars_gen) + sum(seckeys)*sum(scalars))*G */
68+ static void bench_ecmult_teardown_helper (bench_data * data , size_t * seckey_offset , size_t * scalar_offset , size_t * scalar_gen_offset , int iters ) {
69+ int i ;
70+ secp256k1_gej sum_output , tmp ;
71+ secp256k1_scalar sum_scalars ;
72+
73+ secp256k1_gej_set_infinity (& sum_output );
74+ secp256k1_scalar_clear (& sum_scalars );
75+ for (i = 0 ; i < iters ; ++ i ) {
76+ secp256k1_gej_add_var (& sum_output , & sum_output , & data -> output [i ], NULL );
77+ if (scalar_gen_offset != NULL ) {
78+ secp256k1_scalar_add (& sum_scalars , & sum_scalars , & data -> scalars [(* scalar_gen_offset + i ) % POINTS ]);
79+ }
80+ if (seckey_offset != NULL ) {
81+ secp256k1_scalar s = data -> seckeys [(* seckey_offset + i ) % POINTS ];
82+ secp256k1_scalar_mul (& s , & s , & data -> scalars [(* scalar_offset + i ) % POINTS ]);
83+ secp256k1_scalar_add (& sum_scalars , & sum_scalars , & s );
84+ }
85+ }
86+ secp256k1_ecmult_gen (& data -> ctx -> ecmult_gen_ctx , & tmp , & sum_scalars );
87+ secp256k1_gej_neg (& tmp , & tmp );
88+ secp256k1_gej_add_var (& tmp , & tmp , & sum_output , NULL );
89+ CHECK (secp256k1_gej_is_infinity (& tmp ));
90+ }
91+
92+ static void bench_ecmult_setup (void * arg ) {
93+ bench_data * data = (bench_data * )arg ;
94+ /* Re-randomize offset to ensure that we're using different scalars and
95+ * group elements in each run. */
96+ hash_into_offset (data , data -> offset1 );
97+ }
98+
99+ static void bench_ecmult_gen (void * arg , int iters ) {
100+ bench_data * data = (bench_data * )arg ;
101+ int i ;
102+
103+ for (i = 0 ; i < iters ; ++ i ) {
104+ secp256k1_ecmult_gen (& data -> ctx -> ecmult_gen_ctx , & data -> output [i ], & data -> scalars [(data -> offset1 + i ) % POINTS ]);
105+ }
106+ }
107+
108+ static void bench_ecmult_gen_teardown (void * arg , int iters ) {
109+ bench_data * data = (bench_data * )arg ;
110+ bench_ecmult_teardown_helper (data , NULL , NULL , & data -> offset1 , iters );
111+ }
112+
113+ static void bench_ecmult_const (void * arg , int iters ) {
114+ bench_data * data = (bench_data * )arg ;
115+ int i ;
116+
117+ for (i = 0 ; i < iters ; ++ i ) {
118+ secp256k1_ecmult_const (& data -> output [i ], & data -> pubkeys [(data -> offset1 + i ) % POINTS ], & data -> scalars [(data -> offset2 + i ) % POINTS ], 256 );
119+ }
120+ }
121+
122+ static void bench_ecmult_const_teardown (void * arg , int iters ) {
123+ bench_data * data = (bench_data * )arg ;
124+ bench_ecmult_teardown_helper (data , & data -> offset1 , & data -> offset2 , NULL , iters );
125+ }
126+
127+ static void bench_ecmult_1 (void * arg , int iters ) {
128+ bench_data * data = (bench_data * )arg ;
129+ int i ;
130+
131+ for (i = 0 ; i < iters ; ++ i ) {
132+ secp256k1_ecmult (& data -> ctx -> ecmult_ctx , & data -> output [i ], & data -> pubkeys_gej [(data -> offset1 + i ) % POINTS ], & data -> scalars [(data -> offset2 + i ) % POINTS ], NULL );
133+ }
134+ }
135+
136+ static void bench_ecmult_1_teardown (void * arg , int iters ) {
137+ bench_data * data = (bench_data * )arg ;
138+ bench_ecmult_teardown_helper (data , & data -> offset1 , & data -> offset2 , NULL , iters );
139+ }
140+
141+ static void bench_ecmult_1g (void * arg , int iters ) {
142+ bench_data * data = (bench_data * )arg ;
143+ secp256k1_scalar zero ;
144+ int i ;
145+
146+ secp256k1_scalar_set_int (& zero , 0 );
147+ for (i = 0 ; i < iters ; ++ i ) {
148+ secp256k1_ecmult (& data -> ctx -> ecmult_ctx , & data -> output [i ], NULL , & zero , & data -> scalars [(data -> offset1 + i ) % POINTS ]);
149+ }
150+ }
151+
152+ static void bench_ecmult_1g_teardown (void * arg , int iters ) {
153+ bench_data * data = (bench_data * )arg ;
154+ bench_ecmult_teardown_helper (data , NULL , NULL , & data -> offset1 , iters );
155+ }
156+
157+ static void bench_ecmult_2g (void * arg , int iters ) {
158+ bench_data * data = (bench_data * )arg ;
159+ int i ;
160+
161+ for (i = 0 ; i < iters /2 ; ++ i ) {
162+ secp256k1_ecmult (& data -> ctx -> ecmult_ctx , & data -> output [i ], & data -> pubkeys_gej [(data -> offset1 + i ) % POINTS ], & data -> scalars [(data -> offset2 + i ) % POINTS ], & data -> scalars [(data -> offset1 + i ) % POINTS ]);
163+ }
164+ }
165+
166+ static void bench_ecmult_2g_teardown (void * arg , int iters ) {
167+ bench_data * data = (bench_data * )arg ;
168+ bench_ecmult_teardown_helper (data , & data -> offset1 , & data -> offset2 , & data -> offset1 , iters /2 );
169+ }
170+
171+ static void run_ecmult_bench (bench_data * data , int iters ) {
172+ char str [32 ];
173+ sprintf (str , "ecmult_gen" );
174+ run_benchmark (str , bench_ecmult_gen , bench_ecmult_setup , bench_ecmult_gen_teardown , data , 10 , iters );
175+ sprintf (str , "ecmult_const" );
176+ run_benchmark (str , bench_ecmult_const , bench_ecmult_setup , bench_ecmult_const_teardown , data , 10 , iters );
177+ /* ecmult with non generator point */
178+ sprintf (str , "ecmult 1" );
179+ run_benchmark (str , bench_ecmult_1 , bench_ecmult_setup , bench_ecmult_1_teardown , data , 10 , iters );
180+ /* ecmult with generator point */
181+ sprintf (str , "ecmult 1g" );
182+ run_benchmark (str , bench_ecmult_1g , bench_ecmult_setup , bench_ecmult_1g_teardown , data , 10 , iters );
183+ /* ecmult with generator and non-generator point. The reported time is per point. */
184+ sprintf (str , "ecmult 2g" );
185+ run_benchmark (str , bench_ecmult_2g , bench_ecmult_setup , bench_ecmult_2g_teardown , data , 10 , 2 * iters );
186+ }
187+
188+ static int bench_ecmult_multi_callback (secp256k1_scalar * sc , secp256k1_ge * ge , size_t idx , void * arg ) {
44189 bench_data * data = (bench_data * )arg ;
45190 if (data -> includes_g ) ++ idx ;
46191 if (idx == 0 ) {
@@ -53,7 +198,7 @@ static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, vo
53198 return 1 ;
54199}
55200
56- static void bench_ecmult (void * arg , int iters ) {
201+ static void bench_ecmult_multi (void * arg , int iters ) {
57202 bench_data * data = (bench_data * )arg ;
58203
59204 int includes_g = data -> includes_g ;
@@ -62,19 +207,18 @@ static void bench_ecmult(void* arg, int iters) {
62207 iters = iters / data -> count ;
63208
64209 for (iter = 0 ; iter < iters ; ++ iter ) {
65- data -> ecmult_multi (& data -> ctx -> error_callback , & data -> ctx -> ecmult_ctx , data -> scratch , & data -> output [iter ], data -> includes_g ? & data -> scalars [data -> offset1 ] : NULL , bench_callback , arg , count - includes_g );
210+ data -> ecmult_multi (& data -> ctx -> error_callback , & data -> ctx -> ecmult_ctx , data -> scratch , & data -> output [iter ], data -> includes_g ? & data -> scalars [data -> offset1 ] : NULL , bench_ecmult_multi_callback , arg , count - includes_g );
66211 data -> offset1 = (data -> offset1 + count ) % POINTS ;
67212 data -> offset2 = (data -> offset2 + count - 1 ) % POINTS ;
68213 }
69214}
70215
71- static void bench_ecmult_setup (void * arg ) {
216+ static void bench_ecmult_multi_setup (void * arg ) {
72217 bench_data * data = (bench_data * )arg ;
73- data -> offset1 = (data -> count * 0x537b7f6f + 0x8f66a481 ) % POINTS ;
74- data -> offset2 = (data -> count * 0x7f6f537b + 0x6a1a8f49 ) % POINTS ;
218+ hash_into_offset (data , data -> count );
75219}
76220
77- static void bench_ecmult_teardown (void * arg , int iters ) {
221+ static void bench_ecmult_multi_teardown (void * arg , int iters ) {
78222 bench_data * data = (bench_data * )arg ;
79223 int iter ;
80224 iters = iters / data -> count ;
@@ -88,7 +232,7 @@ static void bench_ecmult_teardown(void* arg, int iters) {
88232
89233static void generate_scalar (uint32_t num , secp256k1_scalar * scalar ) {
90234 secp256k1_sha256 sha256 ;
91- unsigned char c [11 ] = {'e' , 'c' , 'm' , 'u' , 'l' , 't' , 0 , 0 , 0 , 0 };
235+ unsigned char c [10 ] = {'e' , 'c' , 'm' , 'u' , 'l' , 't' , 0 , 0 , 0 , 0 };
92236 unsigned char buf [32 ];
93237 int overflow = 0 ;
94238 c [6 ] = num ;
@@ -102,7 +246,7 @@ static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
102246 CHECK (!overflow );
103247}
104248
105- static void run_test (bench_data * data , size_t count , int includes_g , int num_iters ) {
249+ static void run_ecmult_multi_bench (bench_data * data , size_t count , int includes_g , int num_iters ) {
106250 char str [32 ];
107251 static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST (0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 );
108252 size_t iters = 1 + num_iters / count ;
@@ -112,8 +256,7 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite
112256 data -> includes_g = includes_g ;
113257
114258 /* Compute (the negation of) the expected results directly. */
115- data -> offset1 = (data -> count * 0x537b7f6f + 0x8f66a481 ) % POINTS ;
116- data -> offset2 = (data -> count * 0x7f6f537b + 0x6a1a8f49 ) % POINTS ;
259+ hash_into_offset (data , data -> count );
117260 for (iter = 0 ; iter < iters ; ++ iter ) {
118261 secp256k1_scalar tmp ;
119262 secp256k1_scalar total = data -> scalars [(data -> offset1 ++ ) % POINTS ];
@@ -127,65 +270,75 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite
127270 }
128271
129272 /* Run the benchmark. */
130- sprintf (str , includes_g ? "ecmult_ %ig" : "ecmult_ %i" , (int )count );
131- run_benchmark (str , bench_ecmult , bench_ecmult_setup , bench_ecmult_teardown , data , 10 , count * iters );
273+ sprintf (str , includes_g ? "ecmult_multi %ig" : "ecmult_multi %i" , (int )count );
274+ run_benchmark (str , bench_ecmult_multi , bench_ecmult_multi_setup , bench_ecmult_multi_teardown , data , 10 , count * iters );
132275}
133276
134277int main (int argc , char * * argv ) {
135278 bench_data data ;
136279 int i , p ;
137- secp256k1_gej * pubkeys_gej ;
138280 size_t scratch_size ;
139281
140282 int iters = get_iters (10000 );
141283
142- data .ctx = secp256k1_context_create (SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY );
143- scratch_size = secp256k1_strauss_scratch_size (POINTS ) + STRAUSS_SCRATCH_OBJECTS * 16 ;
144- data .scratch = secp256k1_scratch_space_create (data .ctx , scratch_size );
145284 data .ecmult_multi = secp256k1_ecmult_multi_var ;
146285
147286 if (argc > 1 ) {
148- if (have_flag (argc , argv , "pippenger_wnaf" )) {
287+ if (have_flag (argc , argv , "-h" )
288+ || have_flag (argc , argv , "--help" )
289+ || have_flag (argc , argv , "help" )) {
290+ help (argv );
291+ return 1 ;
292+ } else if (have_flag (argc , argv , "pippenger_wnaf" )) {
149293 printf ("Using pippenger_wnaf:\n" );
150294 data .ecmult_multi = secp256k1_ecmult_pippenger_batch_single ;
151295 } else if (have_flag (argc , argv , "strauss_wnaf" )) {
152296 printf ("Using strauss_wnaf:\n" );
153297 data .ecmult_multi = secp256k1_ecmult_strauss_batch_single ;
154298 } else if (have_flag (argc , argv , "simple" )) {
155299 printf ("Using simple algorithm:\n" );
156- data .ecmult_multi = secp256k1_ecmult_multi_var ;
157- secp256k1_scratch_space_destroy (data .ctx , data .scratch );
158- data .scratch = NULL ;
159300 } else {
160- fprintf (stderr , "%s: unrecognized argument '%s'.\n" , argv [0 ], argv [1 ]);
161- fprintf ( stderr , "Use 'pippenger_wnaf', 'strauss_wnaf', 'simple' or no argument to benchmark a combined algorithm.\n" );
301+ fprintf (stderr , "%s: unrecognized argument '%s'.\n\n " , argv [0 ], argv [1 ]);
302+ help ( argv );
162303 return 1 ;
163304 }
164305 }
165306
307+ data .ctx = secp256k1_context_create (SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY );
308+ scratch_size = secp256k1_strauss_scratch_size (POINTS ) + STRAUSS_SCRATCH_OBJECTS * 16 ;
309+ if (!have_flag (argc , argv , "simple" )) {
310+ data .scratch = secp256k1_scratch_space_create (data .ctx , scratch_size );
311+ } else {
312+ data .scratch = NULL ;
313+ }
314+
166315 /* Allocate stuff */
167316 data .scalars = malloc (sizeof (secp256k1_scalar ) * POINTS );
168317 data .seckeys = malloc (sizeof (secp256k1_scalar ) * POINTS );
169318 data .pubkeys = malloc (sizeof (secp256k1_ge ) * POINTS );
319+ data .pubkeys_gej = malloc (sizeof (secp256k1_gej ) * POINTS );
170320 data .expected_output = malloc (sizeof (secp256k1_gej ) * (iters + 1 ));
171321 data .output = malloc (sizeof (secp256k1_gej ) * (iters + 1 ));
172322
173323 /* Generate a set of scalars, and private/public keypairs. */
174- pubkeys_gej = malloc (sizeof (secp256k1_gej ) * POINTS );
175- secp256k1_gej_set_ge (& pubkeys_gej [0 ], & secp256k1_ge_const_g );
324+ secp256k1_gej_set_ge (& data .pubkeys_gej [0 ], & secp256k1_ge_const_g );
176325 secp256k1_scalar_set_int (& data .seckeys [0 ], 1 );
177326 for (i = 0 ; i < POINTS ; ++ i ) {
178327 generate_scalar (i , & data .scalars [i ]);
179328 if (i ) {
180- secp256k1_gej_double_var (& pubkeys_gej [i ], & pubkeys_gej [i - 1 ], NULL );
329+ secp256k1_gej_double_var (& data . pubkeys_gej [i ], & data . pubkeys_gej [i - 1 ], NULL );
181330 secp256k1_scalar_add (& data .seckeys [i ], & data .seckeys [i - 1 ], & data .seckeys [i - 1 ]);
182331 }
183332 }
184- secp256k1_ge_set_all_gej_var (data .pubkeys , pubkeys_gej , POINTS );
185- free (pubkeys_gej );
333+ secp256k1_ge_set_all_gej_var (data .pubkeys , data .pubkeys_gej , POINTS );
334+
335+
336+ /* Initialize offset1 and offset2 */
337+ hash_into_offset (& data , 0 );
338+ run_ecmult_bench (& data , iters );
186339
187340 for (i = 1 ; i <= 8 ; ++ i ) {
188- run_test (& data , i , 1 , iters );
341+ run_ecmult_multi_bench (& data , i , 1 , iters );
189342 }
190343
191344 /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
@@ -194,7 +347,7 @@ int main(int argc, char **argv) {
194347 if (iters > 2 ) {
195348 for (p = 0 ; p <= 11 ; ++ p ) {
196349 for (i = 9 ; i <= 16 ; ++ i ) {
197- run_test (& data , i << p , 1 , iters );
350+ run_ecmult_multi_bench (& data , i << p , 1 , iters );
198351 }
199352 }
200353 }
@@ -205,6 +358,7 @@ int main(int argc, char **argv) {
205358 secp256k1_context_destroy (data .ctx );
206359 free (data .scalars );
207360 free (data .pubkeys );
361+ free (data .pubkeys_gej );
208362 free (data .seckeys );
209363 free (data .output );
210364 free (data .expected_output );
0 commit comments