Skip to content

Commit f3733c5

Browse files
committed
Merge #797: Fix Jacobi benchmarks and other benchmark improvements
cb5524a Add benchmark for secp256k1_ge_set_gej_var (Pieter Wuille); 5c6af60 Make jacobi benchmarks vary inputs (Pieter Wuille); d0fdd5f Randomize the Z coordinates in bench_internal (Pieter Wuille); c7a3424 Rename bench_internal variables (Pieter Wuille). Pull request description: ACKs for top commit: real-or-random: ACK cb5524a; jonasnick: ACK cb5524a. Tree-SHA512: 0cbcfffebebf563cf9a1bd951394a0419503ffd43a2d0df4c99e4a839c89c8454925314f7e7eee0c01bce94b6dfeab935f36cc27f9bfc878f702313d455db7e1
2 parents 875d68b + cb5524a commit f3733c5

File tree

1 file changed

+113
-60
lines changed

1 file changed

+113
-60
lines changed

src/bench_internal.c

Lines changed: 113 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -20,49 +20,72 @@
2020
#include "secp256k1.c"
2121

2222
typedef struct {
23-
secp256k1_scalar scalar_x, scalar_y;
24-
secp256k1_fe fe_x, fe_y;
25-
secp256k1_ge ge_x, ge_y;
26-
secp256k1_gej gej_x, gej_y;
23+
secp256k1_scalar scalar[2];
24+
secp256k1_fe fe[4];
25+
secp256k1_ge ge[2];
26+
secp256k1_gej gej[2];
2727
unsigned char data[64];
2828
int wnaf[256];
2929
} bench_inv;
3030

3131
void bench_setup(void* arg) {
3232
bench_inv *data = (bench_inv*)arg;
3333

34-
static const unsigned char init_x[32] = {
35-
0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
36-
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
37-
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
38-
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
34+
static const unsigned char init[4][32] = {
35+
/* Initializer for scalar[0], fe[0], first half of data, the X coordinate of ge[0],
36+
and the (implied affine) X coordinate of gej[0]. */
37+
{
38+
0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
39+
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
40+
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
41+
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
42+
},
43+
/* Initializer for scalar[1], fe[1], second half of data, the X coordinate of ge[1],
44+
and the (implied affine) X coordinate of gej[1]. */
45+
{
46+
0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
47+
0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
48+
0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
49+
0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
50+
},
51+
/* Initializer for fe[2] and the Z coordinate of gej[0]. */
52+
{
53+
0x3d, 0x2d, 0xef, 0xf4, 0x25, 0x98, 0x4f, 0x5d,
54+
0xe2, 0xca, 0x5f, 0x41, 0x3f, 0x3f, 0xce, 0x44,
55+
0xaa, 0x2c, 0x53, 0x8a, 0xc6, 0x59, 0x1f, 0x38,
56+
0x38, 0x23, 0xe4, 0x11, 0x27, 0xc6, 0xa0, 0xe7
57+
},
58+
/* Initializer for fe[3] and the Z coordinate of gej[1]. */
59+
{
60+
0xbd, 0x21, 0xa5, 0xe1, 0x13, 0x50, 0x73, 0x2e,
61+
0x52, 0x98, 0xc8, 0x9e, 0xab, 0x00, 0xa2, 0x68,
62+
0x43, 0xf5, 0xd7, 0x49, 0x80, 0x72, 0xa7, 0xf3,
63+
0xd7, 0x60, 0xe6, 0xab, 0x90, 0x92, 0xdf, 0xc5
64+
}
3965
};
4066

41-
static const unsigned char init_y[32] = {
42-
0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
43-
0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
44-
0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
45-
0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
46-
};
47-
48-
secp256k1_scalar_set_b32(&data->scalar_x, init_x, NULL);
49-
secp256k1_scalar_set_b32(&data->scalar_y, init_y, NULL);
50-
secp256k1_fe_set_b32(&data->fe_x, init_x);
51-
secp256k1_fe_set_b32(&data->fe_y, init_y);
52-
CHECK(secp256k1_ge_set_xo_var(&data->ge_x, &data->fe_x, 0));
53-
CHECK(secp256k1_ge_set_xo_var(&data->ge_y, &data->fe_y, 1));
54-
secp256k1_gej_set_ge(&data->gej_x, &data->ge_x);
55-
secp256k1_gej_set_ge(&data->gej_y, &data->ge_y);
56-
memcpy(data->data, init_x, 32);
57-
memcpy(data->data + 32, init_y, 32);
67+
secp256k1_scalar_set_b32(&data->scalar[0], init[0], NULL);
68+
secp256k1_scalar_set_b32(&data->scalar[1], init[1], NULL);
69+
secp256k1_fe_set_b32(&data->fe[0], init[0]);
70+
secp256k1_fe_set_b32(&data->fe[1], init[1]);
71+
secp256k1_fe_set_b32(&data->fe[2], init[2]);
72+
secp256k1_fe_set_b32(&data->fe[3], init[3]);
73+
CHECK(secp256k1_ge_set_xo_var(&data->ge[0], &data->fe[0], 0));
74+
CHECK(secp256k1_ge_set_xo_var(&data->ge[1], &data->fe[1], 1));
75+
secp256k1_gej_set_ge(&data->gej[0], &data->ge[0]);
76+
secp256k1_gej_rescale(&data->gej[0], &data->fe[2]);
77+
secp256k1_gej_set_ge(&data->gej[1], &data->ge[1]);
78+
secp256k1_gej_rescale(&data->gej[1], &data->fe[3]);
79+
memcpy(data->data, init[0], 32);
80+
memcpy(data->data + 32, init[1], 32);
5881
}
5982

6083
void bench_scalar_add(void* arg, int iters) {
6184
int i, j = 0;
6285
bench_inv *data = (bench_inv*)arg;
6386

6487
for (i = 0; i < iters; i++) {
65-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
88+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
6689
}
6790
CHECK(j <= iters);
6891
}
@@ -72,7 +95,7 @@ void bench_scalar_negate(void* arg, int iters) {
7295
bench_inv *data = (bench_inv*)arg;
7396

7497
for (i = 0; i < iters; i++) {
75-
secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x);
98+
secp256k1_scalar_negate(&data->scalar[0], &data->scalar[0]);
7699
}
77100
}
78101

@@ -81,7 +104,7 @@ void bench_scalar_sqr(void* arg, int iters) {
81104
bench_inv *data = (bench_inv*)arg;
82105

83106
for (i = 0; i < iters; i++) {
84-
secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x);
107+
secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]);
85108
}
86109
}
87110

@@ -90,7 +113,7 @@ void bench_scalar_mul(void* arg, int iters) {
90113
bench_inv *data = (bench_inv*)arg;
91114

92115
for (i = 0; i < iters; i++) {
93-
secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y);
116+
secp256k1_scalar_mul(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
94117
}
95118
}
96119

@@ -100,8 +123,8 @@ void bench_scalar_split(void* arg, int iters) {
100123
bench_inv *data = (bench_inv*)arg;
101124

102125
for (i = 0; i < iters; i++) {
103-
secp256k1_scalar_split_lambda(&data->scalar_x, &data->scalar_y, &data->scalar_x);
104-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
126+
secp256k1_scalar_split_lambda(&data->scalar[0], &data->scalar[1], &data->scalar[0]);
127+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
105128
}
106129
CHECK(j <= iters);
107130
}
@@ -112,8 +135,8 @@ void bench_scalar_inverse(void* arg, int iters) {
112135
bench_inv *data = (bench_inv*)arg;
113136

114137
for (i = 0; i < iters; i++) {
115-
secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
116-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
138+
secp256k1_scalar_inverse(&data->scalar[0], &data->scalar[0]);
139+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
117140
}
118141
CHECK(j <= iters);
119142
}
@@ -123,8 +146,8 @@ void bench_scalar_inverse_var(void* arg, int iters) {
123146
bench_inv *data = (bench_inv*)arg;
124147

125148
for (i = 0; i < iters; i++) {
126-
secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
127-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
149+
secp256k1_scalar_inverse_var(&data->scalar[0], &data->scalar[0]);
150+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
128151
}
129152
CHECK(j <= iters);
130153
}
@@ -134,7 +157,7 @@ void bench_field_normalize(void* arg, int iters) {
134157
bench_inv *data = (bench_inv*)arg;
135158

136159
for (i = 0; i < iters; i++) {
137-
secp256k1_fe_normalize(&data->fe_x);
160+
secp256k1_fe_normalize(&data->fe[0]);
138161
}
139162
}
140163

@@ -143,7 +166,7 @@ void bench_field_normalize_weak(void* arg, int iters) {
143166
bench_inv *data = (bench_inv*)arg;
144167

145168
for (i = 0; i < iters; i++) {
146-
secp256k1_fe_normalize_weak(&data->fe_x);
169+
secp256k1_fe_normalize_weak(&data->fe[0]);
147170
}
148171
}
149172

@@ -152,7 +175,7 @@ void bench_field_mul(void* arg, int iters) {
152175
bench_inv *data = (bench_inv*)arg;
153176

154177
for (i = 0; i < iters; i++) {
155-
secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y);
178+
secp256k1_fe_mul(&data->fe[0], &data->fe[0], &data->fe[1]);
156179
}
157180
}
158181

@@ -161,7 +184,7 @@ void bench_field_sqr(void* arg, int iters) {
161184
bench_inv *data = (bench_inv*)arg;
162185

163186
for (i = 0; i < iters; i++) {
164-
secp256k1_fe_sqr(&data->fe_x, &data->fe_x);
187+
secp256k1_fe_sqr(&data->fe[0], &data->fe[0]);
165188
}
166189
}
167190

@@ -170,8 +193,8 @@ void bench_field_inverse(void* arg, int iters) {
170193
bench_inv *data = (bench_inv*)arg;
171194

172195
for (i = 0; i < iters; i++) {
173-
secp256k1_fe_inv(&data->fe_x, &data->fe_x);
174-
secp256k1_fe_add(&data->fe_x, &data->fe_y);
196+
secp256k1_fe_inv(&data->fe[0], &data->fe[0]);
197+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
175198
}
176199
}
177200

@@ -180,8 +203,8 @@ void bench_field_inverse_var(void* arg, int iters) {
180203
bench_inv *data = (bench_inv*)arg;
181204

182205
for (i = 0; i < iters; i++) {
183-
secp256k1_fe_inv_var(&data->fe_x, &data->fe_x);
184-
secp256k1_fe_add(&data->fe_x, &data->fe_y);
206+
secp256k1_fe_inv_var(&data->fe[0], &data->fe[0]);
207+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
185208
}
186209
}
187210

@@ -191,9 +214,9 @@ void bench_field_sqrt(void* arg, int iters) {
191214
secp256k1_fe t;
192215

193216
for (i = 0; i < iters; i++) {
194-
t = data->fe_x;
195-
j += secp256k1_fe_sqrt(&data->fe_x, &t);
196-
secp256k1_fe_add(&data->fe_x, &data->fe_y);
217+
t = data->fe[0];
218+
j += secp256k1_fe_sqrt(&data->fe[0], &t);
219+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
197220
}
198221
CHECK(j <= iters);
199222
}
@@ -203,7 +226,7 @@ void bench_group_double_var(void* arg, int iters) {
203226
bench_inv *data = (bench_inv*)arg;
204227

205228
for (i = 0; i < iters; i++) {
206-
secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
229+
secp256k1_gej_double_var(&data->gej[0], &data->gej[0], NULL);
207230
}
208231
}
209232

@@ -212,7 +235,7 @@ void bench_group_add_var(void* arg, int iters) {
212235
bench_inv *data = (bench_inv*)arg;
213236

214237
for (i = 0; i < iters; i++) {
215-
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
238+
secp256k1_gej_add_var(&data->gej[0], &data->gej[0], &data->gej[1], NULL);
216239
}
217240
}
218241

@@ -221,7 +244,7 @@ void bench_group_add_affine(void* arg, int iters) {
221244
bench_inv *data = (bench_inv*)arg;
222245

223246
for (i = 0; i < iters; i++) {
224-
secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y);
247+
secp256k1_gej_add_ge(&data->gej[0], &data->gej[0], &data->ge[1]);
225248
}
226249
}
227250

@@ -230,7 +253,7 @@ void bench_group_add_affine_var(void* arg, int iters) {
230253
bench_inv *data = (bench_inv*)arg;
231254

232255
for (i = 0; i < iters; i++) {
233-
secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
256+
secp256k1_gej_add_ge_var(&data->gej[0], &data->gej[0], &data->ge[1], NULL);
234257
}
235258
}
236259

@@ -239,18 +262,46 @@ void bench_group_jacobi_var(void* arg, int iters) {
239262
bench_inv *data = (bench_inv*)arg;
240263

241264
for (i = 0; i < iters; i++) {
242-
j += secp256k1_gej_has_quad_y_var(&data->gej_x);
265+
j += secp256k1_gej_has_quad_y_var(&data->gej[0]);
266+
/* Vary the Y and Z coordinates of the input (the X coordinate doesn't matter to
267+
secp256k1_gej_has_quad_y_var). Note that the resulting coordinates will
268+
generally not correspond to a point on the curve, but this is not a problem
269+
for the code being benchmarked here. Adding and normalizing have less
270+
overhead than EC operations (which could guarantee the point remains on the
271+
curve). */
272+
secp256k1_fe_add(&data->gej[0].y, &data->fe[1]);
273+
secp256k1_fe_add(&data->gej[0].z, &data->fe[2]);
274+
secp256k1_fe_normalize_var(&data->gej[0].y);
275+
secp256k1_fe_normalize_var(&data->gej[0].z);
276+
}
277+
CHECK(j <= iters);
278+
}
279+
280+
void bench_group_to_affine_var(void* arg, int iters) {
281+
int i;
282+
bench_inv *data = (bench_inv*)arg;
283+
284+
for (i = 0; i < iters; ++i) {
285+
secp256k1_ge_set_gej_var(&data->ge[1], &data->gej[0]);
286+
/* Use the output affine X/Y coordinates to vary the input X/Y/Z coordinates.
287+
Similar to bench_group_jacobi_var, this approach does not result in
288+
coordinates of points on the curve. */
289+
secp256k1_fe_add(&data->gej[0].x, &data->ge[1].y);
290+
secp256k1_fe_add(&data->gej[0].y, &data->fe[2]);
291+
secp256k1_fe_add(&data->gej[0].z, &data->ge[1].x);
292+
secp256k1_fe_normalize_var(&data->gej[0].x);
293+
secp256k1_fe_normalize_var(&data->gej[0].y);
294+
secp256k1_fe_normalize_var(&data->gej[0].z);
243295
}
244-
CHECK(j == iters);
245296
}
246297

247298
void bench_ecmult_wnaf(void* arg, int iters) {
248299
int i, bits = 0, overflow = 0;
249300
bench_inv *data = (bench_inv*)arg;
250301

251302
for (i = 0; i < iters; i++) {
252-
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
253-
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
303+
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar[0], WINDOW_A);
304+
overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
254305
}
255306
CHECK(overflow >= 0);
256307
CHECK(bits <= 256*iters);
@@ -261,8 +312,8 @@ void bench_wnaf_const(void* arg, int iters) {
261312
bench_inv *data = (bench_inv*)arg;
262313

263314
for (i = 0; i < iters; i++) {
264-
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
265-
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
315+
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar[0], WINDOW_A, 256);
316+
overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
266317
}
267318
CHECK(overflow >= 0);
268319
CHECK(bits <= 256*iters);
@@ -324,14 +375,15 @@ void bench_context_sign(void* arg, int iters) {
324375
void bench_num_jacobi(void* arg, int iters) {
325376
int i, j = 0;
326377
bench_inv *data = (bench_inv*)arg;
327-
secp256k1_num nx, norder;
378+
secp256k1_num nx, na, norder;
328379

329-
secp256k1_scalar_get_num(&nx, &data->scalar_x);
380+
secp256k1_scalar_get_num(&nx, &data->scalar[0]);
330381
secp256k1_scalar_order_get_num(&norder);
331-
secp256k1_scalar_get_num(&norder, &data->scalar_y);
382+
secp256k1_scalar_get_num(&na, &data->scalar[1]);
332383

333384
for (i = 0; i < iters; i++) {
334385
j += secp256k1_num_jacobi(&nx, &norder);
386+
secp256k1_num_add(&nx, &nx, &na);
335387
}
336388
CHECK(j <= iters);
337389
}
@@ -364,6 +416,7 @@ int main(int argc, char **argv) {
364416
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
365417
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
366418
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "jacobi")) run_benchmark("group_jacobi_var", bench_group_jacobi_var, bench_setup, NULL, &data, 10, iters);
419+
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
367420

368421
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
369422
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);

0 commit comments

Comments
 (0)