@@ -38,6 +38,9 @@ static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
38
38
#endif
39
39
40
40
static void secp256k1_fe_impl_get_bounds (secp256k1_fe * r , int m ) {
41
+
42
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
43
+
41
44
r -> n [0 ] = 0x3FFFFFFUL * 2 * m ;
42
45
r -> n [1 ] = 0x3FFFFFFUL * 2 * m ;
43
46
r -> n [2 ] = 0x3FFFFFFUL * 2 * m ;
@@ -263,6 +266,9 @@ SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) {
263
266
264
267
SECP256K1_INLINE static int secp256k1_fe_impl_is_zero (const secp256k1_fe * a ) {
265
268
const uint32_t * t = a -> n ;
269
+
270
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
271
+
266
272
return (t [0 ] | t [1 ] | t [2 ] | t [3 ] | t [4 ] | t [5 ] | t [6 ] | t [7 ] | t [8 ] | t [9 ]) == 0 ;
267
273
}
268
274
@@ -272,18 +278,20 @@ SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) {
272
278
273
279
static int secp256k1_fe_impl_cmp_var (const secp256k1_fe * a , const secp256k1_fe * b ) {
274
280
int i ;
281
+ int diff ;
275
282
for (i = 9 ; i >= 0 ; i -- ) {
276
- if (a -> n [i ] > b -> n [i ]) {
277
- return 1 ;
278
- }
279
- if (a -> n [i ] < b -> n [i ]) {
280
- return -1 ;
283
+ diff = (a -> n [i ] > b -> n [i ]) - (a -> n [i ] < b -> n [i ]);
284
+ if (diff != 0 ) {
285
+ return diff ;
281
286
}
282
287
}
283
288
return 0 ;
284
289
}
285
290
286
291
static void secp256k1_fe_impl_set_b32_mod (secp256k1_fe * r , const unsigned char * a ) {
292
+
293
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
294
+
287
295
r -> n [0 ] = (uint32_t )a [31 ] | ((uint32_t )a [30 ] << 8 ) | ((uint32_t )a [29 ] << 16 ) | ((uint32_t )(a [28 ] & 0x3 ) << 24 );
288
296
r -> n [1 ] = (uint32_t )((a [28 ] >> 2 ) & 0x3f ) | ((uint32_t )a [27 ] << 6 ) | ((uint32_t )a [26 ] << 14 ) | ((uint32_t )(a [25 ] & 0xf ) << 22 );
289
297
r -> n [2 ] = (uint32_t )((a [25 ] >> 4 ) & 0xf ) | ((uint32_t )a [24 ] << 4 ) | ((uint32_t )a [23 ] << 12 ) | ((uint32_t )(a [22 ] & 0x3f ) << 20 );
@@ -303,6 +311,9 @@ static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char
303
311
304
312
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
305
313
static void secp256k1_fe_impl_get_b32 (unsigned char * r , const secp256k1_fe * a ) {
314
+
315
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
316
+
306
317
r [0 ] = (a -> n [9 ] >> 14 ) & 0xff ;
307
318
r [1 ] = (a -> n [9 ] >> 6 ) & 0xff ;
308
319
r [2 ] = ((a -> n [9 ] & 0x3F ) << 2 ) | ((a -> n [8 ] >> 24 ) & 0x3 );
@@ -346,6 +357,9 @@ SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r,
346
357
347
358
/* Due to the properties above, the left hand in the subtractions below is never less than
348
359
* the right hand. */
360
+
361
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
362
+
349
363
r -> n [0 ] = 0x3FFFC2FUL * 2 * (m + 1 ) - a -> n [0 ];
350
364
r -> n [1 ] = 0x3FFFFBFUL * 2 * (m + 1 ) - a -> n [1 ];
351
365
r -> n [2 ] = 0x3FFFFFFUL * 2 * (m + 1 ) - a -> n [2 ];
@@ -359,6 +373,8 @@ SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r,
359
373
}
360
374
361
375
SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked (secp256k1_fe * r , int a ) {
376
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
377
+
362
378
r -> n [0 ] *= a ;
363
379
r -> n [1 ] *= a ;
364
380
r -> n [2 ] *= a ;
@@ -372,6 +388,8 @@ SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r
372
388
}
373
389
374
390
SECP256K1_INLINE static void secp256k1_fe_impl_add (secp256k1_fe * r , const secp256k1_fe * a ) {
391
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
392
+
375
393
r -> n [0 ] += a -> n [0 ];
376
394
r -> n [1 ] += a -> n [1 ];
377
395
r -> n [2 ] += a -> n [2 ];
@@ -1017,6 +1035,10 @@ SECP256K1_INLINE static void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp2
1017
1035
SECP256K1_CHECKMEM_CHECK_VERIFY (r -> n , sizeof (r -> n ));
1018
1036
mask0 = vflag + ~((uint32_t )0 );
1019
1037
mask1 = ~mask0 ;
1038
+
1039
+
1040
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1041
+
1020
1042
r -> n [0 ] = (r -> n [0 ] & mask0 ) | (a -> n [0 ] & mask1 );
1021
1043
r -> n [1 ] = (r -> n [1 ] & mask0 ) | (a -> n [1 ] & mask1 );
1022
1044
r -> n [2 ] = (r -> n [2 ] & mask0 ) | (a -> n [2 ] & mask1 );
@@ -1065,6 +1087,8 @@ static SECP256K1_INLINE void secp256k1_fe_impl_half(secp256k1_fe *r) {
1065
1087
* t9 <= D * (m + 1/2)
1066
1088
*/
1067
1089
1090
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1091
+
1068
1092
r -> n [0 ] = (t0 >> 1 ) + ((t1 & one ) << 25 );
1069
1093
r -> n [1 ] = (t1 >> 1 ) + ((t2 & one ) << 25 );
1070
1094
r -> n [2 ] = (t2 >> 1 ) + ((t3 & one ) << 25 );
@@ -1100,6 +1124,9 @@ static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r,
1100
1124
SECP256K1_CHECKMEM_CHECK_VERIFY (r -> n , sizeof (r -> n ));
1101
1125
mask0 = vflag + ~((uint32_t )0 );
1102
1126
mask1 = ~mask0 ;
1127
+
1128
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1129
+
1103
1130
r -> n [0 ] = (r -> n [0 ] & mask0 ) | (a -> n [0 ] & mask1 );
1104
1131
r -> n [1 ] = (r -> n [1 ] & mask0 ) | (a -> n [1 ] & mask1 );
1105
1132
r -> n [2 ] = (r -> n [2 ] & mask0 ) | (a -> n [2 ] & mask1 );
@@ -1111,6 +1138,9 @@ static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r,
1111
1138
}
1112
1139
1113
1140
static void secp256k1_fe_impl_to_storage (secp256k1_fe_storage * r , const secp256k1_fe * a ) {
1141
+
1142
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1143
+
1114
1144
r -> n [0 ] = a -> n [0 ] | a -> n [1 ] << 26 ;
1115
1145
r -> n [1 ] = a -> n [1 ] >> 6 | a -> n [2 ] << 20 ;
1116
1146
r -> n [2 ] = a -> n [2 ] >> 12 | a -> n [3 ] << 14 ;
@@ -1123,6 +1153,9 @@ static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k
1123
1153
1124
1154
static SECP256K1_INLINE void secp256k1_fe_impl_from_storage (secp256k1_fe * r , const secp256k1_fe_storage * a ) {
1125
1155
r -> n [0 ] = a -> n [0 ] & 0x3FFFFFFUL ;
1156
+
1157
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1158
+
1126
1159
r -> n [1 ] = a -> n [0 ] >> 26 | ((a -> n [1 ] << 6 ) & 0x3FFFFFFUL );
1127
1160
r -> n [2 ] = a -> n [1 ] >> 20 | ((a -> n [2 ] << 12 ) & 0x3FFFFFFUL );
1128
1161
r -> n [3 ] = a -> n [2 ] >> 14 | ((a -> n [3 ] << 18 ) & 0x3FFFFFFUL );
@@ -1152,6 +1185,8 @@ static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32
1152
1185
VERIFY_CHECK (a7 >> 30 == 0 );
1153
1186
VERIFY_CHECK (a8 >> 16 == 0 );
1154
1187
1188
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1189
+
1155
1190
r -> n [0 ] = a0 & M26 ;
1156
1191
r -> n [1 ] = (a0 >> 26 | a1 << 4 ) & M26 ;
1157
1192
r -> n [2 ] = (a1 >> 22 | a2 << 8 ) & M26 ;
@@ -1169,6 +1204,8 @@ static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp2
1169
1204
const uint64_t a0 = a -> n [0 ], a1 = a -> n [1 ], a2 = a -> n [2 ], a3 = a -> n [3 ], a4 = a -> n [4 ],
1170
1205
a5 = a -> n [5 ], a6 = a -> n [6 ], a7 = a -> n [7 ], a8 = a -> n [8 ], a9 = a -> n [9 ];
1171
1206
1207
+ /* TODO: parallelize, SSE2 (32bit cpu only) */
1208
+
1172
1209
r -> v [0 ] = (a0 | a1 << 26 ) & M30 ;
1173
1210
r -> v [1 ] = (a1 >> 4 | a2 << 22 ) & M30 ;
1174
1211
r -> v [2 ] = (a2 >> 8 | a3 << 18 ) & M30 ;
0 commit comments