Skip to content

Commit b3f4ebd

Browse files
authored
[ec]: batchAffine does not use alloca anymore (#579)
* [ec]: batchAffine does not use alloca anymore
* [ec]: remove [Alloca] effect and noinline pragma
1 parent d2df757 commit b3f4ebd

File tree

2 files changed

+83
-28
lines changed

2 files changed

+83
-28
lines changed

constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ import
2929
func batchAffine*[F, G](
3030
affs: ptr UncheckedArray[EC_ShortW_Aff[F, G]],
3131
projs: ptr UncheckedArray[EC_ShortW_Prj[F, G]],
32-
N: int) {.noInline, tags:[Alloca], meter.} =
32+
N: int) {.meter.} =
3333
# Algorithm: Montgomery's batch inversion
3434
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
3535
# Section 10.3.1
@@ -41,17 +41,22 @@ func batchAffine*[F, G](
4141
# https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf
4242

4343
# To avoid temporaries, we store partial accumulations
44-
# in affs[i].x
45-
let zeroes = allocStackArray(SecretBool, N)
44+
# in affs[i].x and whether z == 0 in affs[i].y
45+
template zero(i: int): SecretBool =
46+
when F is Fp:
47+
SecretBool affs[i].y.mres.limbs[0]
48+
else:
49+
SecretBool affs[i].y.coords[0].mres.limbs[0]
50+
4651
affs[0].x = projs[0].z
47-
zeroes[0] = affs[0].x.isZero()
48-
affs[0].x.csetOne(zeroes[0])
52+
zero(0) = affs[0].x.isZero()
53+
affs[0].x.csetOne(zero(0))
4954

5055
for i in 1 ..< N:
5156
# Skip zero z-coordinates (infinity points)
5257
var z = projs[i].z
53-
zeroes[i] = z.isZero()
54-
z.csetOne(zeroes[i])
58+
zero(i) = z.isZero()
59+
z.csetOne(zero(i))
5560

5661
if i != N-1:
5762
affs[i].x.prod(affs[i-1].x, z, lazyReduce = true)
@@ -63,21 +68,21 @@ func batchAffine*[F, G](
6368

6469
for i in countdown(N-1, 1):
6570
# Extract 1/Pᵢ
66-
var invi {.noInit.}: F
71+
var invi {.noInit.}, invi_next {.noInit.}: F
6772
invi.prod(accInv, affs[i-1].x, lazyReduce = true)
68-
invi.csetZero(zeroes[i])
73+
invi.csetZero(zero(i))
74+
75+
# next iteration (must run before affs[i].y is written: zero(i) aliases affs[i].y)
76+
invi_next = projs[i].z
77+
invi_next.csetOne(zero(i))
78+
accInv.prod(accInv, invi_next, lazyReduce = true)
6979

7080
# Now convert Pᵢ to affine
7181
affs[i].x.prod(projs[i].x, invi)
7282
affs[i].y.prod(projs[i].y, invi)
7383

74-
# next iteration
75-
invi = projs[i].z
76-
invi.csetOne(zeroes[i])
77-
accInv.prod(accInv, invi, lazyReduce = true)
78-
7984
block: # tail
80-
accInv.csetZero(zeroes[0])
85+
accInv.csetZero(zero(0))
8186
affs[0].x.prod(projs[0].x, accInv)
8287
affs[0].y.prod(projs[0].y, accInv)
8388

@@ -89,7 +94,7 @@ func batchAffine*[N: static int, F, G](
8994
func batchAffine*[F, G](
9095
affs: ptr UncheckedArray[EC_ShortW_Aff[F, G]],
9196
jacs: ptr UncheckedArray[EC_ShortW_Jac[F, G]],
92-
N: int) {.noInline, tags:[Alloca], meter.} =
97+
N: int) {.meter.} =
9398
# Algorithm: Montgomery's batch inversion
9499
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
95100
# Section 10.3.1
@@ -102,16 +107,21 @@ func batchAffine*[F, G](
102107

103108
# To avoid temporaries, we store partial accumulations
104109
# in affs[i].x and whether z == 0 in affs[i].y
105-
var zeroes = allocStackArray(SecretBool, N)
110+
template zero(i: int): SecretBool =
111+
when F is Fp:
112+
SecretBool affs[i].y.mres.limbs[0]
113+
else:
114+
SecretBool affs[i].y.coords[0].mres.limbs[0]
115+
106116
affs[0].x = jacs[0].z
107-
zeroes[0] = affs[0].x.isZero()
108-
affs[0].x.csetOne(zeroes[0])
117+
zero(0) = affs[0].x.isZero()
118+
affs[0].x.csetOne(zero(0))
109119

110120
for i in 1 ..< N:
111121
# Skip zero z-coordinates (infinity points)
112122
var z = jacs[i].z
113-
zeroes[i] = z.isZero()
114-
z.csetOne(zeroes[i])
123+
zero(i) = z.isZero()
124+
z.csetOne(zero(i))
115125

116126
if i != N-1:
117127
affs[i].x.prod(affs[i-1].x, z, lazyReduce = true)
@@ -123,9 +133,14 @@ func batchAffine*[F, G](
123133

124134
for i in countdown(N-1, 1):
125135
# Extract 1/Pᵢ
126-
var invi {.noInit.}: F
136+
var invi {.noInit.}, invi_next {.noInit.}: F
127137
invi.prod(accInv, affs[i-1].x, lazyReduce = true)
128-
invi.csetZero(zeroes[i])
138+
invi.csetZero(zero(i))
139+
140+
# next iteration (must run before affs[i].y is written: zero(i) aliases affs[i].y)
141+
invi_next = jacs[i].z
142+
invi_next.csetOne(zero(i))
143+
accInv.prod(accInv, invi_next, lazyReduce = true)
129144

130145
# Now convert Pᵢ to affine
131146
var invi2 {.noinit.}: F
@@ -134,14 +149,10 @@ func batchAffine*[F, G](
134149
invi.prod(invi, invi2, lazyReduce = true)
135150
affs[i].y.prod(jacs[i].y, invi)
136151

137-
# next iteration
138-
invi = jacs[i].z
139-
invi.csetOne(zeroes[i])
140-
accInv.prod(accInv, invi, lazyReduce = true)
141152

142153
block: # tail
143154
var invi2 {.noinit.}: F
144-
accInv.csetZero(zeroes[0])
155+
accInv.csetZero(zero(0))
145156
invi2.square(accInv, lazyReduce = true)
146157
affs[0].x.prod(jacs[0].x, invi2)
147158
accInv.prod(accInv, invi2, lazyReduce = true)

tests/math_elliptic_curves/t_ec_template.nim

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,6 +1169,50 @@ proc run_EC_affine_conversion*(
11691169
test(ec, gen = HighHammingWeight)
11701170
test(ec, gen = Long01Sequence)
11711171

1172+
test "EC " & $ec.G & " batchAffine with infinite points":
1173+
proc test(EC: typedesc, gen: RandomGen) =
1174+
const batchSize = 10
1175+
for _ in 0 ..< Iters:
1176+
var Ps: array[batchSize, EC]
1177+
for i in 0 ..< batchSize:
1178+
if rng.sample_unsafe([0, 1, 2]) != 0: # 33% chance of infinite point
1179+
Ps[i] = rng.random_point(EC, randZ = true, gen)
1180+
else:
1181+
Ps[i].setNeutral()
1182+
1183+
var Qs, Rs: array[batchSize, affine(EC)]
1184+
for i in 0 ..< batchSize:
1185+
Qs[i].affine(Ps[i])
1186+
Rs.batchAffine(Ps)
1187+
1188+
for i in countdown(batchSize-1, 0):
1189+
doAssert bool(Qs[i] == Rs[i]), block:
1190+
var s: string
1191+
s &= "Mismatch on iteration " & $i
1192+
s &= "\nFailing batch for " & $EC & " (" & $WordBitWidth & "-bit)"
1193+
s &= "\n ["
1194+
for i in 0 ..< batchSize:
1195+
s &= "\n" & Ps[i].toHex(indent = 4)
1196+
if i != batchSize-1: s &= ","
1197+
s &= "\n ]"
1198+
s &= "\nFailing inversions for " & $EC & " (" & $WordBitWidth & "-bit)"
1199+
s &= "\n ["
1200+
for i in 0 ..< batchSize:
1201+
s &= "\n" & Rs[i].toHex(indent = 4)
1202+
if i != batchSize-1: s &= ","
1203+
s &= "\n ]"
1204+
s &= "\nExpected inversions for " & $EC & " (" & $WordBitWidth & "-bit)"
1205+
s &= "\n ["
1206+
for i in 0 ..< batchSize:
1207+
s &= "\n" & Qs[i].toHex(indent = 4)
1208+
if i != batchSize-1: s &= ","
1209+
s &= "\n ]"
1210+
s
1211+
1212+
test(ec, gen = Uniform)
1213+
test(ec, gen = HighHammingWeight)
1214+
test(ec, gen = Long01Sequence)
1215+
11721216
proc run_EC_conversion_failures*(
11731217
moduleName: string
11741218
) =

0 commit comments

Comments
 (0)