2929func batchAffine * [F, G](
3030 affs: ptr UncheckedArray [EC_ShortW_Aff[F, G]],
3131 projs: ptr UncheckedArray [EC_ShortW_Prj[F, G]],
32- N: int ) {.noInline , tags :[ Alloca ], meter .} =
32+ N: int ) {.meter .} =
3333 # Algorithm: Montgomery's batch inversion
3434 # - Speeding the Pollard and Elliptic Curve Methods of Factorization
3535 # Section 10.3.1
@@ -41,17 +41,22 @@ func batchAffine*[F, G](
4141 # https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf
4242
4343 # To avoid temporaries, we store partial accumulations
44- # in affs[i].x
45- let zeroes = allocStackArray (SecretBool , N)
44+ # in affs[i].x and whether z == 0 in affs[i].y
45+ template zero (i: int ): SecretBool =
46+ when F is Fp :
47+ SecretBool affs[i].y.mres.limbs[0 ]
48+ else :
49+ SecretBool affs[i].y.coords[0 ].mres.limbs[0 ]
50+
4651 affs[0 ].x = projs[0 ].z
47- zeroes[ 0 ] = affs[0 ].x.isZero ()
48- affs[0 ].x.csetOne (zeroes[ 0 ] )
52+ zero ( 0 ) = affs[0 ].x.isZero ()
53+ affs[0 ].x.csetOne (zero ( 0 ) )
4954
5055 for i in 1 ..< N:
5156 # Skip zero z-coordinates (infinity points)
5257 var z = projs[i].z
53- zeroes[i] = z.isZero ()
54- z.csetOne (zeroes[i] )
58+ zero (i) = z.isZero ()
59+ z.csetOne (zero (i) )
5560
5661 if i != N- 1 :
5762 affs[i].x.prod (affs[i- 1 ].x, z, lazyReduce = true )
@@ -63,21 +68,21 @@ func batchAffine*[F, G](
6368
6469 for i in countdown (N- 1 , 1 ):
6570 # Extract 1/Pᵢ
66- var invi {.noInit .}: F
71+ var invi {.noInit .}, invi_next {. noInit .} : F
6772 invi.prod (accInv, affs[i- 1 ].x, lazyReduce = true )
68- invi.csetZero (zeroes[i])
73+ invi.csetZero (zero (i))
74+
75+ # next iteration: update accInv now, because zero(i) aliases affs[i].y, which is overwritten below
76+ invi_next = projs[i].z
77+ invi_next.csetOne (zero (i))
78+ accInv.prod (accInv, invi_next, lazyReduce = true )
6979
7080 # Now convert Pᵢ to affine
7181 affs[i].x.prod (projs[i].x, invi)
7282 affs[i].y.prod (projs[i].y, invi)
7383
74- # next iteration
75- invi = projs[i].z
76- invi.csetOne (zeroes[i])
77- accInv.prod (accInv, invi, lazyReduce = true )
78-
7984 block : # tail
80- accInv.csetZero (zeroes[ 0 ] )
85+ accInv.csetZero (zero ( 0 ) )
8186 affs[0 ].x.prod (projs[0 ].x, accInv)
8287 affs[0 ].y.prod (projs[0 ].y, accInv)
8388
@@ -89,7 +94,7 @@ func batchAffine*[N: static int, F, G](
8994func batchAffine * [F, G](
9095 affs: ptr UncheckedArray [EC_ShortW_Aff[F, G]],
9196 jacs: ptr UncheckedArray [EC_ShortW_Jac[F, G]],
92- N: int ) {.noInline , tags :[ Alloca ], meter .} =
97+ N: int ) {.meter .} =
9398 # Algorithm: Montgomery's batch inversion
9499 # - Speeding the Pollard and Elliptic Curve Methods of Factorization
95100 # Section 10.3.1
@@ -102,16 +107,21 @@ func batchAffine*[F, G](
102107
103108 # To avoid temporaries, we store partial accumulations
104109 # in affs[i].x and whether z == 0 in affs[i].y
105- var zeroes = allocStackArray (SecretBool , N)
110+ template zero (i: int ): SecretBool =
111+ when F is Fp :
112+ SecretBool affs[i].y.mres.limbs[0 ]
113+ else :
114+ SecretBool affs[i].y.coords[0 ].mres.limbs[0 ]
115+
106116 affs[0 ].x = jacs[0 ].z
107- zeroes[ 0 ] = affs[0 ].x.isZero ()
108- affs[0 ].x.csetOne (zeroes[ 0 ] )
117+ zero ( 0 ) = affs[0 ].x.isZero ()
118+ affs[0 ].x.csetOne (zero ( 0 ) )
109119
110120 for i in 1 ..< N:
111121 # Skip zero z-coordinates (infinity points)
112122 var z = jacs[i].z
113- zeroes[i] = z.isZero ()
114- z.csetOne (zeroes[i] )
123+ zero (i) = z.isZero ()
124+ z.csetOne (zero (i) )
115125
116126 if i != N- 1 :
117127 affs[i].x.prod (affs[i- 1 ].x, z, lazyReduce = true )
@@ -123,9 +133,14 @@ func batchAffine*[F, G](
123133
124134 for i in countdown (N- 1 , 1 ):
125135 # Extract 1/Pᵢ
126- var invi {.noInit .}: F
136+ var invi {.noInit .}, invi_next {. noInit .} : F
127137 invi.prod (accInv, affs[i- 1 ].x, lazyReduce = true )
128- invi.csetZero (zeroes[i])
138+ invi.csetZero (zero (i))
139+
140+ # next iteration: update accInv now, because zero(i) aliases affs[i].y, which is overwritten below
141+ invi_next = jacs[i].z
142+ invi_next.csetOne (zero (i))
143+ accInv.prod (accInv, invi_next, lazyReduce = true )
129144
130145 # Now convert Pᵢ to affine
131146 var invi2 {.noinit .}: F
@@ -134,14 +149,10 @@ func batchAffine*[F, G](
134149 invi.prod (invi, invi2, lazyReduce = true )
135150 affs[i].y.prod (jacs[i].y, invi)
136151
137- # next iteration
138- invi = jacs[i].z
139- invi.csetOne (zeroes[i])
140- accInv.prod (accInv, invi, lazyReduce = true )
141152
142153 block : # tail
143154 var invi2 {.noinit .}: F
144- accInv.csetZero (zeroes[ 0 ] )
155+ accInv.csetZero (zero ( 0 ) )
145156 invi2.square (accInv, lazyReduce = true )
146157 affs[0 ].x.prod (jacs[0 ].x, invi2)
147158 accInv.prod (accInv, invi2, lazyReduce = true )
0 commit comments