* Changes:
* - Add call to csub in reduce128_avx to produce outputs
*   in [0,1,...,q-1] rather than [0,1,...,q], matching the
- *   semantics of mlk_poly_reduce().
+ *   semantics of mlk_poly_reduce(),
+ * - Use a macro instead of a local function call.
*/
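The csub mentioned in the header is the standard constant-time conditional subtraction used throughout Kyber/ML-KEM code: it maps a value known to lie in [0, 2q-1] (here, the reduction output range [0, q]) into [0, q-1] without a secret-dependent branch. Below is a minimal C sketch of the semantics, assuming q = 3329; it mirrors the Kyber reference implementation rather than this file's AVX2 code:

    #include <stdint.h>

    #define Q 3329 /* ML-KEM modulus */

    /* Conditional subtraction: maps a in [0, 2*Q-1] to a mod Q in [0, Q-1].
     * After a -= Q, a is negative exactly when the input was < Q; the
     * arithmetic right shift then produces an all-ones mask, so Q is added
     * back without a data-dependent branch. */
    static int16_t csub(int16_t a)
    {
        a -= Q;
        a += (a >> 15) & Q;
        return a;
    }

The natural vectorization of the same pattern is a vpsubw/vpsraw/vpand/vpaddw sequence operating on 16 coefficients at a time.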
#include "../../../common.h"
29
30
#include "consts.h"
30
31
#include "fq.inc"
31
32
32
- .text
- .global MLK_ASM_NAMESPACE(tomont_avx2)
- .balign 4
- MLK_ASM_FN_SYMBOL(tomont_avx2)
- #consts
- vmovdqa MLK_AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rsi),%ymm0
- vmovdqa MLK_AVX2_BACKEND_DATA_OFFSET_16XMONTSQLO*2(%rsi),%ymm1
- vmovdqa MLK_AVX2_BACKEND_DATA_OFFSET_16XMONTSQHI*2(%rsi),%ymm2
- call tomont_avx2_core
- add $256,%rdi
- call tomont_avx2_core
- ret
-
- tomont_avx2_core:
+ .macro tomont_128_coefficients offset
#load
- vmovdqa (%rdi),%ymm3
- vmovdqa 32(%rdi),%ymm4
- vmovdqa 64(%rdi),%ymm5
- vmovdqa 96(%rdi),%ymm6
- vmovdqa 128(%rdi),%ymm7
- vmovdqa 160(%rdi),%ymm8
- vmovdqa 192(%rdi),%ymm9
- vmovdqa 224(%rdi),%ymm10
+ vmovdqa (\offset + 0)(%rdi), %ymm3
+ vmovdqa (\offset + 32)(%rdi), %ymm4
+ vmovdqa (\offset + 64)(%rdi), %ymm5
+ vmovdqa (\offset + 96)(%rdi), %ymm6
+ vmovdqa (\offset + 128)(%rdi), %ymm7
+ vmovdqa (\offset + 160)(%rdi), %ymm8
+ vmovdqa (\offset + 192)(%rdi), %ymm9
+ vmovdqa (\offset + 224)(%rdi), %ymm10

fqmulprecomp 1,2,3,11
fqmulprecomp 1,2,4,12
@@ -63,14 +51,27 @@ fqmulprecomp 1,2,9,12
fqmulprecomp 1,2,10,13

#store
- vmovdqa %ymm3,(%rdi)
- vmovdqa %ymm4,32(%rdi)
- vmovdqa %ymm5,64(%rdi)
- vmovdqa %ymm6,96(%rdi)
- vmovdqa %ymm7,128(%rdi)
- vmovdqa %ymm8,160(%rdi)
- vmovdqa %ymm9,192(%rdi)
- vmovdqa %ymm10,224(%rdi)
+ vmovdqa %ymm3, (\offset + 0)(%rdi)
+ vmovdqa %ymm4, (\offset + 32)(%rdi)
+ vmovdqa %ymm5, (\offset + 64)(%rdi)
+ vmovdqa %ymm6, (\offset + 96)(%rdi)
+ vmovdqa %ymm7, (\offset + 128)(%rdi)
+ vmovdqa %ymm8, (\offset + 160)(%rdi)
+ vmovdqa %ymm9, (\offset + 192)(%rdi)
+ vmovdqa %ymm10, (\offset + 224)(%rdi)
+ .endm
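The macro replaces the old tomont_avx2_core helper: each invocation of tomont_128_coefficients expands inline with its base offset folded directly into the vmovdqa addressing, so the call/ret pair and the add $256,%rdi pointer bump between the two 128-coefficient halves disappear, at the cost of emitting the 128-coefficient body twice.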
+
+ .text
+ .global MLK_ASM_NAMESPACE(tomont_avx2)
+ .balign 4
+ MLK_ASM_FN_SYMBOL(tomont_avx2)
+ #consts
+ vmovdqa MLK_AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rsi),%ymm0
+ vmovdqa MLK_AVX2_BACKEND_DATA_OFFSET_16XMONTSQLO*2(%rsi),%ymm1
+ vmovdqa MLK_AVX2_BACKEND_DATA_OFFSET_16XMONTSQHI*2(%rsi),%ymm2
+
+ tomont_128_coefficients 0
+ tomont_128_coefficients 2*128

ret
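For reference, the routine's per-coefficient semantics: converting to the Montgomery domain means multiplying each coefficient by R^2 mod q (with R = 2^16) and Montgomery-reducing, which nets out to multiplication by R. The sketch below models this in C after the Kyber reference implementation; the constant names and helpers are illustrative, not this project's API. The 16XMONTSQLO/16XMONTSQHI tables loaded into %ymm1/%ymm2 appear to hold the corresponding precomputed low (multiplier times q^-1 mod 2^16) and high multiplier halves consumed by fqmulprecomp.

    #include <stdint.h>

    #define Q    3329   /* ML-KEM modulus */
    #define QINV -3327  /* q^-1 mod 2^16, as a signed 16-bit constant */

    /* Montgomery reduction: for |a| < Q * 2^15, returns a * 2^-16 mod Q,
     * with the result in (-Q, Q). */
    static int16_t montgomery_reduce(int32_t a)
    {
        int16_t t = (int16_t)a * QINV;        /* low 16 bits of a * q^-1 */
        return (int16_t)((a - (int32_t)t * Q) >> 16);
    }

    /* One tomont step: multiply by R^2 mod q = 1353, so the reduction's
     * 2^-16 factor leaves r * 2^16 mod q, i.e. r in Montgomery form. */
    static int16_t tomont(int16_t r)
    {
        const int16_t montsq = 1353;          /* (2^16)^2 mod q */
        return montgomery_reduce((int32_t)r * montsq);
    }

The fqmulprecomp macro (from fq.inc) computes the same product-and-reduce using only 16-bit vpmullw/vpmulhw multiplies, which is why the multiplier is supplied as two precomputed halves.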