Skip to content

Commit 73833d6

Browse files
AArch64: Add native implementation of poly_pointwise_montgomery
This commit adds a native implementation of poly_pointwise_montgomery written from scratch. Co-authored-by: Matthias J. Kannwischer <[email protected]> Signed-off-by: jammychiou1 <[email protected]>
1 parent 88bd6d5 commit 73833d6

File tree

3 files changed

+89
-0
lines changed

3 files changed

+89
-0
lines changed

mldsa/native/aarch64/meta.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#define MLD_USE_NATIVE_POLY_CHKNORM
2222
#define MLD_USE_NATIVE_POLYZ_UNPACK_17
2323
#define MLD_USE_NATIVE_POLYZ_UNPACK_19
24+
#define MLD_USE_NATIVE_POINTWISE_MONTGOMERY
2425

2526
/* Identifier for this backend so that source and assembly files
2627
* in the build can be appropriately guarded. */
@@ -147,5 +148,12 @@ static MLD_INLINE void mld_polyz_unpack_19_native(int32_t *r,
147148
mld_polyz_unpack_19_asm(r, buf, mld_polyz_unpack_19_indices);
148149
}
149150

151+
/* AArch64 native hook for the pointwise Montgomery multiplication.
 * Thin forwarding wrapper: passes out, in0 and in1 unchanged, in that
 * order, to the assembly routine mld_poly_pointwise_montgomery_asm.
 * Each array parameter is declared with MLDSA_N elements. */
static MLD_INLINE void mld_poly_pointwise_montgomery_native(
152+
int32_t out[MLDSA_N], const int32_t in0[MLDSA_N],
153+
const int32_t in1[MLDSA_N])
154+
{
155+
mld_poly_pointwise_montgomery_asm(out, in0, in1);
156+
}
157+
150158
#endif /* !__ASSEMBLER__ */
151159
#endif /* !MLD_NATIVE_AARCH64_META_H */

mldsa/native/aarch64/src/arith_native_aarch64.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,9 @@ void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf,
9393
void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
9494
const uint8_t *indices);
9595

96+
/* Assembly implementation of the pointwise Montgomery multiplication.
 * The symbol is namespaced through MLD_NAMESPACE. Arguments, in order:
 * output coefficient array, first input array, second input array
 * (the native wrapper calls it as (out, in0, in1)). */
#define mld_poly_pointwise_montgomery_asm \
97+
MLD_NAMESPACE(poly_pointwise_montgomery_asm)
98+
void mld_poly_pointwise_montgomery_asm(int32_t *, const int32_t *,
99+
const int32_t *);
100+
96101
#endif /* !MLD_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H */
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/* Copyright (c) The mldsa-native project authors
2+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3+
*/
4+
5+
#include "../../../common.h"
6+
#if defined(MLD_ARITH_BACKEND_AARCH64)
7+
8+
/*
 * montgomery_reduce_long res, inl, inh
 *
 * Signed Montgomery reduction of four 64-bit values held as two .2d
 * vectors: inl carries lanes 0-1, inh carries lanes 2-3.
 * For each value p this computes
 *     t   = (low 32 bits of p) * (-q^-1)  mod 2^32
 *     res = high 32 bits of (p + t * q)
 * so that res is congruent to p * 2^-32 modulo q.
 *
 * Requires the aliases modulus (q in every .4s lane) and
 * modulus_twisted (-q^-1 mod 2^32 in every .4s lane) to be set up.
 * Overwrites inl and inh in addition to res.
 */
.macro montgomery_reduce_long res, inl, inh
        uzp1    \res\().4s, \inl\().4s, \inh\().4s          // gather low halves
        mul     \res\().4s, \res\().4s, modulus_twisted.4s  // t = lo * (-q^-1)
        smlal   \inl\().2d, \res\().2s, modulus.2s          // lanes 0-1: p += t * q
        smlal2  \inh\().2d, \res\().4s, modulus.4s          // lanes 2-3: p += t * q
        uzp2    \res\().4s, \inl\().4s, \inh\().4s          // gather high halves
.endm

/*
 * mul_long dl, dh, a, b
 *
 * Full signed 32x32 -> 64 bit product of two .4s vectors:
 * dl = a[0..1] * b[0..1], dh = a[2..3] * b[2..3] (both .2d).
 *
 * Deliberately not called pmull: that is an existing A64 mnemonic
 * (polynomial multiply long) and a macro of the same name would shadow it.
 */
.macro mul_long dl, dh, a, b
        smull   \dl\().2d, \a\().2s, \b\().2s
        smull2  \dh\().2d, \a\().4s, \b\().4s
.endm

/* Register roles. wtmp and count share x3/w3: wtmp is only used to build
 * the two constants before count is initialised. */
out_ptr         .req x0
a_ptr           .req x1
b_ptr           .req x2
count           .req x3
wtmp            .req w3

modulus         .req v0
modulus_twisted .req v1

/*
 * void poly_pointwise_montgomery_asm(int32_t *out, const int32_t *a,
 *                                    const int32_t *b)
 *
 * For every i in [0, MLDSA_N):
 *     out[i] = montgomery_reduce((int64_t)a[i] * b[i])
 * i.e. a value congruent to a[i] * b[i] * 2^-32 modulo q = 8380417.
 * No further normalisation of the result is performed here.
 *
 * In:    x0 = out, x1 = a, x2 = b
 * Clobb: x0-x3 (pointers are advanced by post-indexed accesses),
 *        v0, v1, v16-v31
 * Stack: none (leaf routine)
 *
 * Each iteration consumes four 4-lane vectors per input, so MLDSA_N / 4
 * (MLDSA_N is supplied by common.h) must be a non-zero multiple of 4.
 */
        .text
        .global MLD_ASM_NAMESPACE(poly_pointwise_montgomery_asm)
        .balign 4
MLD_ASM_FN_SYMBOL(poly_pointwise_montgomery_asm)
        // modulus = q = 8380417 = 0x007FE001 in every lane
        movz    wtmp, #57345
        movk    wtmp, #127, lsl #16
        dup     modulus.4s, wtmp

        // modulus_twisted = -q^-1 mod 2^32 = 4236238847 = 0xFC7FDFFF
        movz    wtmp, #57343
        movk    wtmp, #64639, lsl #16
        dup     modulus_twisted.4s, wtmp

        // count = number of 4-lane vectors still to process
        mov     count, #(MLDSA_N / 4)

poly_pointwise_montgomery_loop_start:
        // Load 16 coefficients of a; the last load post-increments a_ptr.
        ldr     q17, [a_ptr, #1*16]
        ldr     q18, [a_ptr, #2*16]
        ldr     q19, [a_ptr, #3*16]
        ldr     q16, [a_ptr], #4*16

        // Load 16 coefficients of b; the last load post-increments b_ptr.
        ldr     q21, [b_ptr, #1*16]
        ldr     q22, [b_ptr, #2*16]
        ldr     q23, [b_ptr, #3*16]
        ldr     q20, [b_ptr], #4*16

        // 64-bit products a[i] * b[i]
        mul_long v24, v25, v16, v20
        mul_long v26, v27, v17, v21
        mul_long v28, v29, v18, v22
        mul_long v30, v31, v19, v23

        // Reduce back to 32 bits; results replace the a inputs in v16-v19.
        montgomery_reduce_long v16, v24, v25
        montgomery_reduce_long v17, v26, v27
        montgomery_reduce_long v18, v28, v29
        montgomery_reduce_long v19, v30, v31

        // Store 16 results; the last store post-increments out_ptr.
        str     q17, [out_ptr, #1*16]
        str     q18, [out_ptr, #2*16]
        str     q19, [out_ptr, #3*16]
        str     q16, [out_ptr], #4*16

        // Four vectors done. cbnz tests the register itself, so the
        // non-flag-setting sub is sufficient.
        sub     count, count, #4
        cbnz    count, poly_pointwise_montgomery_loop_start

        ret

/* Release the register aliases so they cannot leak into any assembly
 * that follows in the same translation unit. */
        .unreq  out_ptr
        .unreq  a_ptr
        .unreq  b_ptr
        .unreq  count
        .unreq  wtmp
        .unreq  modulus
        .unreq  modulus_twisted
76+
#endif /* MLD_ARITH_BACKEND_AARCH64 */

0 commit comments

Comments
 (0)