Skip to content

Commit 91cece9

Browse files
authored
Merge pull request #541 from pq-code-package/simpasm
Port `simpasm` from mlkem-native to mldsa-native
2 parents 03a184c + a77abbe commit 91cece9

File tree

97 files changed

+17643
-3503
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+17643
-3503
lines changed

.github/workflows/base.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,34 @@ jobs:
218218
- name: make lib
219219
run: |
220220
make lib
221+
simpasm:
222+
strategy:
223+
fail-fast: false
224+
matrix:
225+
backend:
226+
- arg: '--aarch64-clean'
227+
name: Clean
228+
# TODO: add backend option after we have optimized/clean separation
229+
# - arg: ''
230+
# name: Optimized
231+
simplify:
232+
- arg: ''
233+
name: Simplified
234+
- arg: '--no-simplify'
235+
name: Unmodified
236+
runs-on: pqcp-arm64
237+
name: AArch64 dev backend (${{ matrix.simplify.name }})
238+
steps:
239+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
240+
- name: Reinstate and test backend
241+
uses: ./.github/actions/setup-shell
242+
with:
243+
nix-shell: 'ci'
244+
gh_token: ${{ secrets.GITHUB_TOKEN }}
245+
script: |
246+
./scripts/autogen ${{ matrix.simplify.arg }}
247+
make clean
248+
OPT=1 make quickcheck
221249
scan-build:
222250
strategy:
223251
fail-fast: false

.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,8 @@ jobs:
461461
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
462462
- uses: ./.github/actions/setup-shell
463463
with:
464-
nix-shell: 'ci'
464+
nix-shell: 'ci-cross' # Need cross-compiler for ASM simplification
465+
nix-cache: 'true'
465466
gh_token: ${{ secrets.GITHUB_TOKEN }}
466467
script: |
467-
python3 ./scripts/autogen --dry-run
468+
python3 ./scripts/autogen --dry-run --force-cross

BIBLIOGRAPHY.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ source code and documentation.
7272
- Matthias J. Kannwischer
7373
* URL: https://eprint.iacr.org/2022/1243
7474
* Referenced from:
75+
- [dev/fips202/aarch64/auto.h](dev/fips202/aarch64/auto.h)
76+
- [dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S](dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S)
77+
- [dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S](dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S)
7578
- [mldsa/fips202/native/aarch64/auto.h](mldsa/fips202/native/aarch64/auto.h)
7679
- [mldsa/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S](mldsa/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S)
7780
- [mldsa/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S](mldsa/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S)
@@ -145,6 +148,27 @@ source code and documentation.
145148
- Damien Stehlé
146149
* URL: https://github.com/pq-crystals/dilithium/tree/master/avx2
147150
* Referenced from:
151+
- [dev/x86_64/src/align.h](dev/x86_64/src/align.h)
152+
- [dev/x86_64/src/consts.c](dev/x86_64/src/consts.c)
153+
- [dev/x86_64/src/consts.h](dev/x86_64/src/consts.h)
154+
- [dev/x86_64/src/intt.S](dev/x86_64/src/intt.S)
155+
- [dev/x86_64/src/ntt.S](dev/x86_64/src/ntt.S)
156+
- [dev/x86_64/src/nttunpack.S](dev/x86_64/src/nttunpack.S)
157+
- [dev/x86_64/src/pointwise.S](dev/x86_64/src/pointwise.S)
158+
- [dev/x86_64/src/pointwise_acc_l4.S](dev/x86_64/src/pointwise_acc_l4.S)
159+
- [dev/x86_64/src/pointwise_acc_l5.S](dev/x86_64/src/pointwise_acc_l5.S)
160+
- [dev/x86_64/src/pointwise_acc_l7.S](dev/x86_64/src/pointwise_acc_l7.S)
161+
- [dev/x86_64/src/poly_caddq_avx2.c](dev/x86_64/src/poly_caddq_avx2.c)
162+
- [dev/x86_64/src/poly_chknorm_avx2.c](dev/x86_64/src/poly_chknorm_avx2.c)
163+
- [dev/x86_64/src/poly_decompose_32_avx2.c](dev/x86_64/src/poly_decompose_32_avx2.c)
164+
- [dev/x86_64/src/poly_decompose_88_avx2.c](dev/x86_64/src/poly_decompose_88_avx2.c)
165+
- [dev/x86_64/src/poly_use_hint_32_avx2.c](dev/x86_64/src/poly_use_hint_32_avx2.c)
166+
- [dev/x86_64/src/poly_use_hint_88_avx2.c](dev/x86_64/src/poly_use_hint_88_avx2.c)
167+
- [dev/x86_64/src/polyz_unpack_17_avx2.c](dev/x86_64/src/polyz_unpack_17_avx2.c)
168+
- [dev/x86_64/src/polyz_unpack_19_avx2.c](dev/x86_64/src/polyz_unpack_19_avx2.c)
169+
- [dev/x86_64/src/rej_uniform_avx2.c](dev/x86_64/src/rej_uniform_avx2.c)
170+
- [dev/x86_64/src/rej_uniform_eta2_avx2.c](dev/x86_64/src/rej_uniform_eta2_avx2.c)
171+
- [dev/x86_64/src/rej_uniform_eta4_avx2.c](dev/x86_64/src/rej_uniform_eta4_avx2.c)
148172
- [mldsa/native/x86_64/src/align.h](mldsa/native/x86_64/src/align.h)
149173
- [mldsa/native/x86_64/src/consts.c](mldsa/native/x86_64/src/consts.c)
150174
- [mldsa/native/x86_64/src/consts.h](mldsa/native/x86_64/src/consts.h)

dev/aarch64_clean/meta.h

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Copyright (c) The mlkem-native project authors
3+
* Copyright (c) The mldsa-native project authors
4+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5+
*/
6+
7+
#ifndef MLD_NATIVE_AARCH64_META_H
8+
#define MLD_NATIVE_AARCH64_META_H
9+
10+
/* Set of primitives that this backend replaces */
11+
#define MLD_USE_NATIVE_NTT
12+
#define MLD_USE_NATIVE_INTT
13+
#define MLD_USE_NATIVE_REJ_UNIFORM
14+
#define MLD_USE_NATIVE_REJ_UNIFORM_ETA2
15+
#define MLD_USE_NATIVE_REJ_UNIFORM_ETA4
16+
#define MLD_USE_NATIVE_POLY_DECOMPOSE_32
17+
#define MLD_USE_NATIVE_POLY_DECOMPOSE_88
18+
#define MLD_USE_NATIVE_POLY_CADDQ
19+
#define MLD_USE_NATIVE_POLY_USE_HINT_32
20+
#define MLD_USE_NATIVE_POLY_USE_HINT_88
21+
#define MLD_USE_NATIVE_POLY_CHKNORM
22+
#define MLD_USE_NATIVE_POLYZ_UNPACK_17
23+
#define MLD_USE_NATIVE_POLYZ_UNPACK_19
24+
#define MLD_USE_NATIVE_POINTWISE_MONTGOMERY
25+
#define MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4
26+
#define MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5
27+
#define MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7
28+
29+
/* Identifier for this backend so that source and assembly files
30+
* in the build can be appropriately guarded. */
31+
#define MLD_ARITH_BACKEND_AARCH64
32+
33+
34+
#if !defined(__ASSEMBLER__)
35+
#include "src/arith_native_aarch64.h"
36+
37+
static MLD_INLINE void mld_ntt_native(int32_t data[MLDSA_N])
38+
{
39+
mld_ntt_asm(data, mld_aarch64_ntt_zetas_layer123456,
40+
mld_aarch64_ntt_zetas_layer78);
41+
}
42+
43+
static MLD_INLINE void mld_intt_native(int32_t data[MLDSA_N])
44+
{
45+
mld_intt_asm(data, mld_aarch64_intt_zetas_layer78,
46+
mld_aarch64_intt_zetas_layer123456);
47+
}
48+
49+
static MLD_INLINE int mld_rej_uniform_native(int32_t *r, unsigned len,
50+
const uint8_t *buf,
51+
unsigned buflen)
52+
{
53+
if (len != MLDSA_N || buflen % 24 != 0)
54+
{
55+
return -1;
56+
}
57+
58+
/* Safety: the returned count is at most MLDSA_N; hence, this cast is safe. */
59+
return (int)mld_rej_uniform_asm(r, buf, buflen, mld_rej_uniform_table);
60+
}
61+
62+
static MLD_INLINE int mld_rej_uniform_eta2_native(int32_t *r, unsigned len,
63+
const uint8_t *buf,
64+
unsigned buflen)
65+
{
66+
unsigned int outlen;
67+
/* AArch64 implementation assumes specific buffer lengths */
68+
if (len != MLDSA_N || buflen != MLD_AARCH64_REJ_UNIFORM_ETA2_BUFLEN)
69+
{
70+
return -1;
71+
}
72+
/* Constant time: Inputs and outputs to this function are secret.
73+
* It is safe to leak which coefficients are accepted/rejected.
74+
* The assembly implementation must not leak any other information about the
75+
* accepted coefficients. Constant-time testing cannot cover this, and we
76+
* hence have to manually verify the assembly.
77+
* We declassify the input data beforehand and mark the outputs as secret.
78+
*/
79+
MLD_CT_TESTING_DECLASSIFY(buf, buflen);
80+
outlen = mld_rej_uniform_eta2_asm(r, buf, buflen, mld_rej_uniform_eta_table);
81+
MLD_CT_TESTING_SECRET(r, sizeof(int32_t) * outlen);
82+
/* Safety: outlen is at most MLDSA_N and, hence, this cast is safe. */
83+
return (int)outlen;
84+
}
85+
86+
static MLD_INLINE int mld_rej_uniform_eta4_native(int32_t *r, unsigned len,
87+
const uint8_t *buf,
88+
unsigned buflen)
89+
{
90+
unsigned int outlen;
91+
/* AArch64 implementation assumes specific buffer lengths */
92+
if (len != MLDSA_N || buflen != MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN)
93+
{
94+
return -1;
95+
}
96+
/* Constant time: Inputs and outputs to this function are secret.
97+
* It is safe to leak which coefficients are accepted/rejected.
98+
* The assembly implementation must not leak any other information about the
99+
* accepted coefficients. Constant-time testing cannot cover this, and we
100+
* hence have to manually verify the assembly.
101+
* We declassify the input data beforehand and mark the outputs as secret.
102+
*/
103+
MLD_CT_TESTING_DECLASSIFY(buf, buflen);
104+
outlen = mld_rej_uniform_eta4_asm(r, buf, buflen, mld_rej_uniform_eta_table);
105+
MLD_CT_TESTING_SECRET(r, sizeof(int32_t) * outlen);
106+
/* Safety: outlen is at most MLDSA_N and, hence, this cast is safe. */
107+
return (int)outlen;
108+
}
109+
110+
static MLD_INLINE void mld_poly_decompose_32_native(int32_t *a1, int32_t *a0,
111+
const int32_t *a)
112+
{
113+
mld_poly_decompose_32_asm(a1, a0, a);
114+
}
115+
116+
static MLD_INLINE void mld_poly_decompose_88_native(int32_t *a1, int32_t *a0,
117+
const int32_t *a)
118+
{
119+
mld_poly_decompose_88_asm(a1, a0, a);
120+
}
121+
122+
static MLD_INLINE void mld_poly_caddq_native(int32_t a[MLDSA_N])
123+
{
124+
mld_poly_caddq_asm(a);
125+
}
126+
127+
static MLD_INLINE void mld_poly_use_hint_32_native(int32_t *b, const int32_t *a,
128+
const int32_t *h)
129+
{
130+
mld_poly_use_hint_32_asm(b, a, h);
131+
}
132+
133+
static MLD_INLINE void mld_poly_use_hint_88_native(int32_t *b, const int32_t *a,
134+
const int32_t *h)
135+
{
136+
mld_poly_use_hint_88_asm(b, a, h);
137+
}
138+
139+
static MLD_INLINE uint32_t mld_poly_chknorm_native(const int32_t *a, int32_t B)
140+
{
141+
return mld_poly_chknorm_asm(a, B);
142+
}
143+
144+
static MLD_INLINE void mld_polyz_unpack_17_native(int32_t *r,
145+
const uint8_t *buf)
146+
{
147+
mld_polyz_unpack_17_asm(r, buf, mld_polyz_unpack_17_indices);
148+
}
149+
150+
static MLD_INLINE void mld_polyz_unpack_19_native(int32_t *r,
151+
const uint8_t *buf)
152+
{
153+
mld_polyz_unpack_19_asm(r, buf, mld_polyz_unpack_19_indices);
154+
}
155+
156+
static MLD_INLINE void mld_poly_pointwise_montgomery_native(
157+
int32_t out[MLDSA_N], const int32_t in0[MLDSA_N],
158+
const int32_t in1[MLDSA_N])
159+
{
160+
mld_poly_pointwise_montgomery_asm(out, in0, in1);
161+
}
162+
163+
static MLD_INLINE void mld_polyvecl_pointwise_acc_montgomery_l4_native(
164+
int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N],
165+
const int32_t v[4][MLDSA_N])
166+
{
167+
mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, (const int32_t *)u,
168+
(const int32_t *)v);
169+
}
170+
171+
static MLD_INLINE void mld_polyvecl_pointwise_acc_montgomery_l5_native(
172+
int32_t w[MLDSA_N], const int32_t u[5][MLDSA_N],
173+
const int32_t v[5][MLDSA_N])
174+
{
175+
mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, (const int32_t *)u,
176+
(const int32_t *)v);
177+
}
178+
179+
static MLD_INLINE void mld_polyvecl_pointwise_acc_montgomery_l7_native(
180+
int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N],
181+
const int32_t v[7][MLDSA_N])
182+
{
183+
mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, (const int32_t *)u,
184+
(const int32_t *)v);
185+
}
186+
187+
#endif /* !__ASSEMBLER__ */
188+
#endif /* !MLD_NATIVE_AARCH64_META_H */

0 commit comments

Comments
 (0)