Skip to content

Commit 4f33c42

Browse files
jammychiou1 authored and hanno-becker committed
AVX2: Add native implementation of poly_chknorm
This adds the AVX2 intrinsics implementation of poly_chknorm from https://github.com/pq-crystals/dilithium/blob/master/avx2/poly.c. Signed-off-by: jammychiou1 <[email protected]>
1 parent 5fc3ec8 commit 4f33c42

File tree

6 files changed

+85
-0
lines changed

6 files changed

+85
-0
lines changed

BIBLIOGRAPHY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ source code and documentation.
149149
- [mldsa/native/x86_64/src/ntt.S](mldsa/native/x86_64/src/ntt.S)
150150
- [mldsa/native/x86_64/src/nttunpack.S](mldsa/native/x86_64/src/nttunpack.S)
151151
- [mldsa/native/x86_64/src/poly_caddq_avx2.c](mldsa/native/x86_64/src/poly_caddq_avx2.c)
152+
- [mldsa/native/x86_64/src/poly_chknorm_avx2.c](mldsa/native/x86_64/src/poly_chknorm_avx2.c)
152153
- [mldsa/native/x86_64/src/poly_decompose_32_avx2.c](mldsa/native/x86_64/src/poly_decompose_32_avx2.c)
153154
- [mldsa/native/x86_64/src/poly_decompose_88_avx2.c](mldsa/native/x86_64/src/poly_decompose_88_avx2.c)
154155
- [mldsa/native/x86_64/src/poly_use_hint_32_avx2.c](mldsa/native/x86_64/src/poly_use_hint_32_avx2.c)

mldsa/native/api.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,4 +254,20 @@ static MLD_INLINE void mld_poly_use_hint_88_native(int32_t *b, const int32_t *a,
254254
const int32_t *h);
255255
#endif /* MLD_USE_NATIVE_POLY_USE_HINT_88 */
256256

257+
#if defined(MLD_USE_NATIVE_POLY_CHKNORM)
258+
/*************************************************
259+
* Name: mld_poly_chknorm_native
260+
*
261+
* Description: Check infinity norm of polynomial against given bound.
262+
* Assumes input coefficients were reduced by mld_reduce32().
263+
*
264+
* Arguments: - const int32_t *a: pointer to polynomial
265+
* - int32_t B: norm bound
266+
*
267+
* Returns 0 if norm is strictly smaller than B <= (MLDSA_Q-1)/8 and 0xFFFFFFFF
268+
* otherwise.
269+
**************************************************/
270+
static MLD_INLINE uint32_t mld_poly_chknorm_native(const int32_t *a, int32_t B);
271+
#endif /* MLD_USE_NATIVE_POLY_CHKNORM */
272+
257273
#endif /* !MLD_NATIVE_API_H */

mldsa/native/x86_64/meta.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#define MLD_USE_NATIVE_POLY_CADDQ
2323
#define MLD_USE_NATIVE_POLY_USE_HINT_32
2424
#define MLD_USE_NATIVE_POLY_USE_HINT_88
25+
#define MLD_USE_NATIVE_POLY_CHKNORM
2526

2627
#if !defined(__ASSEMBLER__)
2728
#include <string.h>
@@ -133,6 +134,11 @@ static MLD_INLINE void mld_poly_use_hint_88_native(int32_t *b, const int32_t *a,
133134
(const __m256i *)h);
134135
}
135136

137+
/*
 * x86_64 backend hook for the native poly_chknorm API: forwards the
 * coefficient array and the bound B unchanged to mld_poly_chknorm_avx2()
 * and returns its result.
 *
 * The int32_t array is reinterpreted as an array of 256-bit vectors.
 * NOTE(review): the AVX2 routine reads its input with _mm256_load_si256,
 * so `a` is expected to be 32-byte aligned -- confirm callers guarantee it.
 */
static MLD_INLINE uint32_t mld_poly_chknorm_native(const int32_t *a, int32_t B)
138+
{
139+
return mld_poly_chknorm_avx2((const __m256i *)a, B);
140+
}
141+
136142
#endif /* !__ASSEMBLER__ */
137143

138144
#endif /* !MLD_NATIVE_X86_64_META_H */

mldsa/native/x86_64/src/arith_native_x86_64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,7 @@ void mld_poly_use_hint_32_avx2(__m256i *b, const __m256i *a, const __m256i *h);
6969
#define mld_poly_use_hint_88_avx2 MLD_NAMESPACE(mld_poly_use_hint_88_avx2)
7070
void mld_poly_use_hint_88_avx2(__m256i *b, const __m256i *a, const __m256i *h);
7171

72+
#define mld_poly_chknorm_avx2 MLD_NAMESPACE(mld_poly_chknorm_avx2)
73+
uint32_t mld_poly_chknorm_avx2(const __m256i *a, int32_t B);
74+
7275
#endif /* !MLD_NATIVE_X86_64_SRC_ARITH_NATIVE_X86_64_H */
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) The mldsa-native project authors
3+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4+
*/
5+
6+
/* References
7+
* ==========
8+
*
9+
* - [REF_AVX2]
10+
* CRYSTALS-Dilithium optimized AVX2 implementation
11+
* Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12+
* https://github.com/pq-crystals/dilithium/tree/master/avx2
13+
*/
14+
15+
/*
16+
* This file is derived from the public domain
17+
* AVX2 Dilithium implementation @[REF_AVX2].
18+
*/
19+
20+
#include "../../../common.h"
21+
22+
#if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
23+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
24+
25+
#include <immintrin.h>
26+
#include <stdint.h>
27+
#include "arith_native_x86_64.h"
28+
29+
/*
 * AVX2 infinity-norm check.
 *
 * Arguments: - const __m256i *a: polynomial coefficients, read as
 *                                MLDSA_N / 8 vectors of eight 32-bit lanes
 *            - int32_t B:        norm bound
 *
 * Returns 0 if every coefficient x satisfies |x| < B, and 0xFFFFFFFF if at
 * least one coefficient has |x| >= B.
 *
 * NOTE(review): _mm256_abs_epi32 leaves INT32_MIN unchanged (still
 * negative), so such a lane would never compare greater than the bound.
 * Presumably callers pass reduced coefficients only -- confirm.
 */
uint32_t mld_poly_chknorm_avx2(const __m256i *a, int32_t B)
30+
{
31+
unsigned int i;
32+
__m256i f, t;
33+
/* The compare below is a strict '>', so use B - 1: |x| > B - 1 <=> |x| >= B. */
const __m256i bound = _mm256_set1_epi32(B - 1);
34+
35+
/* t accumulates the per-lane violation masks of all vectors. */
t = _mm256_setzero_si256();
36+
for (i = 0; i < MLDSA_N / 8; i++)
37+
{
38+
/* Aligned 256-bit load: &a[i] must be 32-byte aligned. */
f = _mm256_load_si256(&a[i]);
39+
f = _mm256_abs_epi32(f);
40+
/* Lane becomes all-ones where |x| > B - 1, all-zeros otherwise. */
f = _mm256_cmpgt_epi32(f, bound);
41+
/* OR into the accumulator; the loop does not exit early on a violation. */
t = _mm256_or_si256(t, f);
42+
}
43+
44+
/*
 * _mm256_testz_si256(t, t) is 1 iff t is all-zero. Subtracting 1 gives 0
 * (no violation) or -1, which converts to 0xFFFFFFFF in the uint32_t
 * return type.
 */
return _mm256_testz_si256(t, t) - 1;
45+
}
46+
47+
#else /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
48+
*/
49+
50+
MLD_EMPTY_CU(avx2_poly_chknorm)
51+
52+
#endif /* !(MLD_ARITH_BACKEND_X86_64_DEFAULT && \
53+
!MLD_CONFIG_MULTILEVEL_NO_SHARED) */

mldsa/poly.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,11 @@ void mld_poly_use_hint(mld_poly *b, const mld_poly *a, const mld_poly *h)
324324
MLD_INTERNAL_API
325325
uint32_t mld_poly_chknorm(const mld_poly *a, int32_t B)
326326
{
327+
#if defined(MLD_USE_NATIVE_POLY_CHKNORM)
328+
/* TODO: proof */
329+
mld_assert_bound(a->coeffs, MLDSA_N, -REDUCE32_RANGE_MAX, REDUCE32_RANGE_MAX);
330+
return mld_poly_chknorm_native(a->coeffs, B);
331+
#else
327332
unsigned int i;
328333
uint32_t t = 0;
329334
mld_assert_bound(a->coeffs, MLDSA_N, -REDUCE32_RANGE_MAX, REDUCE32_RANGE_MAX);
@@ -346,6 +351,7 @@ uint32_t mld_poly_chknorm(const mld_poly *a, int32_t B)
346351
}
347352

348353
return t;
354+
#endif /* !MLD_USE_NATIVE_POLY_CHKNORM */
349355
}
350356

351357
/*************************************************

0 commit comments

Comments
 (0)