Skip to content

Commit 0b16a44

Browse files
committed
Explain the bound 3q/4 which applies to all montmul-by-const
To ease the explanation, we cite and add [Survey_Hwang23](https://eprint.iacr.org/2023/1962) to the bibliography. In the process, we noticed that the original MLD_INTT_BOUND defined as (MLDSA_Q * 3 / 4) = floor(3q/4) was too strong to justify concisely, so we bump it to (MLDSA_Q * 3 / 4 + 1) or ceil(3q/4). Signed-off-by: jammychiou1 <[email protected]>
1 parent 2a5357c commit 0b16a44

File tree

6 files changed

+53
-10
lines changed

6 files changed

+53
-10
lines changed

BIBLIOGRAPHY.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,15 @@ source code and documentation.
247247
- [mldsa/src/native/aarch64/src/intt.S](mldsa/src/native/aarch64/src/intt.S)
248248
- [mldsa/src/native/aarch64/src/ntt.S](mldsa/src/native/aarch64/src/ntt.S)
249249

250+
### `Survey_Hwang23`
251+
252+
* A Survey of Polynomial Multiplications for Lattice-Based Cryptosystems
253+
* Author(s):
254+
- Vincent Hwang
255+
* URL: https://eprint.iacr.org/2023/1962
256+
* Referenced from:
257+
- [dev/x86_64/src/intt.S](dev/x86_64/src/intt.S)
258+
250259
### `libmceliece`
251260

252261
* libmceliece implementation of Classic McEliece

BIBLIOGRAPHY.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@
103103
- Kannwischer, Matthias J.
104104
- Yang, Bo-Yin
105105
- Yang, Shang-Yi
106+
107+
- id: Survey_Hwang23
108+
name: "A Survey of Polynomial Multiplications for Lattice-Based Cryptosystems"
109+
year: 2023
110+
url: https://eprint.iacr.org/2023/1962
111+
author:
112+
- Hwang, Vincent
113+
106114
- id: mupq
107115
name: Common files for pqm4, pqm3, pqriscv
108116
author:

dev/x86_64/src/intt.S

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
* CRYSTALS-Dilithium optimized AVX2 implementation
1616
* Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
1717
* https://github.com/pq-crystals/dilithium/tree/master/avx2
18+
*
19+
* - [Survey_Hwang23]
20+
* A Survey of Polynomial Multiplications for Lattice-Based Cryptosystems
21+
* Vincent Hwang
22+
* https://eprint.iacr.org/2023/1962
1823
*/
1924

2025
#include "../../../common.h"
@@ -47,7 +52,8 @@ vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
4752
* Compute l + h, montmul(h - l, zh) then store the results back to l, h
4853
* respectively.
4954
*
50-
* The general abs bound of Montgomery multiplication is 3q/4.
55+
* The abs bound of "Montgomery multiplication with signed canonical constant"
56+
* is ceil(3q/4) (see the end of this file).
5157
*/
5258
.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2
5359
vpsubd %ymm\l,%ymm\h,%ymm12
@@ -107,7 +113,7 @@ vmovshdup %ymm3,%ymm1
107113
vmovshdup %ymm15,%ymm2
108114
butterfly 10,11,1,3,2,15
109115

110-
/* 4, 6, 8, 10: abs bound < 2q; 5, 7, 9, 11: abs bound < 3q/4 */
116+
/* 4, 6, 8, 10: abs bound < 2q; 5, 7, 9, 11: abs bound < ceil(3q/4) */
111117
/*
112118
* Note that since 2^31 / q > 256, the sum of all 256 coefficients does not
113119
* overflow. This allows us to greatly simplify the range analysis by relaxing
@@ -236,7 +242,7 @@ butterfly 5,9
236242
butterfly 6,10
237243
butterfly 7,11
238244

239-
/* 4, 5, 6, 7: abs bound < 256q; 8, 9, 10, 11: abs bound < 3q/4 */
245+
/* 4, 5, 6, 7: abs bound < 256q; 8, 9, 10, 11: abs bound < ceil(3q/4) */
240246

241247
vmovdqa %ymm8,512+32*\off(%rdi)
242248
vmovdqa %ymm9,640+32*\off(%rdi)
@@ -251,8 +257,7 @@ vmovdqa %ymm11,896+32*\off(%rdi)
251257
* For ymm{8,9,10,11}, the scaling has been merged into the last butterfly, so
252258
* only ymm{4,5,6,7} need to be scaled explicitly.
253259
*
254-
* The scaling is achieved by computing montmul(-, MLD_AVX2_DIV), so the output
255-
* will have an abs bound of 3q/4.
260+
* The scaling is achieved by computing montmul(-, MLD_AVX2_DIV).
256261
*
257262
* 4, 5, 6, 7: abs bound < 256q
258263
*/
@@ -305,7 +310,22 @@ vmovshdup %ymm7,%ymm7
305310
vpblendd $0xAA,%ymm8,%ymm6,%ymm6
306311
vpblendd $0xAA,%ymm9,%ymm7,%ymm7
307312

308-
/* 4, 5, 6, 7: abs bound < 3q/4 */
313+
/*
314+
* The bound ceil(3q/4) for this scaling, as well as any other "Montgomery
315+
* multiplication with signed canonical constant", is justified as follows.
316+
*
317+
* @[Survey_Hwang23, Section 2.2] shows a bound that holds for any
318+
* variable input a, as long as the constant b is signed canonical:
319+
*
320+
* |montmul(a, b)| <= (|a| (q/2) + (R/2) q) / R = (q/2) (1 + |a|/R).
321+
*
322+
* Therefore, even if we know nothing about a except that it fits inside
323+
* int32_t (thus |a| <= R/2), we still have |montmul(a, b)| <= 3q/4. This can be
324+
* strengthened to |montmul(a, b)| <= floor(3q/4) < ceil(3q/4) since LHS is
325+
* an integer and 3q/4 isn't.
326+
*/
327+
328+
/* 4, 5, 6, 7: abs bound < ceil(3q/4) */
309329

310330
vmovdqa %ymm4, 0+32*\off(%rdi)
311331
vmovdqa %ymm5,128+32*\off(%rdi)

dev/x86_64/src/ntt.S

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
4848
* Compute l + montmul(h, zh), l - montmul(h, zh) then store the results back to
4949
* l, h respectively.
5050
*
51-
* Although the general abs bound of Montgomery multiplication is 3q/4, we use
52-
* the more convenient bound q here.
51+
* Although the abs bound of "Montgomery multiplication with signed canonical
52+
* constant" is ceil(3q/4) (see the end of dev/x86_64/src/intt.S), we use the
53+
* more convenient bound q here.
5354
*
5455
* In conclusion, the magnitudes of all coefficients grow by at most q after
5556
* each layer.

mldsa/src/native/x86_64/src/intt.S

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
* CRYSTALS-Dilithium optimized AVX2 implementation
1616
* Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
1717
* https://github.com/pq-crystals/dilithium/tree/master/avx2
18+
*
19+
* - [Survey_Hwang23]
20+
* A Survey of Polynomial Multiplications for Lattice-Based Cryptosystems
21+
* Vincent Hwang
22+
* https://eprint.iacr.org/2023/1962
1823
*/
1924

2025
#include "../../../common.h"

mldsa/src/ntt.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
/* Absolute exclusive upper bound for the output of the forward NTT */
2323
#define MLD_NTT_BOUND (9 * MLDSA_Q)
24-
/* Absolute exclusive upper bound for the output of the inverse NTT*/
25-
#define MLD_INTT_BOUND (MLDSA_Q * 3 / 4)
24+
/* Absolute exclusive upper bound for the output of the inverse NTT */
25+
#define MLD_INTT_BOUND (MLDSA_Q * 3 / 4 + 1) /* ceil(3 * MLDSA_Q / 4) */
2626

2727
#define mld_ntt MLD_NAMESPACE(ntt)
2828
/*************************************************

0 commit comments

Comments
 (0)