From bac5adad592bb57ae628c7973f4fbc63246d7bc9 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Wed, 28 May 2025 10:16:53 +0100 Subject: [PATCH 1/3] ACVP: Allow parameters in `EXEC_WRAPPER` environment variable Previously, the value of the environment variable `EXEC_WRAPPER` had to be the name of a binary to be usable with `test/acvp_client.py`. This commit generalizes this to allow a binary name followed by some fixed parameters. The value is split on single space characters, so an individual parameter cannot itself contain a space and no shell-style quoting is interpreted. Signed-off-by: Hanno Becker --- test/acvp_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/acvp_client.py b/test/acvp_client.py index 56a7a76c8..e422db027 100644 --- a/test/acvp_client.py +++ b/test/acvp_client.py @@ -15,7 +15,7 @@ # Check if we need to use a wrapper for execution (e.g. QEMU) exec_prefix = os.environ.get("EXEC_WRAPPER", "") -exec_prefix = [exec_prefix] if exec_prefix != "" else [] +exec_prefix = exec_prefix.split(" ") if exec_prefix != "" else [] acvp_dir = "test/acvp_data" acvp_keygen_jsons = [ From 5c3414f9199a4c9e3448aa62671feb4d45f64071 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Wed, 28 May 2025 10:31:45 +0100 Subject: [PATCH 2/3] Makefiles: Simplify auto-derivation of compiler flags This commit simplifies the logic around automatically setting architecture-specific compiler flags from ``` if (arch == A) { if (!cross) ... elif (cross == X) ... elif (cross == Y) ... } elif (arch == B) { if (!cross) ... elif (cross == X) ... elif (cross == Y) ... } ``` to ``` if !cross { if (arch == A) ... elif (arch == B) ... ... } else { if (cross == X) ... elif (cross == Y) ... } ``` It also adds further architecture flags for RV64 and PPC64LE. 
Signed-off-by: Hanno Becker --- .../Makefile | 30 ++++++++--------- examples/multilevel_build_native/Makefile | 30 ++++++++--------- test/mk/auto.mk | 33 +++++++------------ 3 files changed, 40 insertions(+), 53 deletions(-) diff --git a/examples/monolithic_build_multilevel_native/Makefile b/examples/monolithic_build_multilevel_native/Makefile index 823dd846b..44bcb30aa 100644 --- a/examples/monolithic_build_multilevel_native/Makefile +++ b/examples/monolithic_build_multilevel_native/Makefile @@ -52,36 +52,34 @@ BIN=test_binary # Automatically detect system architecture and set preprocessor etc accordingly HOST_PLATFORM := $(shell uname -s)-$(shell uname -m) + # linux x86_64 ifeq ($(HOST_PLATFORM),Linux-x86_64) CFLAGS += -z noexecstack endif -ifeq ($(HOST_PLATFORM),Linux-x86_64) +# Native compilation ifeq ($(CROSS_PREFIX),) +ifeq ($(HOST_PLATFORM),Linux-x86_64) CFLAGS += -mavx2 -mbmi2 -mpopcnt -maes CFLAGS += -DMLK_FORCE_X86_64 -else ifneq ($(findstring aarch64_be, $(CROSS_PREFIX)),) - CFLAGS += -DMLK_FORCE_AARCH64_EB -else ifneq ($(findstring aarch64, $(CROSS_PREFIX)),) - CFLAGS += -DMLK_FORCE_AARCH64 -else - -endif - -# linux aarch64 else ifeq ($(HOST_PLATFORM),Linux-aarch64) -ifeq ($(CROSS_PREFIX),) CFLAGS += -DMLK_FORCE_AARCH64 +else ifeq ($(HOST_PLATFORM),Darwin-arm64) + CFLAGS += -DMLK_FORCE_AARCH64 +endif +# Cross compilation else ifneq ($(findstring x86_64, $(CROSS_PREFIX)),) CFLAGS += -mavx2 -mbmi2 -mpopcnt -maes CFLAGS += -DMLK_FORCE_X86_64 -else -endif - -# darwin aarch64 -else ifeq ($(HOST_PLATFORM),Darwin-arm64) +else ifneq ($(findstring aarch64_be, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_AARCH64_EB +else ifneq ($(findstring aarch64, $(CROSS_PREFIX)),) CFLAGS += -DMLK_FORCE_AARCH64 +else ifneq ($(findstring riscv64, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_RISCV64 +else ifneq ($(findstring powerpc64le, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_PPC64LE endif CFLAGS := \ diff --git a/examples/multilevel_build_native/Makefile 
b/examples/multilevel_build_native/Makefile index fde0b56fb..685a09af8 100644 --- a/examples/multilevel_build_native/Makefile +++ b/examples/multilevel_build_native/Makefile @@ -14,36 +14,34 @@ endif # Automatically detect system architecture and set preprocessor etc accordingly HOST_PLATFORM := $(shell uname -s)-$(shell uname -m) + # linux x86_64 ifeq ($(HOST_PLATFORM),Linux-x86_64) CFLAGS += -z noexecstack endif -ifeq ($(HOST_PLATFORM),Linux-x86_64) +# Native compilation ifeq ($(CROSS_PREFIX),) +ifeq ($(HOST_PLATFORM),Linux-x86_64) CFLAGS += -mavx2 -mbmi2 -mpopcnt -maes CFLAGS += -DMLK_FORCE_X86_64 -else ifneq ($(findstring aarch64_be, $(CROSS_PREFIX)),) - CFLAGS += -DMLK_FORCE_AARCH64_EB -else ifneq ($(findstring aarch64, $(CROSS_PREFIX)),) - CFLAGS += -DMLK_FORCE_AARCH64 -else - -endif - -# linux aarch64 else ifeq ($(HOST_PLATFORM),Linux-aarch64) -ifeq ($(CROSS_PREFIX),) CFLAGS += -DMLK_FORCE_AARCH64 +else ifeq ($(HOST_PLATFORM),Darwin-arm64) + CFLAGS += -DMLK_FORCE_AARCH64 +endif +# Cross compilation else ifneq ($(findstring x86_64, $(CROSS_PREFIX)),) CFLAGS += -mavx2 -mbmi2 -mpopcnt -maes CFLAGS += -DMLK_FORCE_X86_64 -else -endif - -# darwin aarch64 -else ifeq ($(HOST_PLATFORM),Darwin-arm64) +else ifneq ($(findstring aarch64_be, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_AARCH64_EB +else ifneq ($(findstring aarch64, $(CROSS_PREFIX)),) CFLAGS += -DMLK_FORCE_AARCH64 +else ifneq ($(findstring riscv64, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_RISCV64 +else ifneq ($(findstring powerpc64le, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_PPC64LE endif CFLAGS := \ diff --git a/test/mk/auto.mk b/test/mk/auto.mk index 091f143af..c57e138b6 100644 --- a/test/mk/auto.mk +++ b/test/mk/auto.mk @@ -1,36 +1,27 @@ # SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT # # Automatically detect system architecture and set preprocessor etc accordingly -ifeq ($(HOST_PLATFORM),Linux-x86_64) + +# Native compilation ifeq ($(CROSS_PREFIX),) +ifeq ($(HOST_PLATFORM),Linux-x86_64) CFLAGS += 
-mavx2 -mbmi2 -mpopcnt -maes CFLAGS += -DMLK_FORCE_X86_64 -else ifneq ($(findstring aarch64_be, $(CROSS_PREFIX)),) - CFLAGS += -DMLK_FORCE_AARCH64_EB -else ifneq ($(findstring aarch64, $(CROSS_PREFIX)),) - CFLAGS += -DMLK_FORCE_AARCH64 -else - -endif - -# linux aarch64 else ifeq ($(HOST_PLATFORM),Linux-aarch64) -ifeq ($(CROSS_PREFIX),) CFLAGS += -DMLK_FORCE_AARCH64 -else ifneq ($(findstring x86_64, $(CROSS_PREFIX)),) - CFLAGS += -mavx2 -mbmi2 -mpopcnt -maes - CFLAGS += -DMLK_FORCE_X86_64 -else -endif - -# darwin aarch64 else ifeq ($(HOST_PLATFORM),Darwin-arm64) -ifeq ($(CROSS_PREFIX),) CFLAGS += -DMLK_FORCE_AARCH64 +endif +# Cross compilation else ifneq ($(findstring x86_64, $(CROSS_PREFIX)),) + CFLAGS += -mavx2 -mbmi2 -mpopcnt -maes CFLAGS += -DMLK_FORCE_X86_64 +else ifneq ($(findstring aarch64_be, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_AARCH64_EB else ifneq ($(findstring aarch64, $(CROSS_PREFIX)),) CFLAGS += -DMLK_FORCE_AARCH64 -else -endif +else ifneq ($(findstring riscv64, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_RISCV64 +else ifneq ($(findstring powerpc64le, $(CROSS_PREFIX)),) + CFLAGS += -DMLK_FORCE_PPC64LE endif From ad9bff97aa638d8b620a6e586e7b54de91adec34 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Wed, 28 May 2025 18:08:52 +0800 Subject: [PATCH 3/3] Neon NTT reference: switch from TCHES to ePrint URL. The TCHES server being unavailable keeps failing our CI. This commit switches to the ePrint URL of the same paper, which should be more stable. Signed-off-by: Matthias J. 
Kannwischer --- BIBLIOGRAPHY.md | 2 +- BIBLIOGRAPHY.yml | 2 +- dev/aarch64_clean/README.md | 2 +- dev/aarch64_clean/src/intt.S | 2 +- dev/aarch64_clean/src/ntt.S | 2 +- .../src/polyvec_basemul_acc_montgomery_cached_asm_k2.S | 2 +- .../src/polyvec_basemul_acc_montgomery_cached_asm_k3.S | 2 +- .../src/polyvec_basemul_acc_montgomery_cached_asm_k4.S | 2 +- dev/aarch64_opt/README.md | 2 +- dev/aarch64_opt/src/intt.S | 2 +- dev/aarch64_opt/src/ntt.S | 2 +- .../src/polyvec_basemul_acc_montgomery_cached_asm_k2.S | 2 +- .../src/polyvec_basemul_acc_montgomery_cached_asm_k3.S | 2 +- .../src/polyvec_basemul_acc_montgomery_cached_asm_k4.S | 2 +- mlkem/native/aarch64/README.md | 2 +- mlkem/native/aarch64/src/intt.S | 2 +- mlkem/native/aarch64/src/ntt.S | 2 +- .../aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S | 2 +- .../aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S | 2 +- .../aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S | 2 +- mlkem/poly.c | 2 +- mlkem/poly_k.c | 2 +- proofs/hol_light/arm/mlkem/mlkem_intt.S | 2 +- proofs/hol_light/arm/mlkem/mlkem_ntt.S | 2 +- .../arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S | 2 +- .../arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S | 2 +- .../arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff --git a/BIBLIOGRAPHY.md b/BIBLIOGRAPHY.md index 3fc338880..157a9f78c 100644 --- a/BIBLIOGRAPHY.md +++ b/BIBLIOGRAPHY.md @@ -157,7 +157,7 @@ source code and documentation. - Matthias J. 
Kannwischer - Bo-Yin Yang - Shang-Yi Yang -* URL: https://tches.iacr.org/index.php/TCHES/article/view/9295 +* URL: https://eprint.iacr.org/2021/986 * Referenced from: - [dev/aarch64_clean/README.md](dev/aarch64_clean/README.md) - [dev/aarch64_clean/src/intt.S](dev/aarch64_clean/src/intt.S) diff --git a/BIBLIOGRAPHY.yml b/BIBLIOGRAPHY.yml index b72cd5b5c..e0d1fd42d 100644 --- a/BIBLIOGRAPHY.yml +++ b/BIBLIOGRAPHY.yml @@ -102,7 +102,7 @@ - id: NeonNTT name: "Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1" year: 2022 - url: https://tches.iacr.org/index.php/TCHES/article/view/9295 + url: https://eprint.iacr.org/2021/986 author: - Becker, Hanno - Hwang, Vincent diff --git a/dev/aarch64_clean/README.md b/dev/aarch64_clean/README.md index 2dba77326..a9f6e0895 100644 --- a/dev/aarch64_clean/README.md +++ b/dev/aarch64_clean/README.md @@ -13,5 +13,5 @@ target architecture is Cortex-A55, but you can easily re-optimize the code for a by SLOTHY, by adjusting the parameters in the [Makefile](../aarch64_opt/src/Makefile). 
-[^NeonNTT]: Becker, Hwang, Kannwischer, Yang, Yang: Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1, [https://tches.iacr.org/index.php/TCHES/article/view/9295](https://tches.iacr.org/index.php/TCHES/article/view/9295) +[^NeonNTT]: Becker, Hwang, Kannwischer, Yang, Yang: Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1, [https://eprint.iacr.org/2021/986](https://eprint.iacr.org/2021/986) [^SLOTHY_Paper]: Abdulrahman, Becker, Kannwischer, Klein: Fast and Clean: Auditable high-performance assembly via constraint solving, [https://eprint.iacr.org/2022/1303](https://eprint.iacr.org/2022/1303) diff --git a/dev/aarch64_clean/src/intt.S b/dev/aarch64_clean/src/intt.S index 82f1e378d..190fe5e59 100644 --- a/dev/aarch64_clean/src/intt.S +++ b/dev/aarch64_clean/src/intt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/dev/aarch64_clean/src/ntt.S b/dev/aarch64_clean/src/ntt.S index 62abf9c4d..6967d81e8 100644 --- a/dev/aarch64_clean/src/ntt.S +++ b/dev/aarch64_clean/src/ntt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S b/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S index 48f0751dc..d3ef8c6f5 100644 --- a/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +++ 
b/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S b/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S index 61f42f53e..c051b4951 100644 --- a/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +++ b/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S b/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S index 80c5bee16..06302d228 100644 --- a/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +++ b/dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/dev/aarch64_opt/README.md b/dev/aarch64_opt/README.md index eb10d3807..fec4a3d04 100644 --- a/dev/aarch64_opt/README.md +++ b/dev/aarch64_opt/README.md @@ -11,5 +11,5 @@ target architecture is Cortex-A55, but you can easily re-optimize the code for a by SLOTHY, by 
adjusting the parameters in the [Makefile](src/Makefile). -[^NeonNTT]: Becker, Hwang, Kannwischer, Yang, Yang: Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1, [https://tches.iacr.org/index.php/TCHES/article/view/9295](https://tches.iacr.org/index.php/TCHES/article/view/9295) +[^NeonNTT]: Becker, Hwang, Kannwischer, Yang, Yang: Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1, [https://eprint.iacr.org/2021/986](https://eprint.iacr.org/2021/986) [^SLOTHY_Paper]: Abdulrahman, Becker, Kannwischer, Klein: Fast and Clean: Auditable high-performance assembly via constraint solving, [https://eprint.iacr.org/2022/1303](https://eprint.iacr.org/2022/1303) diff --git a/dev/aarch64_opt/src/intt.S b/dev/aarch64_opt/src/intt.S index 95d1d1bbb..946d89ff6 100644 --- a/dev/aarch64_opt/src/intt.S +++ b/dev/aarch64_opt/src/intt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/dev/aarch64_opt/src/ntt.S b/dev/aarch64_opt/src/ntt.S index 6776909c5..79515b063 100644 --- a/dev/aarch64_opt/src/ntt.S +++ b/dev/aarch64_opt/src/ntt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S b/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S index 59cd0defe..1c431c7ce 100644 --- a/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +++ 
b/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S b/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S index 93f32de31..03162dda1 100644 --- a/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +++ b/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S b/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S index 145626aba..d1cb26da4 100644 --- a/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +++ b/dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/mlkem/native/aarch64/README.md b/mlkem/native/aarch64/README.md index bc0182cdc..1217c2d73 100644 --- a/mlkem/native/aarch64/README.md +++ b/mlkem/native/aarch64/README.md @@ -12,5 +12,5 @@ and most preprocessor directives by [`scripts/simpasm`](../../../scripts/simpasm If you want 
to understand how the assembly works, and/or make changes to it, consult [dev/](../../../dev). -[^NeonNTT]: Becker, Hwang, Kannwischer, Yang, Yang: Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1, [https://tches.iacr.org/index.php/TCHES/article/view/9295](https://tches.iacr.org/index.php/TCHES/article/view/9295) +[^NeonNTT]: Becker, Hwang, Kannwischer, Yang, Yang: Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1, [https://eprint.iacr.org/2021/986](https://eprint.iacr.org/2021/986) [^SLOTHY_Paper]: Abdulrahman, Becker, Kannwischer, Klein: Fast and Clean: Auditable high-performance assembly via constraint solving, [https://eprint.iacr.org/2022/1303](https://eprint.iacr.org/2022/1303) diff --git a/mlkem/native/aarch64/src/intt.S b/mlkem/native/aarch64/src/intt.S index bd51b0704..674192465 100644 --- a/mlkem/native/aarch64/src/intt.S +++ b/mlkem/native/aarch64/src/intt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/mlkem/native/aarch64/src/ntt.S b/mlkem/native/aarch64/src/ntt.S index ac241f2b7..584e84e8b 100644 --- a/mlkem/native/aarch64/src/ntt.S +++ b/mlkem/native/aarch64/src/ntt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S b/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S index 8496bc6cf..25d71ab2a 100644 --- 
a/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +++ b/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S b/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S index d87d56b14..167248b0e 100644 --- a/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +++ b/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S b/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S index e4eee6d9f..ad157597d 100644 --- a/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +++ b/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/mlkem/poly.c b/mlkem/poly.c index d2b67c54c..40d29948c 100644 --- a/mlkem/poly.c +++ b/mlkem/poly.c @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, 
Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [REF] * CRYSTALS-Kyber C reference implementation diff --git a/mlkem/poly_k.c b/mlkem/poly_k.c index 21412c837..f15ab96ce 100644 --- a/mlkem/poly_k.c +++ b/mlkem/poly_k.c @@ -14,7 +14,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [REF] * CRYSTALS-Kyber C reference implementation diff --git a/proofs/hol_light/arm/mlkem/mlkem_intt.S b/proofs/hol_light/arm/mlkem/mlkem_intt.S index 11d06b72e..9595061e3 100644 --- a/proofs/hol_light/arm/mlkem/mlkem_intt.S +++ b/proofs/hol_light/arm/mlkem/mlkem_intt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/proofs/hol_light/arm/mlkem/mlkem_ntt.S b/proofs/hol_light/arm/mlkem/mlkem_ntt.S index f6c43f66a..ad502031a 100644 --- a/proofs/hol_light/arm/mlkem/mlkem_ntt.S +++ b/proofs/hol_light/arm/mlkem/mlkem_ntt.S @@ -29,7 +29,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 * * - [SLOTHY_Paper] * Fast and Clean: Auditable high-performance assembly via constraint solving diff --git a/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S b/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S index 3d8d5d66e..9e6691178 100644 --- 
a/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S +++ b/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S b/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S index 926009dfd..388d449d0 100644 --- a/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S +++ b/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */ diff --git a/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S b/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S index 1b346ed0c..0c7df698c 100644 --- a/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S +++ b/proofs/hol_light/arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S @@ -9,7 +9,7 @@ * - [NeonNTT] * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 * Becker, Hwang, Kannwischer, Yang, Yang - * https://tches.iacr.org/index.php/TCHES/article/view/9295 + * https://eprint.iacr.org/2021/986 */ /* Re-implementation of asymmetric base multiplication following @[NeonNTT] */