2 changes: 2 additions & 0 deletions .github/workflows/hol_light.yml
@@ -199,6 +199,8 @@ jobs:
needs: ["mlkem_specs.ml"]
- name: mlkem_reduce
needs: ["mlkem_specs.ml"]
- name: mlkem_tobytes
needs: ["mlkem_specs.ml"]
name: x86_64 HOL Light proof for ${{ matrix.proof.name }}.S
runs-on: pqcp-x64
if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
1 change: 1 addition & 0 deletions BIBLIOGRAPHY.md
@@ -264,6 +264,7 @@ source code and documentation.
- [proofs/hol_light/x86/mlkem/mlkem_intt.S](proofs/hol_light/x86/mlkem/mlkem_intt.S)
- [proofs/hol_light/x86/mlkem/mlkem_ntt.S](proofs/hol_light/x86/mlkem/mlkem_ntt.S)
- [proofs/hol_light/x86/mlkem/mlkem_reduce.S](proofs/hol_light/x86/mlkem/mlkem_reduce.S)
- [proofs/hol_light/x86/mlkem/mlkem_tobytes.S](proofs/hol_light/x86/mlkem/mlkem_tobytes.S)

### `SLOTHY`

1 change: 1 addition & 0 deletions proofs/hol_light/README.md
@@ -105,6 +105,7 @@ The following x86_64 assembly routines used in mlkem-native are covered:
* x86_64 inverse NTT: [mlkem_intt.S](x86/mlkem/mlkem_intt.S)
* x86_64 base multiplications: [mlkem_poly_basemul_acc_montgomery_cached_k2.S](x86/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S) [mlkem_poly_basemul_acc_montgomery_cached_k3.S](x86/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S) [mlkem_poly_basemul_acc_montgomery_cached_k4.S](x86/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S)
* x86_64 modular reduction: [mlkem_reduce.S](x86/mlkem/mlkem_reduce.S)
* x86_64 polynomial serialization: [mlkem_tobytes.S](x86/mlkem/mlkem_tobytes.S)

<!--- bibliography --->
[^HYBRID]: Becker, Kannwischer: Hybrid scalar/vector implementations of Keccak and SPHINCS+ on AArch64, [https://eprint.iacr.org/2022/1243](https://eprint.iacr.org/2022/1243)
3 changes: 2 additions & 1 deletion proofs/hol_light/x86/Makefile
@@ -65,7 +65,8 @@ OBJ = mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.o \
mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.o \
mlkem/mlkem_ntt.o \
mlkem/mlkem_intt.o \
mlkem/mlkem_reduce.o
mlkem/mlkem_reduce.o \
mlkem/mlkem_tobytes.o

# Build object files from assembly sources
$(OBJ): %.o : %.S
179 changes: 179 additions & 0 deletions proofs/hol_light/x86/mlkem/mlkem_tobytes.S
@@ -0,0 +1,179 @@
/*
* Copyright (c) The mlkem-native project authors
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
*/

/* References
* ==========
*
* - [REF_AVX2]
* CRYSTALS-Kyber optimized AVX2 implementation
* Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
* https://github.com/pq-crystals/kyber/tree/main/avx2
*/

/*
* This file is derived from the public domain
* AVX2 Kyber implementation @[REF_AVX2].
*/



/*
* WARNING: This file is auto-derived from the mlkem-native source file
* dev/x86_64/src/ntttobytes.S using scripts/simpasm. Do not modify it directly.
*/

#if defined(__ELF__)
.section .note.GNU-stack,"",@progbits
#endif

.text
.balign 4
#ifdef __APPLE__
.global _PQCP_MLKEM_NATIVE_MLKEM768_ntttobytes_avx2
_PQCP_MLKEM_NATIVE_MLKEM768_ntttobytes_avx2:
#else
.global PQCP_MLKEM_NATIVE_MLKEM768_ntttobytes_avx2
PQCP_MLKEM_NATIVE_MLKEM768_ntttobytes_avx2:
#endif

.cfi_startproc
endbr64
movl $0xd010d01, %eax # imm = 0xD010D01
vmovd %eax, %xmm0
vpbroadcastd %xmm0, %ymm0
vmovdqa (%rsi), %ymm5
vmovdqa 0x20(%rsi), %ymm6
vmovdqa 0x40(%rsi), %ymm7
vmovdqa 0x60(%rsi), %ymm8
vmovdqa 0x80(%rsi), %ymm9
vmovdqa 0xa0(%rsi), %ymm10
vmovdqa 0xc0(%rsi), %ymm11
vmovdqa 0xe0(%rsi), %ymm12
vpsllw $0xc, %ymm6, %ymm4
vpor %ymm4, %ymm5, %ymm4
vpsrlw $0x4, %ymm6, %ymm5
vpsllw $0x8, %ymm7, %ymm6
vpor %ymm5, %ymm6, %ymm5
vpsrlw $0x8, %ymm7, %ymm6
vpsllw $0x4, %ymm8, %ymm7
vpor %ymm6, %ymm7, %ymm6
vpsllw $0xc, %ymm10, %ymm7
vpor %ymm7, %ymm9, %ymm7
vpsrlw $0x4, %ymm10, %ymm8
vpsllw $0x8, %ymm11, %ymm9
vpor %ymm8, %ymm9, %ymm8
vpsrlw $0x8, %ymm11, %ymm9
vpsllw $0x4, %ymm12, %ymm10
vpor %ymm9, %ymm10, %ymm9
vpslld $0x10, %ymm5, %ymm3
vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
vpsrld $0x10, %ymm4, %ymm4
vpblendw $0xaa, %ymm5, %ymm4, %ymm5 # ymm5 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7],ymm4[8],ymm5[9],ymm4[10],ymm5[11],ymm4[12],ymm5[13],ymm4[14],ymm5[15]
vpslld $0x10, %ymm7, %ymm4
vpblendw $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7],ymm6[8],ymm4[9],ymm6[10],ymm4[11],ymm6[12],ymm4[13],ymm6[14],ymm4[15]
vpsrld $0x10, %ymm6, %ymm6
vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
vpslld $0x10, %ymm9, %ymm6
vpblendw $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7],ymm8[8],ymm6[9],ymm8[10],ymm6[11],ymm8[12],ymm6[13],ymm8[14],ymm6[15]
vpsrld $0x10, %ymm8, %ymm8
vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
vmovsldup %ymm4, %ymm8 # ymm8 = ymm4[0,0,2,2,4,4,6,6]
vpblendd $0xaa, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm8[1],ymm3[2],ymm8[3],ymm3[4],ymm8[5],ymm3[6],ymm8[7]
vpsrlq $0x20, %ymm3, %ymm3
vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
vmovsldup %ymm5, %ymm3 # ymm3 = ymm5[0,0,2,2,4,4,6,6]
vpblendd $0xaa, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3],ymm6[4],ymm3[5],ymm6[6],ymm3[7]
vpsrlq $0x20, %ymm6, %ymm6
vpblendd $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
vmovsldup %ymm9, %ymm6 # ymm6 = ymm9[0,0,2,2,4,4,6,6]
vpblendd $0xaa, %ymm6, %ymm7, %ymm6 # ymm6 = ymm7[0],ymm6[1],ymm7[2],ymm6[3],ymm7[4],ymm6[5],ymm7[6],ymm6[7]
vpsrlq $0x20, %ymm7, %ymm7
vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
vpunpcklqdq %ymm3, %ymm8, %ymm7 # ymm7 = ymm8[0],ymm3[0],ymm8[2],ymm3[2]
vpunpckhqdq %ymm3, %ymm8, %ymm3 # ymm3 = ymm8[1],ymm3[1],ymm8[3],ymm3[3]
vpunpcklqdq %ymm4, %ymm6, %ymm8 # ymm8 = ymm6[0],ymm4[0],ymm6[2],ymm4[2]
vpunpckhqdq %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[1],ymm4[1],ymm6[3],ymm4[3]
vpunpcklqdq %ymm9, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm9[0],ymm5[2],ymm9[2]
vpunpckhqdq %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[1],ymm9[1],ymm5[3],ymm9[3]
vperm2i128 $0x20, %ymm8, %ymm7, %ymm5 # ymm5 = ymm7[0,1],ymm8[0,1]
vperm2i128 $0x31, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[2,3],ymm8[2,3]
vperm2i128 $0x20, %ymm3, %ymm6, %ymm7 # ymm7 = ymm6[0,1],ymm3[0,1]
vperm2i128 $0x31, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[2,3],ymm3[2,3]
vperm2i128 $0x20, %ymm9, %ymm4, %ymm6 # ymm6 = ymm4[0,1],ymm9[0,1]
vperm2i128 $0x31, %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[2,3],ymm9[2,3]
vmovdqu %ymm5, (%rdi)
vmovdqu %ymm7, 0x20(%rdi)
vmovdqu %ymm6, 0x40(%rdi)
vmovdqu %ymm8, 0x60(%rdi)
vmovdqu %ymm3, 0x80(%rdi)
vmovdqu %ymm9, 0xa0(%rdi)
vmovdqa 0x100(%rsi), %ymm5
vmovdqa 0x120(%rsi), %ymm6
vmovdqa 0x140(%rsi), %ymm7
vmovdqa 0x160(%rsi), %ymm8
vmovdqa 0x180(%rsi), %ymm9
vmovdqa 0x1a0(%rsi), %ymm10
vmovdqa 0x1c0(%rsi), %ymm11
vmovdqa 0x1e0(%rsi), %ymm12
vpsllw $0xc, %ymm6, %ymm4
vpor %ymm4, %ymm5, %ymm4
vpsrlw $0x4, %ymm6, %ymm5
vpsllw $0x8, %ymm7, %ymm6
vpor %ymm5, %ymm6, %ymm5
vpsrlw $0x8, %ymm7, %ymm6
vpsllw $0x4, %ymm8, %ymm7
vpor %ymm6, %ymm7, %ymm6
vpsllw $0xc, %ymm10, %ymm7
vpor %ymm7, %ymm9, %ymm7
vpsrlw $0x4, %ymm10, %ymm8
vpsllw $0x8, %ymm11, %ymm9
vpor %ymm8, %ymm9, %ymm8
vpsrlw $0x8, %ymm11, %ymm9
vpsllw $0x4, %ymm12, %ymm10
vpor %ymm9, %ymm10, %ymm9
vpslld $0x10, %ymm5, %ymm3
vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
vpsrld $0x10, %ymm4, %ymm4
vpblendw $0xaa, %ymm5, %ymm4, %ymm5 # ymm5 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7],ymm4[8],ymm5[9],ymm4[10],ymm5[11],ymm4[12],ymm5[13],ymm4[14],ymm5[15]
vpslld $0x10, %ymm7, %ymm4
vpblendw $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7],ymm6[8],ymm4[9],ymm6[10],ymm4[11],ymm6[12],ymm4[13],ymm6[14],ymm4[15]
vpsrld $0x10, %ymm6, %ymm6
vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
vpslld $0x10, %ymm9, %ymm6
vpblendw $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7],ymm8[8],ymm6[9],ymm8[10],ymm6[11],ymm8[12],ymm6[13],ymm8[14],ymm6[15]
vpsrld $0x10, %ymm8, %ymm8
vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
vmovsldup %ymm4, %ymm8 # ymm8 = ymm4[0,0,2,2,4,4,6,6]
vpblendd $0xaa, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm8[1],ymm3[2],ymm8[3],ymm3[4],ymm8[5],ymm3[6],ymm8[7]
vpsrlq $0x20, %ymm3, %ymm3
vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
vmovsldup %ymm5, %ymm3 # ymm3 = ymm5[0,0,2,2,4,4,6,6]
vpblendd $0xaa, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3],ymm6[4],ymm3[5],ymm6[6],ymm3[7]
vpsrlq $0x20, %ymm6, %ymm6
vpblendd $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
vmovsldup %ymm9, %ymm6 # ymm6 = ymm9[0,0,2,2,4,4,6,6]
vpblendd $0xaa, %ymm6, %ymm7, %ymm6 # ymm6 = ymm7[0],ymm6[1],ymm7[2],ymm6[3],ymm7[4],ymm6[5],ymm7[6],ymm6[7]
vpsrlq $0x20, %ymm7, %ymm7
vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
vpunpcklqdq %ymm3, %ymm8, %ymm7 # ymm7 = ymm8[0],ymm3[0],ymm8[2],ymm3[2]
vpunpckhqdq %ymm3, %ymm8, %ymm3 # ymm3 = ymm8[1],ymm3[1],ymm8[3],ymm3[3]
vpunpcklqdq %ymm4, %ymm6, %ymm8 # ymm8 = ymm6[0],ymm4[0],ymm6[2],ymm4[2]
vpunpckhqdq %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[1],ymm4[1],ymm6[3],ymm4[3]
vpunpcklqdq %ymm9, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm9[0],ymm5[2],ymm9[2]
vpunpckhqdq %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[1],ymm9[1],ymm5[3],ymm9[3]
vperm2i128 $0x20, %ymm8, %ymm7, %ymm5 # ymm5 = ymm7[0,1],ymm8[0,1]
vperm2i128 $0x31, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[2,3],ymm8[2,3]
vperm2i128 $0x20, %ymm3, %ymm6, %ymm7 # ymm7 = ymm6[0,1],ymm3[0,1]
vperm2i128 $0x31, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[2,3],ymm3[2,3]
vperm2i128 $0x20, %ymm9, %ymm4, %ymm6 # ymm6 = ymm4[0,1],ymm9[0,1]
vperm2i128 $0x31, %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[2,3],ymm9[2,3]
vmovdqu %ymm5, 0xc0(%rdi)
vmovdqu %ymm7, 0xe0(%rdi)
vmovdqu %ymm6, 0x100(%rdi)
vmovdqu %ymm8, 0x120(%rdi)
vmovdqu %ymm3, 0x140(%rdi)
vmovdqu %ymm9, 0x160(%rdi)
retq
.cfi_endproc
4 changes: 4 additions & 0 deletions proofs/hol_light/x86/proofs/dump_bytecode.ml
@@ -28,3 +28,7 @@ print_string "==== bytecode end =====================================\n\n";;
print_string "=== bytecode start: x86/mlkem/mlkem_reduce.o ===\n";;
print_literal_from_elf "x86/mlkem/mlkem_reduce.o";;
print_string "==== bytecode end =====================================\n\n";;

print_string "=== bytecode start: x86/mlkem/mlkem_tobytes.o ===\n";;
print_literal_from_elf "x86/mlkem/mlkem_tobytes.o";;
print_string "==== bytecode end =====================================\n\n";;
6 changes: 3 additions & 3 deletions proofs/hol_light/x86/proofs/mlkem_intt.ml
@@ -1051,7 +1051,7 @@ let MLKEM_INTT_CORRECT = prove
==> let zi =
read(memory :> bytes16(word_add a (word(2 * i)))) s in
(ival zi == avx2_inverse_ntt (ival o x) i) (mod &3329) /\
abs(ival zi) <= &26632))
abs(ival zi) <= &26631))
(MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
MAYCHANGE [ZMM0; ZMM1; ZMM2; ZMM3; ZMM4; ZMM5; ZMM6; ZMM7; ZMM8;
ZMM9; ZMM10; ZMM11; ZMM12; ZMM13; ZMM14; ZMM15] ,,
@@ -1187,7 +1187,7 @@ let MLKEM_INTT_NOIBT_SUBROUTINE_CORRECT = prove
==> let zi =
read(memory :> bytes16(word_add a (word(2 * i)))) s in
(ival zi == avx2_inverse_ntt (ival o x) i) (mod &3329) /\
abs(ival zi) <= &26632))
abs(ival zi) <= &26631))
(MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
MAYCHANGE [memory :> bytes(a, 512)])`,
let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in
@@ -1217,7 +1217,7 @@ let MLKEM_INTT_SUBROUTINE_CORRECT = prove
==> let zi =
read(memory :> bytes16(word_add a (word(2 * i)))) s in
(ival zi == avx2_inverse_ntt (ival o x) i) (mod &3329) /\
abs(ival zi) <= &26632))
abs(ival zi) <= &26631))
(MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
MAYCHANGE [memory :> bytes(a, 512)])`,
let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in