pq-code-package · dkostic · Nov 25, 2025 · Nov 26, 2025 · Nov 27, 2025 · Nov 27, 2025
@@ -12,6 +12,9 @@ on:
      - 'proofs/hol_light/arm/Makefile'
      - 'proofs/hol_light/arm/**/*.S'
      - 'proofs/hol_light/arm/**/*.ml'
+     - 'proofs/hol_light/x86/Makefile'
+     - 'proofs/hol_light/x86/**/*.S'
+     - 'proofs/hol_light/x86/**/*.ml'
      - 'flake.nix'
      - 'flake.lock'
      - 'nix/hol_light/*'
@@ -23,6 +26,9 @@ on:
       - 'proofs/hol_light/arm/Makefile'
       - 'proofs/hol_light/arm/**/*.S'
       - 'proofs/hol_light/arm/**/*.ml'
+      - 'proofs/hol_light/x86/Makefile'
+      - 'proofs/hol_light/x86/**/*.S'
+      - 'proofs/hol_light/x86/**/*.ml'
       - 'flake.nix'
       - 'flake.lock'
       - 'nix/hol_light/*'
@@ -142,3 +148,88 @@ jobs:
           nix-shell: 'hol_light'
           script: |
             tests hol_light -p ${{ matrix.proof.name }} --verbose
+
+  # x86_64 proofs
+  hol_light_bytecode_x86_64:
+    name: HOL-Light bytecode check
+    runs-on: pqcp-x64
+    if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
+    steps:
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          fetch-depth: 0
+      - uses: ./.github/actions/setup-shell
+        with:
+          gh_token: ${{ secrets.GITHUB_TOKEN }}
+          nix-shell: 'hol_light'
+          script: |
+            autogen --update-hol-light-bytecode --dry-run
+  # TODO: fix the interactive mode to work for both x86 and arm.
+  # hol_light_interactive_x86_64:
+  #   name: HOL-Light interactive shell test
+  #   runs-on: pqcp-x64
+  #   needs: [ hol_light_bytecode_x86_64 ]
+  #   if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
+  #   steps:
+  #     - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+  #       with:
+  #         fetch-depth: 0
+  #     - uses: ./.github/actions/setup-shell
+  #       with:
+  #         gh_token: ${{ secrets.GITHUB_TOKEN }}
+  #         nix-shell: 'hol_light'
+  #         script: |
+  #           make -C proofs/hol_light/x86 mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.o
+  #           echo 'needs "proofs/mlkem_poly_basemul_acc_montgomery_cached_k2.ml";;' | hol.sh
+  hol_light_proofs_x86_64:
+    needs: [ hol_light_bytecode_x86_64 ]
+    strategy:
+      fail-fast: false
+      matrix:
+        proof:
+          # Dependencies on {name}.{S,ml} are implicit
+          - name: mlkem_poly_basemul_acc_montgomery_cached_k2
+            needs: ["mlkem_specs.ml"]
+          - name: mlkem_poly_basemul_acc_montgomery_cached_k3
+            needs: ["mlkem_specs.ml"]
+          - name: mlkem_poly_basemul_acc_montgomery_cached_k4
+            needs: ["mlkem_specs.ml"]
+    name: HOL Light proof for ${{ matrix.proof.name }}.S
+    runs-on: pqcp-x64
+    if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
+    steps:
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          fetch-depth: 0
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+      - name: Check if dependencies changed
+        id: check_run
+        shell: bash
+        run: |
+          run_needed=0
+          changed_files="${{ steps.changed-files.outputs.all_changed_files }}"
+          dependencies="${{ join(matrix.proof.needs, ' ') }} ${{ format('{0}.S {0}.ml', matrix.proof.name) }}"
+          for changed in $changed_files; do
+            for needs in $dependencies; do
+               if [[ "$changed" == *"$needs" ]]; then
+                 run_needed=1
+               fi
+            done
+          done
+
+          # Always re-run upon change to nix files for HOL-Light
+          if [[ "$changed_files" == *"nix/"* ]] || [[ "$changed_files" == *"hol_light.yml"* ]] || [[ "$changed_files" == *"flake"* ]] || [[ "$changed_files" == *"proofs/hol_light/x86/Makefile"* ]]; then
+            run_needed=1
+          fi
+
+          echo "run_needed=${run_needed}" >> $GITHUB_OUTPUT
+      - uses: ./.github/actions/setup-shell
+        if: |
+          steps.check_run.outputs.run_needed == '1'
+        with:
+          gh_token: ${{ secrets.GITHUB_TOKEN }}
+          nix-shell: 'hol_light'
+          script: |
+            tests hol_light -p ${{ matrix.proof.name }} --verbose
@@ -113,7 +113,7 @@ source code and documentation.
   - [mlkem/src/fips202/native/aarch64/auto.h](mlkem/src/fips202/native/aarch64/auto.h)
   - [mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S](mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S)
   - [mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S](mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S)
-  - [proofs/hol_light/arm/README.md](proofs/hol_light/arm/README.md)
+  - [proofs/hol_light/README.md](proofs/hol_light/README.md)
   - [proofs/hol_light/arm/mlkem/keccak_f1600_x1_v84a.S](proofs/hol_light/arm/mlkem/keccak_f1600_x1_v84a.S)
   - [proofs/hol_light/arm/mlkem/keccak_f1600_x2_v84a.S](proofs/hol_light/arm/mlkem/keccak_f1600_x2_v84a.S)
 
@@ -248,7 +248,7 @@ source code and documentation.
 * URL: https://github.com/slothy-optimizer/slothy/
 * Referenced from:
   - [dev/README.md](dev/README.md)
-  - [proofs/hol_light/arm/README.md](proofs/hol_light/arm/README.md)
+  - [proofs/hol_light/README.md](proofs/hol_light/README.md)
 
 ### `SLOTHY_Paper`
 

@@ -7,19 +7,21 @@ hol_light.overrideAttrs (old: {
     export HOLLIGHT_DIR="$1/lib/hol_light"
     export PATH="$1/lib/hol_light:$PATH"
   '';
-  version = "unstable-2025-09-22";
+  version = "unstable-2025-11-17";
   src = fetchFromGitHub {
     owner = "jrh13";
     repo = "hol-light";
-    rev = "bed58fa74649fa74015176f8f90e77f7af5cf8e3";
-    hash = "sha256-QDubbUUChvv04239BdcKPSU+E2gdSzqAWfAETK2Xtg0=";
+    rev = "08bcac75772d37c2447a90c90d1dff9ab415f217";
+    hash = "sha256-kYOzGW7uQGOM/b+JPWQfpqqtgMmMku/BkN58WZTtokU=";
   };
   patches = [
     ./0005-Configure-hol-sh-for-mlkem-native.patch
     ./0006-Add-findlib-to-ocaml-hol.patch
   ];
   propagatedBuildInputs = old.propagatedBuildInputs ++ old.nativeBuildInputs ++ [ ocamlPackages.pcre2 ledit ];
   buildPhase = ''
+    patchShebangs pa_j/chooser.sh
+    patchShebangs update_database/chooser.sh
     HOLLIGHT_USE_MODULE=1 make hol.sh
     patchShebangs hol.sh
     HOLLIGHT_USE_MODULE=1 make

@@ -2,12 +2,12 @@
 { stdenv, fetchFromGitHub, writeText, ... }:
 stdenv.mkDerivation rec {
   pname = "s2n_bignum";
-  version = "2ab2252b8505e58a7c3392f8ad823782032b61e7";
+  version = "84a604317b94cbca9f14c7b2b771afc2827ab99f";
   src = fetchFromGitHub {
     owner = "awslabs";
     repo = "s2n-bignum";
     rev = "${version}";
-    hash = "sha256-7lil3jAFo5NiyNOSBYZcRjduXkotV3x4PlxXSKt63M8=";
+    hash = "sha256-J2tVZ2x23ZHP+ZNkbiUqyaci5bu4zNfkXuRemnuB+N0=";
   };
   setupHook = writeText "setup-hook.sh" ''
     export S2N_BIGNUM_DIR="$1"

@@ -2,12 +2,12 @@
 
 # HOL Light functional correctness proofs
 
-This directory contains functional correctness proofs for all AArch64 assembly routines
-used in mlkem-native. The proofs were largely developed by John Harrison
+This directory contains functional correctness proofs for all AArch64 and some x86_64 assembly routines
+used in mlkem-native. The proofs were largely developed by John Harrison and Dusan Kostic
 and are written in the [HOL Light](https://hol-light.github.io/) theorem
 prover, utilizing the assembly verification infrastructure from [s2n-bignum](https://github.com/awslabs/s2n-bignum).
 
-Each function is proved in a separate `.ml` file in [proofs/](proofs). Each file
+Each function is proved in a separate `.ml` file in [arm/proofs/](arm/proofs) and [x86/proofs/](x86/proofs). Each file
 contains the byte code being verified, as well as the specification that is being
 proved.
 
@@ -16,7 +16,7 @@ proved.
 Proofs are 'post-hoc' in the sense that HOL-Light/s2n-bignum operate on the final object code. In particular, the means by which the code was generated (including the [SLOTHY](https://github.com/slothy-optimizer/slothy/) superoptimizer) need not be trusted.
 
 Specifications are essentially [Hoare triples](https://en.wikipedia.org/wiki/Hoare_logic), with the noteworthy difference that the program is implicit as the content of memory at the PC; which is asserted to
-be the code under verification as part of the precondition. For example, the following is the specification of the `poly_reduce` function:
+be the code under verification as part of the precondition. For example, the following is the specification of the aarch64 `poly_reduce` function:
 
 ```ocaml
  (* For all (abbreviated by `!` in HOL):
@@ -67,6 +67,11 @@ from mlkem-native's base directory. Then
 ```bash
 make -C proofs/hol_light/arm
 ```
+or
+
+```bash
+make -C proofs/hol_light/x86
+```
 
 will build and run the proofs. Note that this make take hours even on powerful machines.
 
@@ -77,23 +82,27 @@ For convenience, you can also use `tests hol_light` which wraps the `make` invoc
 All AArch64 assembly routines used in mlkem-native are covered. Those are:
 
 - ML-KEM Arithmetic:
-  * AArch64 forward NTT: [mlkem_ntt.S](mlkem/mlkem_ntt.S)
-  * AArch64 inverse NTT: [mlkem_intt.S](mlkem/mlkem_intt.S)
-  * AArch64 base multiplications: [mlkem_poly_basemul_acc_montgomery_cached_k2.S](mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S) [mlkem_poly_basemul_acc_montgomery_cached_k3.S](mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S) [mlkem_poly_basemul_acc_montgomery_cached_k4.S](mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S)
-  * AArch64 conversion to Montgomery form: [mlkem_poly_tomont.S](mlkem/mlkem_poly_tomont.S)
-  * AArch64 modular reduction: [mlkem_poly_reduce.S](mlkem/mlkem_poly_reduce.S)
-  * AArch64 'multiplication cache' computation: [mlkem_poly_mulcache_compute.S](mlkem/mlkem_poly_mulcache_compute.S)
-  * AArch64 rejection sampling: [mlkem_rej_uniform.S](mlkem/mlkem_rej_uniform.S)
-  * AArch64 polynomial compresseion: [mlkem_poly_tobytes.S](mlkem/mlkem_poly_tobytes.S)
+  * AArch64 forward NTT: [mlkem_ntt.S](arm/mlkem/mlkem_ntt.S)
+  * AArch64 inverse NTT: [mlkem_intt.S](arm/mlkem/mlkem_intt.S)
+  * AArch64 base multiplications: [mlkem_poly_basemul_acc_montgomery_cached_k2.S](arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S) [mlkem_poly_basemul_acc_montgomery_cached_k3.S](arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S) [mlkem_poly_basemul_acc_montgomery_cached_k4.S](arm/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S)
+  * AArch64 conversion to Montgomery form: [mlkem_poly_tomont.S](arm/mlkem/mlkem_poly_tomont.S)
+  * AArch64 modular reduction: [mlkem_poly_reduce.S](arm/mlkem/mlkem_poly_reduce.S)
+  * AArch64 'multiplication cache' computation: [mlkem_poly_mulcache_compute.S](arm/mlkem/mlkem_poly_mulcache_compute.S)
+  * AArch64 rejection sampling: [mlkem_rej_uniform.S](arm/mlkem/mlkem_rej_uniform.S)
+  * AArch64 polynomial compresseion: [mlkem_poly_tobytes.S](arm/mlkem/mlkem_poly_tobytes.S)
 - FIPS202:
-  * Keccak-F1600 using lazy rotations[^HYBRID]: [keccak_f1600_x1_scalar.S](mlkem/keccak_f1600_x1_scalar.S)
-  * Keccak-F1600 using v8.4-A SHA3 instructions: [keccak_f1600_x1_v84a.S](mlkem/keccak_f1600_x1_v84a.S)
-  * 2-fold Keccak-F1600 using v8.4-A SHA3 instructions: [keccak_f1600_x2_v84a.S](mlkem/keccak_f1600_x2_v84a.S)
-  * 'Hybrid' 4-fold Keccak-F1600 using scalar and v8-A Neon instructions: [keccak_f1600_x4_v8a_scalar.S](mlkem/keccak_f1600_x4_v8a_scalar.S)
-  * 'Triple hybrid' 4-fold Keccak-F1600 using scalar, v8-A Neon and v8.4-A+SHA3 Neon instructions:[keccak_f1600_x4_v8a_v84a_scalar.S](mlkem/keccak_f1600_x4_v8a_v84a_scalar.S)
+  * Keccak-F1600 using lazy rotations[^HYBRID]: [keccak_f1600_x1_scalar.S](arm/mlkem/keccak_f1600_x1_scalar.S)
+  * Keccak-F1600 using v8.4-A SHA3 instructions: [keccak_f1600_x1_v84a.S](arm/mlkem/keccak_f1600_x1_v84a.S)
+  * 2-fold Keccak-F1600 using v8.4-A SHA3 instructions: [keccak_f1600_x2_v84a.S](arm/mlkem/keccak_f1600_x2_v84a.S)
+  * 'Hybrid' 4-fold Keccak-F1600 using scalar and v8-A Neon instructions: [keccak_f1600_x4_v8a_scalar.S](arm/mlkem/keccak_f1600_x4_v8a_scalar.S)
+  * 'Triple hybrid' 4-fold Keccak-F1600 using scalar, v8-A Neon and v8.4-A+SHA3 Neon instructions:[keccak_f1600_x4_v8a_v84a_scalar.S](arm/mlkem/keccak_f1600_x4_v8a_v84a_scalar.S)
 
 The NTT and invNTT functions are super-optimized using [SLOTHY](https://github.com/slothy-optimizer/slothy/).
 
+The following x86_64 assembly routines used in mlkem-native are covered:
+- ML-KEM Arithmetic:
+  * x86_64 base multiplications: [mlkem_poly_basemul_acc_montgomery_cached_k2.S](x86/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.S) [mlkem_poly_basemul_acc_montgomery_cached_k3.S](x86/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.S) [mlkem_poly_basemul_acc_montgomery_cached_k4.S](x86/mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.S)
+
 <!--- bibliography --->
 [^HYBRID]: Becker, Kannwischer: Hybrid scalar/vector implementations of Keccak and SPHINCS+ on AArch64, [https://eprint.iacr.org/2022/1243](https://eprint.iacr.org/2022/1243)
 [^SLOTHY]: Abdulrahman, Becker, Kannwischer, Klein: SLOTHY superoptimizer, [https://github.com/slothy-optimizer/slothy/](https://github.com/slothy-optimizer/slothy/)
@@ -0,0 +1,136 @@
+#############################################################################
+# Copyright (c) The mlkem-native project authors
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
+#############################################################################
+
+#
+# This Makefile is derived from the Makefile x86/Makefile in s2n-bignum.
+# - Remove all s2n-bignum proofs and tutorial, add mlkem-native proofs
+# - Minor path modifications to support base theories from s2n-bignum
+#   to reside in a separate read-only directory
+#
+
+.DEFAULT_GOAL := run_proofs
+
+OSTYPE_RESULT=$(shell uname -s)
+ARCHTYPE_RESULT=$(shell uname -m)
+
+SRC ?= $(S2N_BIGNUM_DIR)
+SRC_X86 ?= $(SRC)/x86
+
+# Add explicit language input parameter to cpp, otherwise the use of #n for
+# numeric literals in x86 code is a problem when used inside #define macros
+# since normally that means stringization.
+#
+# Some clang-based preprocessors seem to behave differently, and get confused
+# by single-quote characters in comments, so we eliminate // comments first.
+
+ifeq ($(OSTYPE_RESULT),Darwin)
+PREPROCESS=sed -e 's/\/\/.*//' | $(CC) -E -xassembler-with-cpp -
+else
+PREPROCESS=$(CC) -E -xassembler-with-cpp -
+endif
+
+# Generally GNU-type assemblers are happy with multiple instructions on
+# a line, but we split them up anyway just in case.
+
+SPLIT=tr ';' '\n'
+
+# If actually on an x86_64 machine, just use the assembler (as). Otherwise
+# use a cross-assembling version so that the code can still be assembled
+# and the proofs checked against the object files (though you won't be able
+# to run code without additional emulation infrastructure). For the clang
+# version on OS X we just add the "-arch x86_64" option. For the Linux/gcc
+# toolchain we assume the presence of the special cross-assembler. This
+# can be installed via something like:
+#
+#  sudo apt-get install binutils-x86-64-linux-gnu
+
+ifeq ($(ARCHTYPE_RESULT),x86_64)
+ASSEMBLE=as
+OBJDUMP=objdump -d
+else
+ifeq ($(OSTYPE_RESULT),Darwin)
+ASSEMBLE=as -arch x86_64
+OBJDUMP=otool -tvV
+else
+ASSEMBLE=x86_64-linux-gnu-as
+OBJDUMP=x86_64-linux-gnu-objdump -d
+endif
+endif
+
+OBJ = mlkem/mlkem_poly_basemul_acc_montgomery_cached_k2.o \
+      mlkem/mlkem_poly_basemul_acc_montgomery_cached_k3.o \
+      mlkem/mlkem_poly_basemul_acc_montgomery_cached_k4.o
+
+# Build object files from assembly sources
+$(OBJ): %.o : %.S
+	@echo "Preparing $@ ..."
+	@echo "AS: `$(ASSEMBLE) --version`"
+	@echo "OBJDUMP: `$(OBJDUMP) --version`"
+	$(Q)[ -d $(@D) ] || mkdir -p $(@D)
+	cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ -
+	# MacOS may generate relocations in non-text sections that break
+	# the object file parser in HOL-Light
+	strip $@
+
+clean:; rm -f */*.o */*/*.o */*.correct */*.native
+
+# Proof-related parts
+#
+# The proof files are all independent, though each one loads the
+# same common infrastructure "base.ml". So you can potentially
+# run the proofs in parallel for more speed, e.g.
+#
+#    nohup make -j 16 proofs &
+#
+# If you build hol-light yourself (see https://github.com/jrh13/hol-light)
+# in your home directory, and do "make" inside the subdirectory hol-light,
+# then the following HOLDIR setting should be right:
+
+HOLDIR?=$(HOLLIGHTDIR)
+HOLLIGHT:=$(HOLDIR)/hol.sh
+
+BASE?=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+
+PROOF_BINS = $(OBJ:.o=.native)
+PROOF_LOGS = $(OBJ:.o=.correct)
+
+# Build precompiled binary for dumping bytecodes
+proofs/dump_bytecode.native: proofs/dump_bytecode.ml $(OBJ)
+	./proofs/build-proof.sh $(BASE)/$< "$(HOLLIGHT)" "$@"
+
+# Build precompiled native binaries of HOL Light proofs
+
+.SECONDEXPANSION:
+%.native: proofs/$$(*F).ml %.o ; ./proofs/build-proof.sh $(BASE)/$< "$(HOLLIGHT)" "$@"
+
+# Run them and print the standard output+error at *.correct
+%.correct: %.native
+	$< 2>&1 | tee $@
+	@if (grep -i "error:\|exception:" "$@" >/dev/null); then  \
+		echo "$< had errors!";			          \
+		exit 1;					          \
+	else							  \
+		echo "$< OK";					  \
+	fi
+
+build_proofs: $(PROOF_BINS);
+run_proofs: build_proofs $(PROOF_LOGS);
+
+proofs: run_proofs ; $(SRC)/tools/count-proofs.sh .
+
+dump_bytecode: proofs/dump_bytecode.native
+	./$<
+
+.PHONY: proofs build_proofs run_proofs sematest clean dump_bytecode
+
+# Always run sematest regardless of dependency check
+FORCE: ;
+# Always use max. # of cores because in Makefile one cannot get the passed number of -j.
+# A portable way of getting the number of max. cores:
+# https://stackoverflow.com/a/23569003/1488216
+NUM_CORES_FOR_SEMATEST = $(shell getconf _NPROCESSORS_ONLN)
+sematest: FORCE $(OBJ) $(SRC_X86)/proofs/simulator_iclasses.ml $(SRC_X86)/proofs/simulator.native
+	$(SRC)/tools/run-sematest.sh x86 $(NUM_CORES_FOR_SEMATEST)
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Copyright (c) The mlkem-native project authors
+# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+#
+# This tiny script just lists the names of source files for which
+# we have a spec and proof in HOL-Light.
+
+ROOT=$(git rev-parse --show-toplevel)
+cd $ROOT
+ls -1 proofs/hol_light/x86/mlkem/*.S | cut -d '/' -f 5 | sed 's/\.S//'