@@ -44,22 +44,14 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
    * of the same parent object.
    */
 
-  MLD_ALIGN uint8_t seed_ext[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
-
-  for (j = 0; j < 4; j++)
-  __loop__(
-    assigns(j, object_whole(seed_ext))
-    invariant(j <= 4)
-  )
-  {
-    mld_memcpy(seed_ext[j], rho, MLDSA_SEEDBYTES);
-  }
+  MLD_ALIGN uint8_t single_seed[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
 
 #if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY)
+  MLD_ALIGN uint8_t batched_seeds[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
   /* Sample 4 matrix entries at a time. */
   for (i = 0; i < (MLDSA_K * MLDSA_L / 4) * 4; i += 4)
   __loop__(
-    assigns(i, j, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
+    assigns(i, j, object_whole(batched_seeds), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
     invariant(i <= (MLDSA_K * MLDSA_L / 4) * 4 && i % 4 == 0)
     /* vectors 0 .. i / MLDSA_L are completely sampled */
     invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -71,31 +63,38 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
   {
     for (j = 0; j < 4; j++)
     __loop__(
-      assigns(j, object_whole(seed_ext))
+      assigns(j, object_whole(batched_seeds))
       invariant(j <= 4)
     )
     {
       uint8_t x = (uint8_t)((i + j) / MLDSA_L);
       uint8_t y = (uint8_t)((i + j) % MLDSA_L);
 
-      seed_ext[j][MLDSA_SEEDBYTES + 0] = y;
-      seed_ext[j][MLDSA_SEEDBYTES + 1] = x;
+      mld_memcpy(batched_seeds[j], rho, MLDSA_SEEDBYTES);
+      batched_seeds[j][MLDSA_SEEDBYTES + 0] = y;
+      batched_seeds[j][MLDSA_SEEDBYTES + 1] = x;
     }
 
     mld_poly_uniform_4x(&mat[i / MLDSA_L].vec[i % MLDSA_L],
                         &mat[(i + 1) / MLDSA_L].vec[(i + 1) % MLDSA_L],
                         &mat[(i + 2) / MLDSA_L].vec[(i + 2) % MLDSA_L],
                         &mat[(i + 3) / MLDSA_L].vec[(i + 3) % MLDSA_L],
-                        seed_ext);
+                        batched_seeds);
   }
+
+  /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
+  mld_zeroize(batched_seeds, sizeof(batched_seeds));
+
 #else /* !MLD_CONFIG_SERIAL_FIPS202_ONLY */
   i = 0;
 #endif /* MLD_CONFIG_SERIAL_FIPS202_ONLY */
 
+  mld_memcpy(single_seed, rho, MLDSA_SEEDBYTES);
+
   /* For MLDSA_K=6, MLDSA_L=5, process the last two entries individually */
   while (i < MLDSA_K * MLDSA_L)
   __loop__(
-    assigns(i, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
+    assigns(i, object_whole(single_seed), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
     invariant(i <= MLDSA_K * MLDSA_L)
     /* vectors 0 .. i / MLDSA_L are completely sampled */
     invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -109,27 +108,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
     uint8_t y = (uint8_t)(i % MLDSA_L);
     mld_poly *this_poly = &mat[i / MLDSA_L].vec[i % MLDSA_L];
 
-    seed_ext[0][MLDSA_SEEDBYTES + 0] = y;
-    seed_ext[0][MLDSA_SEEDBYTES + 1] = x;
+    single_seed[MLDSA_SEEDBYTES + 0] = y;
+    single_seed[MLDSA_SEEDBYTES + 1] = x;
 
-    mld_poly_uniform(this_poly, seed_ext[0]);
+    mld_poly_uniform(this_poly, single_seed);
     i++;
   }
 
   /*
    * The public matrix is generated in NTT domain. If the native backend
-   * uses a custom order in NTT domain, permute A accordingly.
+   * uses a custom order in NTT domain, permute A accordingly. This does
+   * not affect the bounds on the coefficients, so we ignore this for CBMC
+   * to simplify the proof.
    */
+#ifndef CBMC
   for (i = 0; i < MLDSA_K; i++)
   {
     for (j = 0; j < MLDSA_L; j++)
     {
       mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
     }
   }
+#endif /* !CBMC */
 
   /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
-  mld_zeroize(seed_ext, sizeof(seed_ext));
+  mld_zeroize(single_seed, sizeof(single_seed));
 }
 
 MLD_INTERNAL_API
@@ -234,7 +237,6 @@ void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
     invariant(i <= MLDSA_L)
     invariant(forall(k0, i, MLDSA_L,
       forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
-    invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
     invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
   )
   {
@@ -302,87 +304,129 @@ void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
   mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
 }
 
+#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
+    MLD_CONFIG_PARAMETER_SET == 44
+
 MLD_INTERNAL_API
 void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
                                            const mld_polyvecl *v)
 {
-#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
-    MLD_CONFIG_PARAMETER_SET == 44
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l4_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5) && \
     MLD_CONFIG_PARAMETER_SET == 65
+
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l5_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7) && \
     MLD_CONFIG_PARAMETER_SET == 87
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l7_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
-#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
-          MLD_CONFIG_PARAMETER_SET == 44) && \
-          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
-          MLD_CONFIG_PARAMETER_SET == 65) && \
-          MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
-          MLD_CONFIG_PARAMETER_SET == 87 */
-  unsigned int i, j;
-  mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
-  mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
-  /* The first input is bounded by [0, Q-1] inclusive
-   * The second input is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
+}
+
+#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
+          MLD_CONFIG_PARAMETER_SET == 44) && \
+          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
+          MLD_CONFIG_PARAMETER_SET == 65) && \
+          MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
+          MLD_CONFIG_PARAMETER_SET == 87 */
+
+static int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
+                                             const mld_polyvecl *v,
+                                             unsigned int i)
+__contract__(
+  requires(memory_no_alias(u, sizeof(mld_polyvecl)))
+  requires(memory_no_alias(v, sizeof(mld_polyvecl)))
+  requires(i < MLDSA_N)
+  requires(forall(l0, 0, MLDSA_L,
+                  array_bound(u->vec[l0].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))
+  requires(forall(l1, 0, MLDSA_L,
+                  array_abs_bound(v->vec[l1].coeffs, 0, MLDSA_N, MLD_NTT_BOUND)))
+  ensures(return_value >= -(int64_t)MLDSA_L * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+  ensures(return_value <= (int64_t)MLDSA_L * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+)
+{
+  /* Input vector u is bounded by [0, Q-1] inclusive
+   * Input vector v is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
    * safely accumulate in 64-bits without intermediate reductions as
    * MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX
    *
    * The worst case is ML-DSA-87: 7 * (9Q-1) * (Q-1) < 2**52
    * (and likewise for negative values)
    */
 
+  int64_t t = 0;
+  unsigned int j;
+  for (j = 0; j < MLDSA_L; j++)
+  __loop__(
+    assigns(j, t)
+    invariant(j <= MLDSA_L)
+    invariant(t >= -(int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+    invariant(t <= (int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+  )
+  {
+    const int64_t u64 = (int64_t)u->vec[j].coeffs[i];
+    const int64_t v64 = (int64_t)v->vec[j].coeffs[i];
+    /* Helper assertions for proof efficiency. Do not remove */
+    mld_assert(u64 >= 0 && u64 < MLDSA_Q);
+    mld_assert(v64 > -MLD_NTT_BOUND && v64 < MLD_NTT_BOUND);
+    t += (u64 * v64);
+  }
+  return t;
+}
+
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
+  unsigned int i;
+
+  mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
+  mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   for (i = 0; i < MLDSA_N; i++)
   __loop__(
-    assigns(i, j, object_whole(w))
+    assigns(i, object_whole(w))
     invariant(i <= MLDSA_N)
     invariant(array_abs_bound(w->coeffs, 0, i, MLDSA_Q))
   )
   {
-    int64_t t = 0;
-    int32_t r;
-    for (j = 0; j < MLDSA_L; j++)
-    __loop__(
-      assigns(j, t)
-      invariant(j <= MLDSA_L)
-      invariant(t >= -(int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
-      invariant(t <= (int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
-    )
-    {
-      t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
-    }
-
-    r = mld_montgomery_reduce(t);
-    w->coeffs[i] = r;
+    w->coeffs[i] =
+        mld_montgomery_reduce(mld_pointwise_sum_of_products(u, v, i));
   }
 
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #endif /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
           MLD_CONFIG_PARAMETER_SET == 44) && \
           !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
          MLD_CONFIG_PARAMETER_SET == 65) && \
           !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
           MLD_CONFIG_PARAMETER_SET == 87) */
-}
 
 MLD_INTERNAL_API
 uint32_t mld_polyvecl_chknorm(const mld_polyvecl *v, int32_t bound)
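
The comment "For MLDSA_K=6, MLDSA_L=5, process the last two entries individually" in the expansion hunk above refers to the split between the 4-way batched loop and the serial tail loop. The following standalone sketch (not part of the patch) illustrates that index arithmetic, assuming the ML-DSA-65 parameters MLDSA_K = 6 and MLDSA_L = 5 and mirroring the patch's row/column and seed-layout conventions:

#include <assert.h>
#include <stdint.h>

#define MLDSA_K 6 /* ML-DSA-65 */
#define MLDSA_L 5

int main(void)
{
  /* The batched loop samples the largest multiple of 4 not exceeding K*L. */
  unsigned int batched = (MLDSA_K * MLDSA_L / 4) * 4;
  unsigned int i;
  assert(batched == 28); /* 30 entries total, so 2 remain for the tail loop */

  for (i = batched; i < MLDSA_K * MLDSA_L; i++)
  {
    uint8_t row = (uint8_t)(i / MLDSA_L); /* "x" in the patch */
    uint8_t col = (uint8_t)(i % MLDSA_L); /* "y" in the patch */
    /* i = 28 -> A[5][3], i = 29 -> A[5][4]; each entry is sampled from
     * rho || col || row, matching single_seed[MLDSA_SEEDBYTES + 0] = y and
     * single_seed[MLDSA_SEEDBYTES + 1] = x in the patch. */
    assert(row == 5);
    assert(col == 3 + (i - batched));
  }
  return 0;
}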
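The bound comment in mld_pointwise_sum_of_products, MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX with ML-DSA-87 as the worst case below 2**52, can be checked at compile time. A minimal sketch, assuming MLDSA_Q = 8380417 and MLD_NTT_BOUND = 9 * MLDSA_Q as suggested by the [-9Q+1, 9Q-1] range in the comment; the numeric values and the MLDSA_L_MAX name are assumptions of this illustration, not taken from the patch:

#include <stdint.h>

#define MLDSA_Q 8380417             /* q = 2^23 - 2^13 + 1 (FIPS 204) */
#define MLD_NTT_BOUND (9 * MLDSA_Q) /* assumed from the [-9Q+1, 9Q-1] bound */
#define MLDSA_L_MAX 7               /* ML-DSA-87, the worst case */

/* Each product |u*v| is at most (Q-1)*(9Q-1); summing MLDSA_L_MAX of them
 * stays below 2^52 and hence far below INT64_MAX, so the 64-bit accumulator
 * never needs an intermediate reduction. */
_Static_assert((int64_t)MLDSA_L_MAX * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1) <
                   ((int64_t)1 << 52),
               "sum of products fits in 64 bits without reduction");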