1 change: 1 addition & 0 deletions mldsa/mldsa_native.c
@@ -257,6 +257,7 @@
#undef mld_polyvec_matrix_pointwise_montgomery
#undef mld_polyveck
#undef mld_polyveck_add
#undef mld_polyveck_add_error
#undef mld_polyveck_caddq
#undef mld_polyveck_chknorm
#undef mld_polyveck_decompose
221 changes: 150 additions & 71 deletions mldsa/src/polyvec.c
@@ -21,17 +21,47 @@
#include "poly_kl.h"
#include "polyvec.h"

#if !defined(MLD_USE_NATIVE_NTT_CUSTOM_ORDER)
/* This namespacing is not done at the top to avoid a naming conflict
* with native backends, which are currently not yet namespaced. */
#define mld_poly_permute_bitrev_to_custom \
MLD_NAMESPACE_KL(mld_poly_permute_bitrev_to_custom)
#define mld_matrix_permute_bitrev_to_custom \
MLD_NAMESPACE_KL(mld_matrix_permute_bitrev_to_custom)

/* Helper function to ensure that the polynomial entries in the output
* of mld_polyvec_matrix_expand use the coefficient ordering expected by
* the NTT backend rather than the standard (bitreversed) ordering.
* No-op unless a native backend with a custom ordering is used.
*/
static void mld_matrix_permute_bitrev_to_custom(mld_polyvecl mat[MLDSA_K])
__contract__(
/* We don't specify that this should be a permutation, but only
* that it does not change the bound established at the end of
* mld_polyvec_matrix_expand.
*/
requires(memory_no_alias(mat, MLDSA_K * sizeof(mld_polyvecl)))
requires(forall(k1, 0, MLDSA_K, forall(l1, 0, MLDSA_L,
array_bound(mat[k1].vec[l1].coeffs, 0, MLDSA_N, 0, MLDSA_Q))))
assigns(object_whole(mat))
ensures(forall(k1, 0, MLDSA_K, forall(l1, 0, MLDSA_L,
array_bound(mat[k1].vec[l1].coeffs, 0, MLDSA_N, 0, MLDSA_Q))))
)
{
#if defined(MLD_USE_NATIVE_NTT_CUSTOM_ORDER)
unsigned int i, j;
for (i = 0; i < MLDSA_K; i++)
{
for (j = 0; j < MLDSA_L; j++)
{
mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
}
}

#else /* MLD_USE_NATIVE_NTT_CUSTOM_ORDER */

/* Nothing to do */
((void)mat);

static MLD_INLINE void mld_poly_permute_bitrev_to_custom(int32_t data[MLDSA_N])
{
((void)data);
}
#endif /* !MLD_USE_NATIVE_NTT_CUSTOM_ORDER */
}


MLD_INTERNAL_API
@@ -45,20 +75,12 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
* of the same parent object.
*/

MLD_ALIGN uint8_t seed_ext[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];

for (j = 0; j < 4; j++)
__loop__(
assigns(j, object_whole(seed_ext))
invariant(j <= 4)
)
{
mld_memcpy(seed_ext[j], rho, MLDSA_SEEDBYTES);
}
MLD_ALIGN uint8_t single_seed[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
MLD_ALIGN uint8_t batched_seeds[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
/* Sample 4 matrix entries at a time. */
for (i = 0; i < (MLDSA_K * MLDSA_L / 4) * 4; i += 4)
__loop__(
assigns(i, j, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
assigns(i, j, object_whole(batched_seeds), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
invariant(i <= (MLDSA_K * MLDSA_L / 4) * 4 && i % 4 == 0)
/* vectors 0 .. i / MLDSA_L are completely sampled */
invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -70,28 +92,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
{
for (j = 0; j < 4; j++)
__loop__(
assigns(j, object_whole(seed_ext))
assigns(j, object_whole(batched_seeds))
invariant(j <= 4)
)
{
uint8_t x = (uint8_t)((i + j) / MLDSA_L);
uint8_t y = (uint8_t)((i + j) % MLDSA_L);

seed_ext[j][MLDSA_SEEDBYTES + 0] = y;
seed_ext[j][MLDSA_SEEDBYTES + 1] = x;
mld_memcpy(batched_seeds[j], rho, MLDSA_SEEDBYTES);
batched_seeds[j][MLDSA_SEEDBYTES + 0] = y;
batched_seeds[j][MLDSA_SEEDBYTES + 1] = x;
}

mld_poly_uniform_4x(&mat[i / MLDSA_L].vec[i % MLDSA_L],
&mat[(i + 1) / MLDSA_L].vec[(i + 1) % MLDSA_L],
&mat[(i + 2) / MLDSA_L].vec[(i + 2) % MLDSA_L],
&mat[(i + 3) / MLDSA_L].vec[(i + 3) % MLDSA_L],
seed_ext);
batched_seeds);
}

mld_memcpy(single_seed, rho, MLDSA_SEEDBYTES);

/* For MLDSA_K=6, MLDSA_L=5, process the last two entries individually */
while (i < MLDSA_K * MLDSA_L)
__loop__(
assigns(i, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
assigns(i, object_whole(single_seed), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
invariant(i <= MLDSA_K * MLDSA_L)
/* vectors 0 .. i / MLDSA_L are completely sampled */
invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -105,27 +130,17 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
uint8_t y = (uint8_t)(i % MLDSA_L);
mld_poly *this_poly = &mat[i / MLDSA_L].vec[i % MLDSA_L];

seed_ext[0][MLDSA_SEEDBYTES + 0] = y;
seed_ext[0][MLDSA_SEEDBYTES + 1] = x;
single_seed[MLDSA_SEEDBYTES + 0] = y;
single_seed[MLDSA_SEEDBYTES + 1] = x;

mld_poly_uniform(this_poly, seed_ext[0]);
mld_poly_uniform(this_poly, single_seed);
i++;
}

/*
* The public matrix is generated in NTT domain. If the native backend
* uses a custom order in NTT domain, permute A accordingly.
*/
for (i = 0; i < MLDSA_K; i++)
{
for (j = 0; j < MLDSA_L; j++)
{
mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
}
}
mld_matrix_permute_bitrev_to_custom(mat);

/* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
mld_zeroize(seed_ext, sizeof(seed_ext));
mld_zeroize(single_seed, sizeof(single_seed));
}
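
To make the batching schedule above concrete: the matrix entries are indexed row-major by i, and the column index y = i % MLDSA_L and row index x = i / MLDSA_L are the two domain-separation bytes appended to rho. The following standalone sketch is illustrative only and not part of this change; its constants are hard-coded for ML-DSA-65 (K = 6, L = 5), where 28 of the 30 entries fall into seven 4-way batches and the final two are sampled individually by the trailing loop.

/* Standalone illustration of the batching schedule used by
 * mld_polyvec_matrix_expand; constants hard-coded for ML-DSA-65. */
#include <stdio.h>

#define K 6
#define L 5

int main(void)
{
  unsigned int i, j;

  /* 4-way batches: cover i = 0 .. 27 */
  for (i = 0; i < (K * L / 4) * 4; i += 4)
  {
    printf("batch:");
    for (j = 0; j < 4; j++)
    {
      /* The two domain-separation bytes appended to rho are (y, x). */
      unsigned int x = (i + j) / L, y = (i + j) % L;
      printf(" A[%u][%u]", x, y);
    }
    printf("\n");
  }

  /* Tail: the remaining K*L mod 4 = 2 entries, sampled one at a time. */
  while (i < K * L)
  {
    printf("single: A[%u][%u]\n", i / L, i % L);
    i++;
  }
  return 0;
}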

MLD_INTERNAL_API
@@ -161,7 +176,7 @@ void mld_polyvecl_uniform_gamma1(mld_polyvecl *v,
/* Safety: nonce is at most ((UINT16_MAX - MLDSA_L) / MLDSA_L), and, hence,
* this cast is safe. See NONCE_UB comment in sign.c. */
nonce = (uint16_t)(MLDSA_L * nonce);
/* Now, nonce <= UINT16_MAX - (MLDSA_L - 1), so the casts below are safe. */
#if MLDSA_L == 4
mld_poly_uniform_gamma1_4x(&v->vec[0], &v->vec[1], &v->vec[2], &v->vec[3],
seed, nonce, (uint16_t)(nonce + 1),
@@ -219,7 +234,6 @@ void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
invariant(i <= MLDSA_L)
invariant(forall(k0, i, MLDSA_L,
forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
)
{
@@ -287,87 +301,131 @@ void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
}

#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
MLD_CONFIG_PARAMETER_SET == 44

MLD_INTERNAL_API
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
const mld_polyvecl *v)
{
#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
MLD_CONFIG_PARAMETER_SET == 44
/* TODO: proof */
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
mld_polyvecl_pointwise_acc_montgomery_l4_native(
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
(const int32_t(*)[MLDSA_N])v->vec);
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
}

#elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5) && \
MLD_CONFIG_PARAMETER_SET == 65

void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
const mld_polyvecl *v)
{
/* TODO: proof */
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
mld_polyvecl_pointwise_acc_montgomery_l5_native(
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
(const int32_t(*)[MLDSA_N])v->vec);
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
}

#elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7) && \
MLD_CONFIG_PARAMETER_SET == 87
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
const mld_polyvecl *v)
{
/* TODO: proof */
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
mld_polyvecl_pointwise_acc_montgomery_l7_native(
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
(const int32_t(*)[MLDSA_N])v->vec);
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
MLD_CONFIG_PARAMETER_SET == 44) && \
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
MLD_CONFIG_PARAMETER_SET == 65) && \
MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
MLD_CONFIG_PARAMETER_SET == 87 */
unsigned int i, j;
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
/* The first input is bounded by [0, Q-1] inclusive
* The second input is bounded by [-9Q+1, 9Q-1] inclusive . Hence, we can
}

#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
MLD_CONFIG_PARAMETER_SET == 44) && \
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
MLD_CONFIG_PARAMETER_SET == 65) && \
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
MLD_CONFIG_PARAMETER_SET == 87) */

#define mld_pointwise_sum_of_products \
MLD_NAMESPACE_KL(mld_pointwise_sum_of_products)
static int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
const mld_polyvecl *v,
unsigned int i)
__contract__(
requires(memory_no_alias(u, sizeof(mld_polyvecl)))
requires(memory_no_alias(v, sizeof(mld_polyvecl)))
requires(i < MLDSA_N)
requires(forall(l0, 0, MLDSA_L,
array_bound(u->vec[l0].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))
requires(forall(l1, 0, MLDSA_L,
array_abs_bound(v->vec[l1].coeffs, 0, MLDSA_N, MLD_NTT_BOUND)))
ensures(return_value >= -(int64_t) MLDSA_L*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
ensures(return_value <= (int64_t) MLDSA_L*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
)
{
/* Input vector u is bounded by [0, Q-1] inclusive
* Input vector v is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
* safely accumulate in 64 bits without intermediate reductions as
* MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX
*
* The worst case is ML-DSA-87: 7 * (9Q-1) * (Q-1) < 2**52
* (and likewise for negative values)
*/

int64_t t = 0;
unsigned int j;
for (j = 0; j < MLDSA_L; j++)
__loop__(
assigns(j, t)
invariant(j <= MLDSA_L)
invariant(t >= -(int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
invariant(t <= (int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
)
{
const int64_t u64 = (int64_t)u->vec[j].coeffs[i];
const int64_t v64 = (int64_t)v->vec[j].coeffs[i];
/* Helper assertions for proof efficiency. Do not remove */
mld_assert(u64 >= 0 && u64 < MLDSA_Q);
mld_assert(v64 > -MLD_NTT_BOUND && v64 < MLD_NTT_BOUND);
t += (u64 * v64);
}
return t;
}

void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
const mld_polyvecl *v)
{
unsigned int i;

mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
for (i = 0; i < MLDSA_N; i++)
__loop__(
assigns(i, j, object_whole(w))
assigns(i, object_whole(w))
invariant(i <= MLDSA_N)
invariant(array_abs_bound(w->coeffs, 0, i, MLDSA_Q))
)
{
int64_t t = 0;
int32_t r;
for (j = 0; j < MLDSA_L; j++)
__loop__(
assigns(j, t)
invariant(j <= MLDSA_L)
invariant(t >= -(int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
invariant(t <= (int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
)
{
t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
}

r = mld_montgomery_reduce(t);
w->coeffs[i] = r;
w->coeffs[i] =
mld_montgomery_reduce(mld_pointwise_sum_of_products(u, v, i));
}

mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
}

#endif /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
MLD_CONFIG_PARAMETER_SET == 44) && \
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
MLD_CONFIG_PARAMETER_SET == 65) && \
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
MLD_CONFIG_PARAMETER_SET == 87) */
}
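
As a quick sanity check of the overflow argument in mld_pointwise_sum_of_products, the following standalone snippet (illustrative only; it assumes MLD_NTT_BOUND = 9*MLDSA_Q, as implied by the bound comment above) evaluates the worst-case ML-DSA-87 accumulator magnitude 7*(9Q-1)*(Q-1) and confirms it lies below 2^52, and therefore far below INT64_MAX.

/* Standalone check of the accumulation bound used in
 * mld_pointwise_sum_of_products. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  const int64_t q = 8380417;        /* MLDSA_Q */
  const int64_t ntt_bound = 9 * q;  /* MLD_NTT_BOUND, assumed to be 9*Q */
  const int64_t l = 7;              /* MLDSA_L for ML-DSA-87 */
  const int64_t worst = l * (ntt_bound - 1) * (q - 1); /* ~4.42e15 */

  printf("worst case: %" PRId64 "\n", worst);
  printf("2^52:       %" PRId64 "\n", (int64_t)1 << 52); /* ~4.50e15 */
  printf("bound holds: %d\n", worst < ((int64_t)1 << 52)); /* prints 1 */
  return 0;
}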

MLD_INTERNAL_API
uint32_t mld_polyvecl_chknorm(const mld_polyvecl *v, int32_t bound)
@@ -450,7 +508,6 @@ void mld_polyveck_add(mld_polyveck *u, const mld_polyveck *v)
invariant(i <= MLDSA_K)
invariant(forall(k0, i, MLDSA_K,
forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
)
{
@@ -459,6 +516,28 @@ void mld_polyveck_add(mld_polyveck *u, const mld_polyveck *v)
mld_assert_bound_2d(u->vec, MLDSA_K, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX);
}

/* Reference: We use a destructive version (output = first input) to avoid
* reasoning about aliasing in the CBMC specification. */
MLD_INTERNAL_API
void mld_polyveck_add_error(mld_polyveck *u, const mld_polyveck *v)
{
unsigned int i;

for (i = 0; i < MLDSA_K; ++i)
__loop__(
assigns(i, memory_slice(u, sizeof(mld_polyveck)))
invariant(i <= MLDSA_K)
invariant(forall(k0, i, MLDSA_K,
forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
invariant(forall(k6, 0, i, array_abs_bound(u->vec[k6].coeffs, 0, MLDSA_N, MLDSA_Q)))
)
{
mld_poly_add(&u->vec[i], &v->vec[i]);
}
mld_assert_abs_bound_2d(u->vec, MLDSA_K, MLDSA_N, MLDSA_Q);
}
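
For contrast with the destructive signature chosen above, the sketch below shows what a hypothetical non-destructive variant would look like. The function name and contract are illustrative only and not part of the code base; they reuse the contract macros already used in this file. With three pointers, the specification has to establish that the output object is disjoint from both inputs, whereas the in-place u += v form keeps the contract down to two disjoint objects.

/* Hypothetical three-operand variant, for illustration only. */
static void mld_polyveck_add_3op(mld_polyveck *r, const mld_polyveck *u,
                                 const mld_polyveck *v)
__contract__(
  /* The output buffer must be a separate object from both inputs, and the
   * proof must track all three objects and their bounds separately. */
  requires(memory_no_alias(r, sizeof(mld_polyveck)))
  requires(memory_no_alias(u, sizeof(mld_polyveck)))
  requires(memory_no_alias(v, sizeof(mld_polyveck)))
  assigns(object_whole(r))
);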


MLD_INTERNAL_API
void mld_polyveck_sub(mld_polyveck *u, const mld_polyveck *v)
{