Skip to content

Commit 2c3bf8b

Browse files
committed
Simplification and refactoring to restore proof speed and stability.
1. Weaken post-condition and loop invariant in polyvecl_add(). The stonger post-condition was unncessary. 2. Simplify polyvec_matrix_expand(). Small performance loss here since batched_seeds[] is (re-) initialized every time. This is bit slower but removes a loop statement entirely. 3. Refactor polyvec_pointwise_acc_montgomery() by splitting core "sum of products" calculation into a distinct local function mld_pointwise_sum_of_products(). Add proof of the latter. Proof time for parameter set 87 now 4 minutes (real-time) and 40 minutes (user time) with 64 cores on an r7g instance. Signed-off-by: Rod Chapman <[email protected]>
1 parent bb357ee commit 2c3bf8b

File tree

5 files changed

+167
-54
lines changed

5 files changed

+167
-54
lines changed

mldsa/polyvec.c

Lines changed: 96 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,14 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
4444
* of the same parent object.
4545
*/
4646

47-
MLD_ALIGN uint8_t seed_ext[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
48-
49-
for (j = 0; j < 4; j++)
50-
__loop__(
51-
assigns(j, object_whole(seed_ext))
52-
invariant(j <= 4)
53-
)
54-
{
55-
mld_memcpy(seed_ext[j], rho, MLDSA_SEEDBYTES);
56-
}
47+
MLD_ALIGN uint8_t single_seed[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
5748

5849
#if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY)
50+
MLD_ALIGN uint8_t batched_seeds[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
5951
/* Sample 4 matrix entries a time. */
6052
for (i = 0; i < (MLDSA_K * MLDSA_L / 4) * 4; i += 4)
6153
__loop__(
62-
assigns(i, j, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
54+
assigns(i, j, object_whole(batched_seeds), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
6355
invariant(i <= (MLDSA_K * MLDSA_L / 4) * 4 && i % 4 == 0)
6456
/* vectors 0 .. i / MLDSA_L are completely sampled */
6557
invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -71,31 +63,38 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
7163
{
7264
for (j = 0; j < 4; j++)
7365
__loop__(
74-
assigns(j, object_whole(seed_ext))
66+
assigns(j, object_whole(batched_seeds))
7567
invariant(j <= 4)
7668
)
7769
{
7870
uint8_t x = (uint8_t)((i + j) / MLDSA_L);
7971
uint8_t y = (uint8_t)((i + j) % MLDSA_L);
8072

81-
seed_ext[j][MLDSA_SEEDBYTES + 0] = y;
82-
seed_ext[j][MLDSA_SEEDBYTES + 1] = x;
73+
mld_memcpy(batched_seeds[j], rho, MLDSA_SEEDBYTES);
74+
batched_seeds[j][MLDSA_SEEDBYTES + 0] = y;
75+
batched_seeds[j][MLDSA_SEEDBYTES + 1] = x;
8376
}
8477

8578
mld_poly_uniform_4x(&mat[i / MLDSA_L].vec[i % MLDSA_L],
8679
&mat[(i + 1) / MLDSA_L].vec[(i + 1) % MLDSA_L],
8780
&mat[(i + 2) / MLDSA_L].vec[(i + 2) % MLDSA_L],
8881
&mat[(i + 3) / MLDSA_L].vec[(i + 3) % MLDSA_L],
89-
seed_ext);
82+
batched_seeds);
9083
}
84+
85+
/* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
86+
mld_zeroize(batched_seeds, sizeof(batched_seeds));
87+
9188
#else /* !MLD_CONFIG_SERIAL_FIPS202_ONLY */
9289
i = 0;
9390
#endif /* MLD_CONFIG_SERIAL_FIPS202_ONLY */
9491

92+
mld_memcpy(single_seed, rho, MLDSA_SEEDBYTES);
93+
9594
/* For MLDSA_K=6, MLDSA_L=5, process the last two entries individually */
9695
while (i < MLDSA_K * MLDSA_L)
9796
__loop__(
98-
assigns(i, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
97+
assigns(i, object_whole(single_seed), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
9998
invariant(i <= MLDSA_K * MLDSA_L)
10099
/* vectors 0 .. i / MLDSA_L are completely sampled */
101100
invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -109,27 +108,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
109108
uint8_t y = (uint8_t)(i % MLDSA_L);
110109
mld_poly *this_poly = &mat[i / MLDSA_L].vec[i % MLDSA_L];
111110

112-
seed_ext[0][MLDSA_SEEDBYTES + 0] = y;
113-
seed_ext[0][MLDSA_SEEDBYTES + 1] = x;
111+
single_seed[MLDSA_SEEDBYTES + 0] = y;
112+
single_seed[MLDSA_SEEDBYTES + 1] = x;
114113

115-
mld_poly_uniform(this_poly, seed_ext[0]);
114+
mld_poly_uniform(this_poly, single_seed);
116115
i++;
117116
}
118117

119118
/*
120119
* The public matrix is generated in NTT domain. If the native backend
121-
* uses a custom order in NTT domain, permute A accordingly.
120+
* uses a custom order in NTT domain, permute A accordingly. This does
121+
* not affect the bounds on the coefficients, so we ignore this for CBMC
122+
* to simplify proof.
122123
*/
124+
#ifndef CBMC
123125
for (i = 0; i < MLDSA_K; i++)
124126
{
125127
for (j = 0; j < MLDSA_L; j++)
126128
{
127129
mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
128130
}
129131
}
132+
#endif /* !CBMC */
130133

131134
/* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
132-
mld_zeroize(seed_ext, sizeof(seed_ext));
135+
mld_zeroize(single_seed, sizeof(single_seed));
133136
}
134137

135138
MLD_INTERNAL_API
@@ -234,7 +237,6 @@ void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
234237
invariant(i <= MLDSA_L)
235238
invariant(forall(k0, i, MLDSA_L,
236239
forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
237-
invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
238240
invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
239241
)
240242
{
@@ -302,87 +304,129 @@ void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
302304
mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
303305
}
304306

307+
#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
308+
MLD_CONFIG_PARAMETER_SET == 44
309+
305310
MLD_INTERNAL_API
306311
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
307312
const mld_polyvecl *v)
308313
{
309-
#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
310-
MLD_CONFIG_PARAMETER_SET == 44
311314
/* TODO: proof */
312315
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
313316
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
314317
mld_polyvecl_pointwise_acc_montgomery_l4_native(
315318
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
316319
(const int32_t(*)[MLDSA_N])v->vec);
317320
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
321+
}
322+
318323
#elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5) && \
319324
MLD_CONFIG_PARAMETER_SET == 65
325+
326+
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
327+
const mld_polyvecl *v)
328+
{
320329
/* TODO: proof */
321330
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
322331
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
323332
mld_polyvecl_pointwise_acc_montgomery_l5_native(
324333
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
325334
(const int32_t(*)[MLDSA_N])v->vec);
326335
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
336+
}
337+
327338
#elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7) && \
328339
MLD_CONFIG_PARAMETER_SET == 87
340+
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
341+
const mld_polyvecl *v)
342+
{
329343
/* TODO: proof */
330344
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
331345
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
332346
mld_polyvecl_pointwise_acc_montgomery_l7_native(
333347
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
334348
(const int32_t(*)[MLDSA_N])v->vec);
335349
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
336-
#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
337-
MLD_CONFIG_PARAMETER_SET == 44) && \
338-
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
339-
MLD_CONFIG_PARAMETER_SET == 65) && \
340-
MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
341-
MLD_CONFIG_PARAMETER_SET == 87 */
342-
unsigned int i, j;
343-
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
344-
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
345-
/* The first input is bounded by [0, Q-1] inclusive
346-
* The second input is bounded by [-9Q+1, 9Q-1] inclusive . Hence, we can
350+
}
351+
352+
#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
353+
MLD_CONFIG_PARAMETER_SET == 44) && \
354+
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
355+
MLD_CONFIG_PARAMETER_SET == 65) && \
356+
MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
357+
MLD_CONFIG_PARAMETER_SET == 87 */
358+
359+
static int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
360+
const mld_polyvecl *v,
361+
unsigned int i)
362+
__contract__(
363+
requires(memory_no_alias(u, sizeof(mld_polyvecl)))
364+
requires(memory_no_alias(v, sizeof(mld_polyvecl)))
365+
requires(i < MLDSA_N)
366+
requires(forall(l0, 0, MLDSA_L,
367+
array_bound(u->vec[l0].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))
368+
requires(forall(l1, 0, MLDSA_L,
369+
array_abs_bound(v->vec[l1].coeffs, 0, MLDSA_N, MLD_NTT_BOUND)))
370+
ensures(return_value >= -(int64_t) MLDSA_L*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
371+
ensures(return_value <= (int64_t) MLDSA_L*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
372+
)
373+
{
374+
/* Input vector u is bounded by [0, Q-1] inclusive
375+
* Input vector v is bounded by [-9Q+1, 9Q-1] inclusive . Hence, we can
347376
* safely accumulate in 64-bits without intermediate reductions as
348377
* MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX
349378
*
350379
* The worst case is ML-DSA-87: 7 * (9Q-1) * (Q-1) < 2**52
351380
* (and likewise for negative values)
352381
*/
353382

383+
int64_t t = 0;
384+
unsigned int j;
385+
for (j = 0; j < MLDSA_L; j++)
386+
__loop__(
387+
assigns(j, t)
388+
invariant(j <= MLDSA_L)
389+
invariant(t >= -(int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
390+
invariant(t <= (int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
391+
)
392+
{
393+
const int64_t u64 = (int64_t)u->vec[j].coeffs[i];
394+
const int64_t v64 = (int64_t)v->vec[j].coeffs[i];
395+
/* Helper assertions for proof efficiency. Do not remove */
396+
mld_assert(u64 >= 0 && u64 < MLDSA_Q);
397+
mld_assert(v64 > -MLD_NTT_BOUND && v64 < MLD_NTT_BOUND);
398+
t += (u64 * v64);
399+
}
400+
return t;
401+
}
402+
403+
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
404+
const mld_polyvecl *v)
405+
{
406+
unsigned int i;
407+
408+
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
409+
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
354410
for (i = 0; i < MLDSA_N; i++)
355411
__loop__(
356-
assigns(i, j, object_whole(w))
412+
assigns(i, object_whole(w))
357413
invariant(i <= MLDSA_N)
358414
invariant(array_abs_bound(w->coeffs, 0, i, MLDSA_Q))
359415
)
360416
{
361-
int64_t t = 0;
362-
int32_t r;
363-
for (j = 0; j < MLDSA_L; j++)
364-
__loop__(
365-
assigns(j, t)
366-
invariant(j <= MLDSA_L)
367-
invariant(t >= -(int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
368-
invariant(t <= (int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
369-
)
370-
{
371-
t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
372-
}
373-
374-
r = mld_montgomery_reduce(t);
375-
w->coeffs[i] = r;
417+
w->coeffs[i] =
418+
mld_montgomery_reduce(mld_pointwise_sum_of_products(u, v, i));
376419
}
377420

378421
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
422+
}
423+
379424
#endif /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
380425
MLD_CONFIG_PARAMETER_SET == 44) && \
381426
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
382427
MLD_CONFIG_PARAMETER_SET == 65) && \
383428
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
384429
MLD_CONFIG_PARAMETER_SET == 87) */
385-
}
386430

387431
MLD_INTERNAL_API
388432
uint32_t mld_polyvecl_chknorm(const mld_polyvecl *v, int32_t bound)

mldsa/polyvec.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ __contract__(
8585
requires(forall(k0, 0, MLDSA_L, forall(k1, 0, MLDSA_N, (int64_t) u->vec[k0].coeffs[k1] + v->vec[k0].coeffs[k1] < REDUCE32_DOMAIN_MAX)))
8686
requires(forall(k2, 0, MLDSA_L, forall(k3, 0, MLDSA_N, (int64_t) u->vec[k2].coeffs[k3] + v->vec[k2].coeffs[k3] >= INT32_MIN)))
8787
assigns(object_whole(u))
88-
ensures(forall(k4, 0, MLDSA_L, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == old(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
8988
ensures(forall(k6, 0, MLDSA_L,
9089
array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
9190
);
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (c) The mldsa-native project authors
2+
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3+
4+
include ../Makefile_params.common
5+
6+
HARNESS_ENTRY = harness
7+
HARNESS_FILE = pointwise_sum_of_products_harness
8+
9+
# This should be a unique identifier for this proof, and will appear on the
10+
# Litani dashboard. It can be human-readable and contain spaces if you wish.
11+
PROOF_UID = pointwise_sum_of_products
12+
13+
DEFINES +=
14+
INCLUDES +=
15+
16+
REMOVE_FUNCTION_BODY +=
17+
UNWINDSET +=
18+
19+
PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
20+
PROJECT_SOURCES += $(SRCDIR)/mldsa/polyvec.c
21+
22+
CHECK_FUNCTION_CONTRACTS=mld_pointwise_sum_of_products
23+
USE_FUNCTION_CONTRACTS=
24+
APPLY_LOOP_CONTRACTS=on
25+
USE_DYNAMIC_FRAMES=1
26+
27+
# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
28+
EXTERNAL_SAT_SOLVER=
29+
CBMCFLAGS=--smt2 --slice-formula
30+
31+
FUNCTION_NAME = pointwise_sum_of_products
32+
33+
# If this proof is found to consume huge amounts of RAM, you can set the
34+
# EXPENSIVE variable. With new enough versions of the proof tools, this will
35+
# restrict the number of EXPENSIVE CBMC jobs running at once. See the
36+
# documentation in Makefile.common under the "Job Pools" heading for details.
37+
# EXPENSIVE = true
38+
39+
# This function is large enough to need...
40+
CBMC_OBJECT_BITS = 12
41+
42+
# If you require access to a file-local ("static") function or object to conduct
43+
# your proof, set the following (and do not include the original source file
44+
# ("mldsa/poly.c") in PROJECT_SOURCES).
45+
# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
46+
# include ../Makefile.common
47+
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/poly.c
48+
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
49+
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
50+
# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
51+
# be set before including Makefile.common, but any use of variables on the
52+
# left-hand side requires those variables to be defined. Hence, _SOURCE,
53+
# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
54+
55+
include ../Makefile.common
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright (c) The mldsa-native project authors
2+
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3+
4+
#include "polyvec.h"
5+
6+
int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
7+
const mld_polyvecl *v, unsigned int i);
8+
9+
void harness(void)
10+
{
11+
mld_polyvecl *u, *v;
12+
unsigned int i;
13+
int64_t r;
14+
r = mld_pointwise_sum_of_products(u, v, i);
15+
}

proofs/cbmc/polyvecl_pointwise_acc_montgomery/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
2020
PROJECT_SOURCES += $(SRCDIR)/mldsa/polyvec.c
2121

2222
CHECK_FUNCTION_CONTRACTS=$(MLD_NAMESPACE)polyvecl_pointwise_acc_montgomery
23-
USE_FUNCTION_CONTRACTS=mld_montgomery_reduce
23+
USE_FUNCTION_CONTRACTS=mld_montgomery_reduce mld_pointwise_sum_of_products
2424
APPLY_LOOP_CONTRACTS=on
2525
USE_DYNAMIC_FRAMES=1
2626

0 commit comments

Comments
 (0)