Skip to content

Commit 7819c17

Browse files
rod-chapman and mkannwischer
authored and committed
Simplification and refactoring to restore proof speed and stability.
1. Weaken post-condition and loop invariant in polyvecl_add(). The stronger post-condition was unnecessary. 2. Simplify polyvec_matrix_expand(). Small performance loss here since batched_seeds[] is (re-)initialized every time. This is a bit slower but removes a loop statement entirely. 3. Refactor polyvec_pointwise_acc_montgomery() by splitting the core "sum of products" calculation into a distinct local function mld_pointwise_sum_of_products(). Add proof of the latter. Proof time for parameter set 87 is now 4 minutes (real time) and 40 minutes (user time) with 64 cores on an r7g instance. Signed-off-by: Rod Chapman <[email protected]>
1 parent 59c7e2c commit 7819c17

File tree

5 files changed

+167
-54
lines changed

5 files changed

+167
-54
lines changed

mldsa/src/polyvec.c

Lines changed: 94 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -45,20 +45,12 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
4545
* of the same parent object.
4646
*/
4747

48-
MLD_ALIGN uint8_t seed_ext[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
49-
50-
for (j = 0; j < 4; j++)
51-
__loop__(
52-
assigns(j, object_whole(seed_ext))
53-
invariant(j <= 4)
54-
)
55-
{
56-
mld_memcpy(seed_ext[j], rho, MLDSA_SEEDBYTES);
57-
}
48+
MLD_ALIGN uint8_t single_seed[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
49+
MLD_ALIGN uint8_t batched_seeds[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
5850
/* Sample 4 matrix entries a time. */
5951
for (i = 0; i < (MLDSA_K * MLDSA_L / 4) * 4; i += 4)
6052
__loop__(
61-
assigns(i, j, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
53+
assigns(i, j, object_whole(batched_seeds), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
6254
invariant(i <= (MLDSA_K * MLDSA_L / 4) * 4 && i % 4 == 0)
6355
/* vectors 0 .. i / MLDSA_L are completely sampled */
6456
invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -70,28 +62,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
7062
{
7163
for (j = 0; j < 4; j++)
7264
__loop__(
73-
assigns(j, object_whole(seed_ext))
65+
assigns(j, object_whole(batched_seeds))
7466
invariant(j <= 4)
7567
)
7668
{
7769
uint8_t x = (uint8_t)((i + j) / MLDSA_L);
7870
uint8_t y = (uint8_t)((i + j) % MLDSA_L);
7971

80-
seed_ext[j][MLDSA_SEEDBYTES + 0] = y;
81-
seed_ext[j][MLDSA_SEEDBYTES + 1] = x;
72+
mld_memcpy(batched_seeds[j], rho, MLDSA_SEEDBYTES);
73+
batched_seeds[j][MLDSA_SEEDBYTES + 0] = y;
74+
batched_seeds[j][MLDSA_SEEDBYTES + 1] = x;
8275
}
8376

8477
mld_poly_uniform_4x(&mat[i / MLDSA_L].vec[i % MLDSA_L],
8578
&mat[(i + 1) / MLDSA_L].vec[(i + 1) % MLDSA_L],
8679
&mat[(i + 2) / MLDSA_L].vec[(i + 2) % MLDSA_L],
8780
&mat[(i + 3) / MLDSA_L].vec[(i + 3) % MLDSA_L],
88-
seed_ext);
81+
batched_seeds);
8982
}
9083

84+
mld_memcpy(single_seed, rho, MLDSA_SEEDBYTES);
85+
9186
/* For MLDSA_K=6, MLDSA_L=5, process the last two entries individually */
9287
while (i < MLDSA_K * MLDSA_L)
9388
__loop__(
94-
assigns(i, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
89+
assigns(i, object_whole(single_seed), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
9590
invariant(i <= MLDSA_K * MLDSA_L)
9691
/* vectors 0 .. i / MLDSA_L are completely sampled */
9792
invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -105,27 +100,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
105100
uint8_t y = (uint8_t)(i % MLDSA_L);
106101
mld_poly *this_poly = &mat[i / MLDSA_L].vec[i % MLDSA_L];
107102

108-
seed_ext[0][MLDSA_SEEDBYTES + 0] = y;
109-
seed_ext[0][MLDSA_SEEDBYTES + 1] = x;
103+
single_seed[MLDSA_SEEDBYTES + 0] = y;
104+
single_seed[MLDSA_SEEDBYTES + 1] = x;
110105

111-
mld_poly_uniform(this_poly, seed_ext[0]);
106+
mld_poly_uniform(this_poly, single_seed);
112107
i++;
113108
}
114109

115110
/*
116111
* The public matrix is generated in NTT domain. If the native backend
117-
* uses a custom order in NTT domain, permute A accordingly.
112+
* uses a custom order in NTT domain, permute A accordingly. This does
113+
* not affect the bounds on the coefficients, so we ignore this for CBMC
114+
* to simplify proof.
118115
*/
116+
#ifndef CBMC
119117
for (i = 0; i < MLDSA_K; i++)
120118
{
121119
for (j = 0; j < MLDSA_L; j++)
122120
{
123121
mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
124122
}
125123
}
124+
#endif /* !CBMC */
126125

127126
/* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
128-
mld_zeroize(seed_ext, sizeof(seed_ext));
127+
mld_zeroize(single_seed, sizeof(single_seed));
129128
}
130129

131130
MLD_INTERNAL_API
@@ -219,7 +218,6 @@ void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
219218
invariant(i <= MLDSA_L)
220219
invariant(forall(k0, i, MLDSA_L,
221220
forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
222-
invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
223221
invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
224222
)
225223
{
@@ -287,87 +285,131 @@ void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
287285
mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
288286
}
289287

288+
#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
289+
MLD_CONFIG_PARAMETER_SET == 44
290+
290291
MLD_INTERNAL_API
291292
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
292293
const mld_polyvecl *v)
293294
{
294-
#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
295-
MLD_CONFIG_PARAMETER_SET == 44
296295
/* TODO: proof */
297296
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
298297
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
299298
mld_polyvecl_pointwise_acc_montgomery_l4_native(
300299
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
301300
(const int32_t(*)[MLDSA_N])v->vec);
302301
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
302+
}
303+
303304
#elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5) && \
304305
MLD_CONFIG_PARAMETER_SET == 65
306+
307+
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
308+
const mld_polyvecl *v)
309+
{
305310
/* TODO: proof */
306311
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
307312
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
308313
mld_polyvecl_pointwise_acc_montgomery_l5_native(
309314
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
310315
(const int32_t(*)[MLDSA_N])v->vec);
311316
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
317+
}
318+
312319
#elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7) && \
313320
MLD_CONFIG_PARAMETER_SET == 87
321+
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
322+
const mld_polyvecl *v)
323+
{
314324
/* TODO: proof */
315325
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
316326
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
317327
mld_polyvecl_pointwise_acc_montgomery_l7_native(
318328
w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
319329
(const int32_t(*)[MLDSA_N])v->vec);
320330
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
321-
#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
322-
MLD_CONFIG_PARAMETER_SET == 44) && \
323-
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
324-
MLD_CONFIG_PARAMETER_SET == 65) && \
325-
MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
326-
MLD_CONFIG_PARAMETER_SET == 87 */
327-
unsigned int i, j;
328-
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
329-
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
330-
/* The first input is bounded by [0, Q-1] inclusive
331-
* The second input is bounded by [-9Q+1, 9Q-1] inclusive . Hence, we can
331+
}
332+
333+
#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
334+
MLD_CONFIG_PARAMETER_SET == 44) && \
335+
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
336+
MLD_CONFIG_PARAMETER_SET == 65) && \
337+
MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
338+
MLD_CONFIG_PARAMETER_SET == 87 */
339+
340+
#define mld_pointwise_sum_of_products \
341+
MLD_NAMESPACE_KL(mld_pointwise_sum_of_products)
342+
static int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
343+
const mld_polyvecl *v,
344+
unsigned int i)
345+
__contract__(
346+
requires(memory_no_alias(u, sizeof(mld_polyvecl)))
347+
requires(memory_no_alias(v, sizeof(mld_polyvecl)))
348+
requires(i < MLDSA_N)
349+
requires(forall(l0, 0, MLDSA_L,
350+
array_bound(u->vec[l0].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))
351+
requires(forall(l1, 0, MLDSA_L,
352+
array_abs_bound(v->vec[l1].coeffs, 0, MLDSA_N, MLD_NTT_BOUND)))
353+
ensures(return_value >= -(int64_t) MLDSA_L*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
354+
ensures(return_value <= (int64_t) MLDSA_L*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
355+
)
356+
{
357+
/* Input vector u is bounded by [0, Q-1] inclusive
358+
* Input vector v is bounded by [-9Q+1, 9Q-1] inclusive . Hence, we can
332359
* safely accumulate in 64-bits without intermediate reductions as
333360
* MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX
334361
*
335362
* The worst case is ML-DSA-87: 7 * (9Q-1) * (Q-1) < 2**52
336363
* (and likewise for negative values)
337364
*/
338365

366+
int64_t t = 0;
367+
unsigned int j;
368+
for (j = 0; j < MLDSA_L; j++)
369+
__loop__(
370+
assigns(j, t)
371+
invariant(j <= MLDSA_L)
372+
invariant(t >= -(int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
373+
invariant(t <= (int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
374+
)
375+
{
376+
const int64_t u64 = (int64_t)u->vec[j].coeffs[i];
377+
const int64_t v64 = (int64_t)v->vec[j].coeffs[i];
378+
/* Helper assertions for proof efficiency. Do not remove */
379+
mld_assert(u64 >= 0 && u64 < MLDSA_Q);
380+
mld_assert(v64 > -MLD_NTT_BOUND && v64 < MLD_NTT_BOUND);
381+
t += (u64 * v64);
382+
}
383+
return t;
384+
}
385+
386+
void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
387+
const mld_polyvecl *v)
388+
{
389+
unsigned int i;
390+
391+
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
392+
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
339393
for (i = 0; i < MLDSA_N; i++)
340394
__loop__(
341-
assigns(i, j, object_whole(w))
395+
assigns(i, object_whole(w))
342396
invariant(i <= MLDSA_N)
343397
invariant(array_abs_bound(w->coeffs, 0, i, MLDSA_Q))
344398
)
345399
{
346-
int64_t t = 0;
347-
int32_t r;
348-
for (j = 0; j < MLDSA_L; j++)
349-
__loop__(
350-
assigns(j, t)
351-
invariant(j <= MLDSA_L)
352-
invariant(t >= -(int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
353-
invariant(t <= (int64_t)j*(MLDSA_Q - 1)*(MLD_NTT_BOUND - 1))
354-
)
355-
{
356-
t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
357-
}
358-
359-
r = mld_montgomery_reduce(t);
360-
w->coeffs[i] = r;
400+
w->coeffs[i] =
401+
mld_montgomery_reduce(mld_pointwise_sum_of_products(u, v, i));
361402
}
362403

363404
mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
405+
}
406+
364407
#endif /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
365408
MLD_CONFIG_PARAMETER_SET == 44) && \
366409
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
367410
MLD_CONFIG_PARAMETER_SET == 65) && \
368411
!(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
369412
MLD_CONFIG_PARAMETER_SET == 87) */
370-
}
371413

372414
MLD_INTERNAL_API
373415
uint32_t mld_polyvecl_chknorm(const mld_polyvecl *v, int32_t bound)

mldsa/src/polyvec.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ __contract__(
9393
requires(forall(k0, 0, MLDSA_L, forall(k1, 0, MLDSA_N, (int64_t) u->vec[k0].coeffs[k1] + v->vec[k0].coeffs[k1] < REDUCE32_DOMAIN_MAX)))
9494
requires(forall(k2, 0, MLDSA_L, forall(k3, 0, MLDSA_N, (int64_t) u->vec[k2].coeffs[k3] + v->vec[k2].coeffs[k3] >= INT32_MIN)))
9595
assigns(object_whole(u))
96-
ensures(forall(k4, 0, MLDSA_L, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == old(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
9796
ensures(forall(k6, 0, MLDSA_L,
9897
array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
9998
);
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (c) The mldsa-native project authors
2+
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3+
4+
include ../Makefile_params.common
5+
6+
HARNESS_ENTRY = harness
7+
HARNESS_FILE = pointwise_sum_of_products_harness
8+
9+
# This should be a unique identifier for this proof, and will appear on the
10+
# Litani dashboard. It can be human-readable and contain spaces if you wish.
11+
PROOF_UID = pointwise_sum_of_products
12+
13+
DEFINES +=
14+
INCLUDES +=
15+
16+
REMOVE_FUNCTION_BODY +=
17+
UNWINDSET +=
18+
19+
PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
20+
PROJECT_SOURCES += $(SRCDIR)/mldsa/src/polyvec.c
21+
22+
CHECK_FUNCTION_CONTRACTS=$(MLD_NAMESPACE)mld_pointwise_sum_of_products
23+
USE_FUNCTION_CONTRACTS=
24+
APPLY_LOOP_CONTRACTS=on
25+
USE_DYNAMIC_FRAMES=1
26+
27+
# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
28+
EXTERNAL_SAT_SOLVER=
29+
CBMCFLAGS=--smt2 --slice-formula
30+
31+
FUNCTION_NAME = pointwise_sum_of_products
32+
33+
# If this proof is found to consume huge amounts of RAM, you can set the
34+
# EXPENSIVE variable. With new enough versions of the proof tools, this will
35+
# restrict the number of EXPENSIVE CBMC jobs running at once. See the
36+
# documentation in Makefile.common under the "Job Pools" heading for details.
37+
# EXPENSIVE = true
38+
39+
# This function is large enough to need...
40+
CBMC_OBJECT_BITS = 12
41+
42+
# If you require access to a file-local ("static") function or object to conduct
43+
# your proof, set the following (and do not include the original source file
44+
# ("mldsa/poly.c") in PROJECT_SOURCES).
45+
# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
46+
# include ../Makefile.common
47+
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/poly.c
48+
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
49+
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
50+
# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
51+
# be set before including Makefile.common, but any use of variables on the
52+
# left-hand side requires those variables to be defined. Hence, _SOURCE,
53+
# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
54+
55+
include ../Makefile.common
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright (c) The mldsa-native project authors
2+
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3+
4+
#include "polyvec.h"
5+
6+
#define mld_pointwise_sum_of_products \
7+
MLD_NAMESPACE_KL(mld_pointwise_sum_of_products)
8+
int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
9+
const mld_polyvecl *v, unsigned int i);
10+
11+
void harness(void)
12+
{
13+
mld_polyvecl *u, *v;
14+
unsigned int i;
15+
int64_t r;
16+
r = mld_pointwise_sum_of_products(u, v, i);
17+
}

proofs/cbmc/polyvecl_pointwise_acc_montgomery/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
2020
PROJECT_SOURCES += $(SRCDIR)/mldsa/src/polyvec.c
2121

2222
CHECK_FUNCTION_CONTRACTS=$(MLD_NAMESPACE)polyvecl_pointwise_acc_montgomery
23-
USE_FUNCTION_CONTRACTS=mld_montgomery_reduce
23+
USE_FUNCTION_CONTRACTS=mld_montgomery_reduce $(MLD_NAMESPACE)mld_pointwise_sum_of_products
2424
APPLY_LOOP_CONTRACTS=on
2525
USE_DYNAMIC_FRAMES=1
2626

0 commit comments

Comments
 (0)