Skip to content

Commit 7436be9

Browse files
committed
sign stack usage: compute z incrementally
This commit reduces the stack usage of signing by computing z = y + s1*cp incrementally (one polynomial at a time) allowing to eliminate the polyvecl z (at to cost of a single poly z). De-facto this saves L-1 KB irrespective of MLD_CONFIG_REDUCE_RAM. Practically, the same buffer was used early in the function too. Here we instead introduce a new polyvecl buffer tmp, but that can be placed in a union together with w1. Unfortuantely, with the current struct workaround for diffblue/cbmc#8813, this results in an increase in stack space by L KB. This gets eliminated when MLD_CONFIG_REDUCE_RAM is set. Hoisted out from #791 Signed-off-by: Matthias J. Kannwischer <[email protected]>
1 parent 3040a3f commit 7436be9

File tree

21 files changed

+112
-550
lines changed

21 files changed

+112
-550
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ Yes. mldsa-native supports all three ML-DSA security levels (ML-DSA-44, ML-DSA-6
158158
Yes. mldsa-native provides a compile-time option `MLD_CONFIG_REDUCE_RAM` that reduces RAM usage. This trades memory for performance:
159159

160160
- **Memory savings**: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), 49 KB (ML-DSA-87) for each of key generation, signing, and verification.
161-
For signing, additional 4 KB (ML-DSA-44), 5 KB (ML-DSA-65), and 7 KB (ML-DSA-87) are saved.
161+
For signing, additional 8 KB (ML-DSA-44), 10 KB (ML-DSA-65), and 14 KB (ML-DSA-87) are saved.
162162
- **Performance cost**: Matrix generation is no longer batched, resulting in slower signing and verification
163163

164164
To enable this mode, define `MLD_CONFIG_REDUCE_RAM` in [mldsa_native_config.h](mldsa/mldsa_native_config.h) or pass `-DMLD_CONFIG_REDUCE_RAM` as a compiler flag.

mldsa/mldsa_native.S

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@
222222
#undef MLD_PACKING_H
223223
#undef mld_pack_pk
224224
#undef mld_pack_sig
225+
#undef mld_pack_sig_z
225226
#undef mld_pack_sk
226227
#undef mld_unpack_pk
227228
#undef mld_unpack_sig
@@ -296,15 +297,10 @@
296297
#undef mld_polyveck_unpack_t0
297298
#undef mld_polyveck_use_hint
298299
#undef mld_polyvecl
299-
#undef mld_polyvecl_add
300300
#undef mld_polyvecl_chknorm
301-
#undef mld_polyvecl_invntt_tomont
302301
#undef mld_polyvecl_ntt
303302
#undef mld_polyvecl_pack_eta
304-
#undef mld_polyvecl_pack_z
305303
#undef mld_polyvecl_pointwise_acc_montgomery
306-
#undef mld_polyvecl_pointwise_poly_montgomery
307-
#undef mld_polyvecl_reduce
308304
#undef mld_polyvecl_uniform_gamma1
309305
#undef mld_polyvecl_unpack_eta
310306
#undef mld_polyvecl_unpack_z

mldsa/mldsa_native.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@
218218
#undef MLD_PACKING_H
219219
#undef mld_pack_pk
220220
#undef mld_pack_sig
221+
#undef mld_pack_sig_z
221222
#undef mld_pack_sk
222223
#undef mld_unpack_pk
223224
#undef mld_unpack_sig
@@ -292,15 +293,10 @@
292293
#undef mld_polyveck_unpack_t0
293294
#undef mld_polyveck_use_hint
294295
#undef mld_polyvecl
295-
#undef mld_polyvecl_add
296296
#undef mld_polyvecl_chknorm
297-
#undef mld_polyvecl_invntt_tomont
298297
#undef mld_polyvecl_ntt
299298
#undef mld_polyvecl_pack_eta
300-
#undef mld_polyvecl_pack_z
301299
#undef mld_polyvecl_pointwise_acc_montgomery
302-
#undef mld_polyvecl_pointwise_poly_montgomery
303-
#undef mld_polyvecl_reduce
304300
#undef mld_polyvecl_uniform_gamma1
305301
#undef mld_polyvecl_unpack_eta
306302
#undef mld_polyvecl_unpack_z

mldsa/src/packing.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,15 @@ void mld_unpack_sk(uint8_t rho[MLDSA_SEEDBYTES], uint8_t tr[MLDSA_TRBYTES],
100100

101101
MLD_INTERNAL_API
102102
void mld_pack_sig(uint8_t sig[MLDSA_CRYPTO_BYTES],
103-
const uint8_t c[MLDSA_CTILDEBYTES], const mld_polyvecl *z,
104-
const mld_polyveck *h, const unsigned int number_of_hints)
103+
const uint8_t c[MLDSA_CTILDEBYTES], const mld_polyveck *h,
104+
const unsigned int number_of_hints)
105105
{
106106
unsigned int i, j, k;
107107

108108
mld_memcpy(sig, c, MLDSA_CTILDEBYTES);
109109
sig += MLDSA_CTILDEBYTES;
110110

111-
mld_polyvecl_pack_z(sig, z);
111+
/* skip z component - packed via mld_pack_sig_z */
112112
sig += MLDSA_L * MLDSA_POLYZ_PACKEDBYTES;
113113

114114
/* Encode hints h */
@@ -168,6 +168,15 @@ void mld_pack_sig(uint8_t sig[MLDSA_CRYPTO_BYTES],
168168
}
169169
}
170170

171+
MLD_INTERNAL_API
172+
void mld_pack_sig_z(uint8_t sig[MLDSA_CRYPTO_BYTES], const mld_poly *zi,
173+
unsigned i)
174+
{
175+
sig += MLDSA_CTILDEBYTES;
176+
sig += i * MLDSA_POLYZ_PACKEDBYTES;
177+
mld_polyz_pack(sig, zi);
178+
}
179+
171180
/*************************************************
172181
* Name: mld_unpack_hints
173182
*

mldsa/src/packing.h

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,12 @@ __contract__(
7373
/*************************************************
7474
* Name: mld_pack_sig
7575
*
76-
* Description: Bit-pack signature sig = (c, z, h).
76+
* Description: Bit-pack c and h component of sig = (c, z, h).
77+
* The z component is packed separately using mld_pack_sig_z.
7778
*
7879
* Arguments: - uint8_t sig[]: output byte array
7980
* - const uint8_t *c: pointer to challenge hash length
8081
* MLDSA_SEEDBYTES
81-
* - const mld_polyvecl *z: pointer to vector z
8282
* - const mld_polyveck *h: pointer to hint vector h
8383
* - const unsigned int number_of_hints: total
8484
* hints in *h
@@ -89,21 +89,41 @@ __contract__(
8989
**************************************************/
9090
MLD_INTERNAL_API
9191
void mld_pack_sig(uint8_t sig[MLDSA_CRYPTO_BYTES],
92-
const uint8_t c[MLDSA_CTILDEBYTES], const mld_polyvecl *z,
93-
const mld_polyveck *h, const unsigned int number_of_hints)
92+
const uint8_t c[MLDSA_CTILDEBYTES], const mld_polyveck *h,
93+
const unsigned int number_of_hints)
9494
__contract__(
9595
requires(memory_no_alias(sig, MLDSA_CRYPTO_BYTES))
9696
requires(memory_no_alias(c, MLDSA_CTILDEBYTES))
97-
requires(memory_no_alias(z, sizeof(mld_polyvecl)))
9897
requires(memory_no_alias(h, sizeof(mld_polyveck)))
99-
requires(forall(k0, 0, MLDSA_L,
100-
array_bound(z->vec[k0].coeffs, 0, MLDSA_N, -(MLDSA_GAMMA1 - 1), MLDSA_GAMMA1 + 1)))
10198
requires(forall(k1, 0, MLDSA_K,
10299
array_bound(h->vec[k1].coeffs, 0, MLDSA_N, 0, 2)))
103100
requires(number_of_hints <= MLDSA_OMEGA)
104101
assigns(memory_slice(sig, MLDSA_CRYPTO_BYTES))
105102
);
106103

104+
#define mld_pack_sig_z MLD_NAMESPACE_KL(pack_sig_z)
105+
/*************************************************
106+
* Name: mld_pack_sig_z
107+
*
108+
* Description: Bit-pack single polynomial of z component of sig = (c, z, h).
109+
* The c and h components are packed separately using mld_pack_sig.
110+
*
111+
* Arguments: - uint8_t sig[]: output byte array
112+
* - const mld_poly *zi: pointer to a single polynomial in z
113+
* - const unsigned int i: index of zi in vector z
114+
*
115+
**************************************************/
116+
MLD_INTERNAL_API
117+
void mld_pack_sig_z(uint8_t sig[MLDSA_CRYPTO_BYTES], const mld_poly *zi,
118+
unsigned i)
119+
__contract__(
120+
requires(memory_no_alias(sig, MLDSA_CRYPTO_BYTES))
121+
requires(memory_no_alias(zi, sizeof(mld_poly)))
122+
requires(i < MLDSA_L)
123+
requires(array_bound(zi->coeffs, 0, MLDSA_N, -(MLDSA_GAMMA1 - 1), MLDSA_GAMMA1 + 1))
124+
assigns(memory_slice(sig, MLDSA_CRYPTO_BYTES))
125+
);
126+
107127
#define mld_unpack_pk MLD_NAMESPACE_KL(unpack_pk)
108128
/*************************************************
109129
* Name: mld_unpack_pk

mldsa/src/polyvec.c

Lines changed: 0 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -264,50 +264,6 @@ void mld_polyvecl_uniform_gamma1(mld_polyvecl *v,
264264
MLDSA_GAMMA1 + 1);
265265
}
266266

267-
MLD_INTERNAL_API
268-
void mld_polyvecl_reduce(mld_polyvecl *v)
269-
{
270-
unsigned int i;
271-
mld_assert_bound_2d(v->vec, MLDSA_L, MLDSA_N, INT32_MIN,
272-
MLD_REDUCE32_DOMAIN_MAX);
273-
274-
for (i = 0; i < MLDSA_L; ++i)
275-
__loop__(
276-
assigns(i, memory_slice(v, sizeof(mld_polyvecl)))
277-
invariant(i <= MLDSA_L)
278-
invariant(forall(k0, i, MLDSA_L, forall(k1, 0, MLDSA_N, v->vec[k0].coeffs[k1] == loop_entry(*v).vec[k0].coeffs[k1])))
279-
invariant(forall(k2, 0, i,
280-
array_bound(v->vec[k2].coeffs, 0, MLDSA_N, -MLD_REDUCE32_RANGE_MAX, MLD_REDUCE32_RANGE_MAX))))
281-
{
282-
mld_poly_reduce(&v->vec[i]);
283-
}
284-
285-
mld_assert_bound_2d(v->vec, MLDSA_L, MLDSA_N, -MLD_REDUCE32_RANGE_MAX,
286-
MLD_REDUCE32_RANGE_MAX);
287-
}
288-
289-
/* Reference: We use destructive version (output=first input) to avoid
290-
* reasoning about aliasing in the CBMC specification */
291-
MLD_INTERNAL_API
292-
void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
293-
{
294-
unsigned int i;
295-
296-
for (i = 0; i < MLDSA_L; ++i)
297-
__loop__(
298-
assigns(i, memory_slice(u, sizeof(mld_polyvecl)))
299-
invariant(i <= MLDSA_L)
300-
invariant(forall(k0, i, MLDSA_L,
301-
forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
302-
invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, MLD_REDUCE32_DOMAIN_MAX)))
303-
)
304-
{
305-
mld_poly_add(&u->vec[i], &v->vec[i]);
306-
}
307-
mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, INT32_MIN,
308-
MLD_REDUCE32_DOMAIN_MAX);
309-
}
310-
311267
MLD_INTERNAL_API
312268
void mld_polyvecl_ntt(mld_polyvecl *v)
313269
{
@@ -327,46 +283,6 @@ void mld_polyvecl_ntt(mld_polyvecl *v)
327283
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
328284
}
329285

330-
MLD_INTERNAL_API
331-
void mld_polyvecl_invntt_tomont(mld_polyvecl *v)
332-
{
333-
unsigned int i;
334-
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
335-
336-
for (i = 0; i < MLDSA_L; ++i)
337-
__loop__(
338-
assigns(i, memory_slice(v, sizeof(mld_polyvecl)))
339-
invariant(i <= MLDSA_L)
340-
invariant(forall(k0, i, MLDSA_L, forall(k1, 0, MLDSA_N, v->vec[k0].coeffs[k1] == loop_entry(*v).vec[k0].coeffs[k1])))
341-
invariant(forall(k1, 0, i, array_abs_bound(v->vec[k1].coeffs, 0, MLDSA_N, MLD_INTT_BOUND))))
342-
{
343-
mld_poly_invntt_tomont(&v->vec[i]);
344-
}
345-
346-
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_INTT_BOUND);
347-
}
348-
349-
MLD_INTERNAL_API
350-
void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
351-
const mld_polyvecl *v)
352-
{
353-
unsigned int i;
354-
mld_assert_abs_bound(a->coeffs, MLDSA_N, MLD_NTT_BOUND);
355-
mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
356-
357-
for (i = 0; i < MLDSA_L; ++i)
358-
__loop__(
359-
assigns(i, memory_slice(r, sizeof(mld_polyvecl)))
360-
invariant(i <= MLDSA_L)
361-
invariant(forall(k2, 0, i, array_abs_bound(r->vec[k2].coeffs, 0, MLDSA_N, MLDSA_Q)))
362-
)
363-
{
364-
mld_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]);
365-
}
366-
367-
mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
368-
}
369-
370286
MLD_STATIC_TESTABLE void mld_polyvecl_pointwise_acc_montgomery_c(
371287
mld_poly *w, const mld_polyvecl *u, const mld_polyvecl *v)
372288
__contract__(
@@ -832,23 +748,6 @@ void mld_polyvecl_pack_eta(uint8_t r[MLDSA_L * MLDSA_POLYETA_PACKEDBYTES],
832748
}
833749
}
834750

835-
MLD_INTERNAL_API
836-
void mld_polyvecl_pack_z(uint8_t r[MLDSA_L * MLDSA_POLYZ_PACKEDBYTES],
837-
const mld_polyvecl *p)
838-
{
839-
unsigned int i;
840-
mld_assert_bound_2d(p->vec, MLDSA_L, MLDSA_N, -(MLDSA_GAMMA1 - 1),
841-
MLDSA_GAMMA1 + 1);
842-
for (i = 0; i < MLDSA_L; ++i)
843-
__loop__(
844-
assigns(i, memory_slice(r, MLDSA_L * MLDSA_POLYZ_PACKEDBYTES))
845-
invariant(i <= MLDSA_L)
846-
)
847-
{
848-
mld_polyz_pack(&r[i * MLDSA_POLYZ_PACKEDBYTES], &p->vec[i]);
849-
}
850-
}
851-
852751
MLD_INTERNAL_API
853752
void mld_polyveck_pack_t0(uint8_t r[MLDSA_K * MLDSA_POLYT0_PACKEDBYTES],
854753
const mld_polyveck *p)

0 commit comments

Comments
 (0)