@@ -45,20 +45,12 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
  * of the same parent object.
  */

-  MLD_ALIGN uint8_t seed_ext[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
-
-  for (j = 0; j < 4; j++)
-  __loop__(
-    assigns(j, object_whole(seed_ext))
-    invariant(j <= 4)
-  )
-  {
-    mld_memcpy(seed_ext[j], rho, MLDSA_SEEDBYTES);
-  }
+  MLD_ALIGN uint8_t single_seed[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
+  MLD_ALIGN uint8_t batched_seeds[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
   /* Sample 4 matrix entries at a time. */
   for (i = 0; i < (MLDSA_K * MLDSA_L / 4) * 4; i += 4)
   __loop__(
-    assigns(i, j, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
+    assigns(i, j, object_whole(batched_seeds), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
     invariant(i <= (MLDSA_K * MLDSA_L / 4) * 4 && i % 4 == 0)
     /* vectors 0 .. i / MLDSA_L are completely sampled */
     invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -70,28 +62,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
   {
     for (j = 0; j < 4; j++)
     __loop__(
-      assigns(j, object_whole(seed_ext))
+      assigns(j, object_whole(batched_seeds))
       invariant(j <= 4)
     )
     {
       uint8_t x = (uint8_t)((i + j) / MLDSA_L);
       uint8_t y = (uint8_t)((i + j) % MLDSA_L);

-      seed_ext[j][MLDSA_SEEDBYTES + 0] = y;
-      seed_ext[j][MLDSA_SEEDBYTES + 1] = x;
+      mld_memcpy(batched_seeds[j], rho, MLDSA_SEEDBYTES);
+      batched_seeds[j][MLDSA_SEEDBYTES + 0] = y;
+      batched_seeds[j][MLDSA_SEEDBYTES + 1] = x;
     }

     mld_poly_uniform_4x(&mat[i / MLDSA_L].vec[i % MLDSA_L],
                         &mat[(i + 1) / MLDSA_L].vec[(i + 1) % MLDSA_L],
                         &mat[(i + 2) / MLDSA_L].vec[(i + 2) % MLDSA_L],
                         &mat[(i + 3) / MLDSA_L].vec[(i + 3) % MLDSA_L],
-                        seed_ext);
+                        batched_seeds);
   }

+  mld_memcpy(single_seed, rho, MLDSA_SEEDBYTES);
+
   /* For MLDSA_K=6, MLDSA_L=5, process the last two entries individually */
   while (i < MLDSA_K * MLDSA_L)
   __loop__(
-    assigns(i, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
+    assigns(i, object_whole(single_seed), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
     invariant(i <= MLDSA_K * MLDSA_L)
     /* vectors 0 .. i / MLDSA_L are completely sampled */
     invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -105,27 +100,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
     uint8_t y = (uint8_t)(i % MLDSA_L);
     mld_poly *this_poly = &mat[i / MLDSA_L].vec[i % MLDSA_L];

-    seed_ext[0][MLDSA_SEEDBYTES + 0] = y;
-    seed_ext[0][MLDSA_SEEDBYTES + 1] = x;
+    single_seed[MLDSA_SEEDBYTES + 0] = y;
+    single_seed[MLDSA_SEEDBYTES + 1] = x;

-    mld_poly_uniform(this_poly, seed_ext[0]);
+    mld_poly_uniform(this_poly, single_seed);
     i++;
   }

   /*
    * The public matrix is generated in NTT domain. If the native backend
-   * uses a custom order in NTT domain, permute A accordingly.
+   * uses a custom order in NTT domain, permute A accordingly. This does
+   * not affect the bounds on the coefficients, so we ignore this for CBMC
+   * to simplify the proof.
    */
+#ifndef CBMC
   for (i = 0; i < MLDSA_K; i++)
   {
     for (j = 0; j < MLDSA_L; j++)
     {
       mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
     }
   }
+#endif /* !CBMC */

   /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
-  mld_zeroize(seed_ext, sizeof(seed_ext));
+  mld_zeroize(single_seed, sizeof(single_seed));
 }

 MLD_INTERNAL_API
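For reference, a small standalone sketch (not part of the patch) of the index arithmetic used in the expansion loops above: the flat index i maps to row x = i / MLDSA_L and column y = i % MLDSA_L, the two bytes appended to rho are y followed by x, and for ML-DSA-65 (assumed here: K = 6, L = 5) the 4-way batched loop covers the first 28 entries while the scalar tail loop handles the remaining two, (5,3) and (5,4).

/* Standalone illustration, assuming K = 6 and L = 5 (ML-DSA-65). */
#include <stdio.h>

#define K 6
#define L 5

int main(void)
{
  unsigned int i;
  /* Batched loop: (K*L/4)*4 = 28 entries, four matrix entries per pass. */
  for (i = 0; i < (K * L / 4) * 4; i += 4)
  {
    printf("batch: (%u,%u) (%u,%u) (%u,%u) (%u,%u)\n",
           (i + 0) / L, (i + 0) % L, (i + 1) / L, (i + 1) % L,
           (i + 2) / L, (i + 2) % L, (i + 3) / L, (i + 3) % L);
  }
  /* Tail loop: the remaining K*L - 28 = 2 entries, (5,3) and (5,4). */
  while (i < K * L)
  {
    printf("tail:  (%u,%u)\n", i / L, i % L);
    i++;
  }
  return 0;
}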
@@ -219,7 +218,6 @@ void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
     invariant(i <= MLDSA_L)
     invariant(forall(k0, i, MLDSA_L,
       forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
-    invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
     invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
   )
   {
@@ -287,87 +285,131 @@ void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
   mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
 }

+#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
+    MLD_CONFIG_PARAMETER_SET == 44
+
 MLD_INTERNAL_API
 void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
                                            const mld_polyvecl *v)
 {
-#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
-    MLD_CONFIG_PARAMETER_SET == 44
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l4_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5) && \
     MLD_CONFIG_PARAMETER_SET == 65
+
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l5_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7) && \
     MLD_CONFIG_PARAMETER_SET == 87
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l7_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
-#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
-          MLD_CONFIG_PARAMETER_SET == 44) && \
-          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
-          MLD_CONFIG_PARAMETER_SET == 65) && \
-          MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
-          MLD_CONFIG_PARAMETER_SET == 87 */
-  unsigned int i, j;
-  mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
-  mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
-  /* The first input is bounded by [0, Q-1] inclusive
-   * The second input is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
+}
+
+#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
+          MLD_CONFIG_PARAMETER_SET == 44) && \
+          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
+          MLD_CONFIG_PARAMETER_SET == 65) && \
+          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
+          MLD_CONFIG_PARAMETER_SET == 87) */
+
+#define mld_pointwise_sum_of_products \
+  MLD_NAMESPACE_KL(mld_pointwise_sum_of_products)
+static int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
+                                             const mld_polyvecl *v,
+                                             unsigned int i)
+__contract__(
+  requires(memory_no_alias(u, sizeof(mld_polyvecl)))
+  requires(memory_no_alias(v, sizeof(mld_polyvecl)))
+  requires(i < MLDSA_N)
+  requires(forall(l0, 0, MLDSA_L,
+    array_bound(u->vec[l0].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))
+  requires(forall(l1, 0, MLDSA_L,
+    array_abs_bound(v->vec[l1].coeffs, 0, MLDSA_N, MLD_NTT_BOUND)))
+  ensures(return_value >= -(int64_t)MLDSA_L * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+  ensures(return_value <= (int64_t)MLDSA_L * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+)
+{
+  /* Input vector u is bounded by [0, Q-1] inclusive
+   * Input vector v is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
    * safely accumulate in 64-bits without intermediate reductions as
    * MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX
    *
    * The worst case is ML-DSA-87: 7 * (9Q-1) * (Q-1) < 2**52
    * (and likewise for negative values)
    */

+  int64_t t = 0;
+  unsigned int j;
+  for (j = 0; j < MLDSA_L; j++)
+  __loop__(
+    assigns(j, t)
+    invariant(j <= MLDSA_L)
+    invariant(t >= -(int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+    invariant(t <= (int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+  )
+  {
+    const int64_t u64 = (int64_t)u->vec[j].coeffs[i];
+    const int64_t v64 = (int64_t)v->vec[j].coeffs[i];
+    /* Helper assertions for proof efficiency. Do not remove */
+    mld_assert(u64 >= 0 && u64 < MLDSA_Q);
+    mld_assert(v64 > -MLD_NTT_BOUND && v64 < MLD_NTT_BOUND);
+    t += (u64 * v64);
+  }
+  return t;
+}
+
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
+  unsigned int i;
+
+  mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
+  mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   for (i = 0; i < MLDSA_N; i++)
   __loop__(
-    assigns(i, j, object_whole(w))
+    assigns(i, object_whole(w))
     invariant(i <= MLDSA_N)
     invariant(array_abs_bound(w->coeffs, 0, i, MLDSA_Q))
   )
   {
-    int64_t t = 0;
-    int32_t r;
-    for (j = 0; j < MLDSA_L; j++)
-    __loop__(
-      assigns(j, t)
-      invariant(j <= MLDSA_L)
-      invariant(t >= -(int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
-      invariant(t <= (int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
-    )
-    {
-      t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
-    }
-
-    r = mld_montgomery_reduce(t);
-    w->coeffs[i] = r;
+    w->coeffs[i] =
+        mld_montgomery_reduce(mld_pointwise_sum_of_products(u, v, i));
   }

   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #endif /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
            MLD_CONFIG_PARAMETER_SET == 44) && \
            !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
            MLD_CONFIG_PARAMETER_SET == 65) && \
            !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
            MLD_CONFIG_PARAMETER_SET == 87) */
-}

 MLD_INTERNAL_API
 uint32_t mld_polyvecl_chknorm(const mld_polyvecl *v, int32_t bound)
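As a sanity check of the accumulation bound quoted in mld_pointwise_sum_of_products above, the following standalone sketch (not part of the patch) evaluates the worst case for ML-DSA-87, assuming Q = 8380417 (the ML-DSA modulus), an NTT bound of 9*Q as stated in the comment, and MLDSA_L = 7.

/*
 * Standalone check of the worst-case accumulator bound from the comment
 * in mld_pointwise_sum_of_products.  Assumes Q = 8380417, an NTT bound
 * of 9*Q, and L = 7 (ML-DSA-87).
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  const int64_t q = 8380417;
  const int64_t ntt_bound = 9 * q; /* |v| coefficients are < 9*Q */
  const int64_t l = 7;             /* MLDSA_L for ML-DSA-87 */
  const int64_t worst = l * (ntt_bound - 1) * (q - 1);

  /* 7 * (9*Q - 1) * (Q - 1) = 4424576926285824 < 2^52 = 4503599627370496 */
  assert(worst == 4424576926285824);
  assert(worst < ((int64_t)1 << 52));
  assert(worst < INT64_MAX);
  printf("worst-case |t| = %lld, 2^52 = %lld\n",
         (long long)worst, (long long)((int64_t)1 << 52));
  return 0;
}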