@@ -44,22 +44,14 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
    * of the same parent object.
    */
 
-  MLD_ALIGN uint8_t seed_ext[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
-
-  for (j = 0; j < 4; j++)
-  __loop__(
-    assigns(j, object_whole(seed_ext))
-    invariant(j <= 4)
-  )
-  {
-    mld_memcpy(seed_ext[j], rho, MLDSA_SEEDBYTES);
-  }
+  MLD_ALIGN uint8_t single_seed[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
 
 #if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY)
+  MLD_ALIGN uint8_t batched_seeds[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)];
   /* Sample 4 matrix entries at a time. */
   for (i = 0; i < (MLDSA_K * MLDSA_L / 4) * 4; i += 4)
   __loop__(
-    assigns(i, j, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
+    assigns(i, j, object_whole(batched_seeds), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
     invariant(i <= (MLDSA_K * MLDSA_L / 4) * 4 && i % 4 == 0)
     /* vectors 0 .. i / MLDSA_L are completely sampled */
     invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -71,31 +63,38 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
   {
     for (j = 0; j < 4; j++)
     __loop__(
-      assigns(j, object_whole(seed_ext))
+      assigns(j, object_whole(batched_seeds))
       invariant(j <= 4)
     )
     {
       uint8_t x = (uint8_t)((i + j) / MLDSA_L);
       uint8_t y = (uint8_t)((i + j) % MLDSA_L);
 
-      seed_ext[j][MLDSA_SEEDBYTES + 0] = y;
-      seed_ext[j][MLDSA_SEEDBYTES + 1] = x;
+      mld_memcpy(batched_seeds[j], rho, MLDSA_SEEDBYTES);
+      batched_seeds[j][MLDSA_SEEDBYTES + 0] = y;
+      batched_seeds[j][MLDSA_SEEDBYTES + 1] = x;
     }
 
     mld_poly_uniform_4x(&mat[i / MLDSA_L].vec[i % MLDSA_L],
                         &mat[(i + 1) / MLDSA_L].vec[(i + 1) % MLDSA_L],
                         &mat[(i + 2) / MLDSA_L].vec[(i + 2) % MLDSA_L],
                         &mat[(i + 3) / MLDSA_L].vec[(i + 3) % MLDSA_L],
-                        seed_ext);
+                        batched_seeds);
   }
+
+  /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
+  mld_zeroize(batched_seeds, sizeof(batched_seeds));
+
 #else /* !MLD_CONFIG_SERIAL_FIPS202_ONLY */
   i = 0;
 #endif /* MLD_CONFIG_SERIAL_FIPS202_ONLY */
 
+  mld_memcpy(single_seed, rho, MLDSA_SEEDBYTES);
+
   /* For MLDSA_K=6, MLDSA_L=5, process the last two entries individually */
   while (i < MLDSA_K * MLDSA_L)
   __loop__(
-    assigns(i, object_whole(seed_ext), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
+    assigns(i, object_whole(single_seed), memory_slice(mat, MLDSA_K * sizeof(mld_polyvecl)))
     invariant(i <= MLDSA_K * MLDSA_L)
     /* vectors 0 .. i / MLDSA_L are completely sampled */
     invariant(forall(k1, 0, i / MLDSA_L, forall(l1, 0, MLDSA_L,
@@ -109,27 +108,31 @@ void mld_polyvec_matrix_expand(mld_polyvecl mat[MLDSA_K],
     uint8_t y = (uint8_t)(i % MLDSA_L);
     mld_poly *this_poly = &mat[i / MLDSA_L].vec[i % MLDSA_L];
 
-    seed_ext[0][MLDSA_SEEDBYTES + 0] = y;
-    seed_ext[0][MLDSA_SEEDBYTES + 1] = x;
+    single_seed[MLDSA_SEEDBYTES + 0] = y;
+    single_seed[MLDSA_SEEDBYTES + 1] = x;
 
-    mld_poly_uniform(this_poly, seed_ext[0]);
+    mld_poly_uniform(this_poly, single_seed);
     i++;
   }
 
   /*
    * The public matrix is generated in NTT domain. If the native backend
-   * uses a custom order in NTT domain, permute A accordingly.
+   * uses a custom order in NTT domain, permute A accordingly. This does
+   * not affect the bounds on the coefficients, so we ignore this for CBMC
+   * to simplify the proof.
    */
+#ifndef CBMC
   for (i = 0; i < MLDSA_K; i++)
   {
     for (j = 0; j < MLDSA_L; j++)
     {
       mld_poly_permute_bitrev_to_custom(mat[i].vec[j].coeffs);
     }
   }
+#endif /* !CBMC */
 
   /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */
-  mld_zeroize(seed_ext, sizeof(seed_ext));
+  mld_zeroize(single_seed, sizeof(single_seed));
 }
 
 MLD_INTERNAL_API
@@ -234,7 +237,6 @@ void mld_polyvecl_add(mld_polyvecl *u, const mld_polyvecl *v)
     invariant(i <= MLDSA_L)
     invariant(forall(k0, i, MLDSA_L,
       forall(k1, 0, MLDSA_N, u->vec[k0].coeffs[k1] == loop_entry(*u).vec[k0].coeffs[k1])))
-    invariant(forall(k4, 0, i, forall(k5, 0, MLDSA_N, u->vec[k4].coeffs[k5] == loop_entry(*u).vec[k4].coeffs[k5] + v->vec[k4].coeffs[k5])))
     invariant(forall(k6, 0, i, array_bound(u->vec[k6].coeffs, 0, MLDSA_N, INT32_MIN, REDUCE32_DOMAIN_MAX)))
   )
   {
@@ -302,87 +304,129 @@ void mld_polyvecl_pointwise_poly_montgomery(mld_polyvecl *r, const mld_poly *a,
   mld_assert_abs_bound_2d(r->vec, MLDSA_L, MLDSA_N, MLDSA_Q);
 }
 
+#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
+    MLD_CONFIG_PARAMETER_SET == 44
+
 MLD_INTERNAL_API
 void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
                                            const mld_polyvecl *v)
 {
-#if defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4) && \
-    MLD_CONFIG_PARAMETER_SET == 44
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l4_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5) && \
     MLD_CONFIG_PARAMETER_SET == 65
+
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l5_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #elif defined(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7) && \
     MLD_CONFIG_PARAMETER_SET == 87
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
   /* TODO: proof */
   mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
   mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   mld_polyvecl_pointwise_acc_montgomery_l7_native(
       w->coeffs, (const int32_t(*)[MLDSA_N])u->vec,
       (const int32_t(*)[MLDSA_N])v->vec);
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
-#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
-          MLD_CONFIG_PARAMETER_SET == 44) && \
-          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
-          MLD_CONFIG_PARAMETER_SET == 65) && \
-          MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
-          MLD_CONFIG_PARAMETER_SET == 87 */
-  unsigned int i, j;
-  mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
-  mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
-  /* The first input is bounded by [0, Q-1] inclusive
-   * The second input is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
+}
+
+#else /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
+          MLD_CONFIG_PARAMETER_SET == 44) && \
+          !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
+          MLD_CONFIG_PARAMETER_SET == 65) && \
+          MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
+          MLD_CONFIG_PARAMETER_SET == 87 */
+
+static int64_t mld_pointwise_sum_of_products(const mld_polyvecl *u,
+                                             const mld_polyvecl *v,
+                                             unsigned int i)
+__contract__(
+  requires(memory_no_alias(u, sizeof(mld_polyvecl)))
+  requires(memory_no_alias(v, sizeof(mld_polyvecl)))
+  requires(i < MLDSA_N)
+  requires(forall(l0, 0, MLDSA_L,
+                  array_bound(u->vec[l0].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))
+  requires(forall(l1, 0, MLDSA_L,
+                  array_abs_bound(v->vec[l1].coeffs, 0, MLDSA_N, MLD_NTT_BOUND)))
+  ensures(return_value >= -(int64_t)MLDSA_L * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+  ensures(return_value <= (int64_t)MLDSA_L * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+)
+{
+  /* Input vector u is bounded by [0, Q-1] inclusive
+   * Input vector v is bounded by [-9Q+1, 9Q-1] inclusive. Hence, we can
    * safely accumulate in 64-bits without intermediate reductions as
    * MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX
    *
    * The worst case is ML-DSA-87: 7 * (9Q-1) * (Q-1) < 2**52
    * (and likewise for negative values)
    */
 
+  int64_t t = 0;
+  unsigned int j;
+  for (j = 0; j < MLDSA_L; j++)
+  __loop__(
+    assigns(j, t)
+    invariant(j <= MLDSA_L)
+    invariant(t >= -(int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+    invariant(t <= (int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
+  )
+  {
+    const int64_t u64 = (int64_t)u->vec[j].coeffs[i];
+    const int64_t v64 = (int64_t)v->vec[j].coeffs[i];
+    /* Helper assertions for proof efficiency. Do not remove */
+    mld_assert(u64 >= 0 && u64 < MLDSA_Q);
+    mld_assert(v64 > -MLD_NTT_BOUND && v64 < MLD_NTT_BOUND);
+    t += (u64 * v64);
+  }
+  return t;
+}
+
+void mld_polyvecl_pointwise_acc_montgomery(mld_poly *w, const mld_polyvecl *u,
+                                           const mld_polyvecl *v)
+{
+  unsigned int i;
+
+  mld_assert_bound_2d(u->vec, MLDSA_L, MLDSA_N, 0, MLDSA_Q);
+  mld_assert_abs_bound_2d(v->vec, MLDSA_L, MLDSA_N, MLD_NTT_BOUND);
   for (i = 0; i < MLDSA_N; i++)
   __loop__(
-    assigns(i, j, object_whole(w))
+    assigns(i, object_whole(w))
     invariant(i <= MLDSA_N)
     invariant(array_abs_bound(w->coeffs, 0, i, MLDSA_Q))
   )
   {
-    int64_t t = 0;
-    int32_t r;
-    for (j = 0; j < MLDSA_L; j++)
-    __loop__(
-      assigns(j, t)
-      invariant(j <= MLDSA_L)
-      invariant(t >= -(int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
-      invariant(t <= (int64_t)j * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1))
-    )
-    {
-      t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
-    }
-
-    r = mld_montgomery_reduce(t);
-    w->coeffs[i] = r;
+    w->coeffs[i] =
+        mld_montgomery_reduce(mld_pointwise_sum_of_products(u, v, i));
   }
 
   mld_assert_abs_bound(w->coeffs, MLDSA_N, MLDSA_Q);
+}
+
 #endif /* !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4 && \
           MLD_CONFIG_PARAMETER_SET == 44) && \
           !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5 && \
          MLD_CONFIG_PARAMETER_SET == 65) && \
           !(MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7 && \
           MLD_CONFIG_PARAMETER_SET == 87) */
-}
 
 MLD_INTERNAL_API
 uint32_t mld_polyvecl_chknorm(const mld_polyvecl *v, int32_t bound)
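
The comment "For MLDSA_K=6, MLDSA_L=5, process the last two entries individually" in the expansion hunk above refers to the split between the 4-way batched loop and the serial tail loop. The following standalone sketch (not part of the patch) illustrates that index arithmetic, assuming the ML-DSA-65 parameters MLDSA_K = 6 and MLDSA_L = 5 and mirroring the patch's row/column and seed-layout conventions:

#include <assert.h>
#include <stdint.h>

#define MLDSA_K 6 /* ML-DSA-65 */
#define MLDSA_L 5

int main(void)
{
  /* The batched loop samples the largest multiple of 4 not exceeding K*L. */
  unsigned int batched = (MLDSA_K * MLDSA_L / 4) * 4;
  unsigned int i;
  assert(batched == 28); /* 30 entries total, so 2 remain for the tail loop */

  for (i = batched; i < MLDSA_K * MLDSA_L; i++)
  {
    uint8_t row = (uint8_t)(i / MLDSA_L); /* "x" in the patch */
    uint8_t col = (uint8_t)(i % MLDSA_L); /* "y" in the patch */
    /* i = 28 -> A[5][3], i = 29 -> A[5][4]; each entry is sampled from
     * rho || col || row, matching single_seed[MLDSA_SEEDBYTES + 0] = y and
     * single_seed[MLDSA_SEEDBYTES + 1] = x in the patch. */
    assert(row == 5);
    assert(col == 3 + (i - batched));
  }
  return 0;
}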
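The bound comment in mld_pointwise_sum_of_products, MLDSA_L * (MLD_NTT_BOUND-1) * (Q-1) < INT64_MAX with ML-DSA-87 as the worst case below 2**52, can be checked at compile time. A minimal sketch, assuming MLDSA_Q = 8380417 and MLD_NTT_BOUND = 9 * MLDSA_Q as suggested by the [-9Q+1, 9Q-1] range in the comment; the numeric values and the MLDSA_L_MAX name are assumptions of this illustration, not taken from the patch:

#include <stdint.h>

#define MLDSA_Q 8380417             /* q = 2^23 - 2^13 + 1 (FIPS 204) */
#define MLD_NTT_BOUND (9 * MLDSA_Q) /* assumed from the [-9Q+1, 9Q-1] bound */
#define MLDSA_L_MAX 7               /* ML-DSA-87, the worst case */

/* Each product |u*v| is at most (Q-1)*(9Q-1); summing MLDSA_L_MAX of them
 * stays below 2^52 and hence far below INT64_MAX, so the 64-bit accumulator
 * never needs an intermediate reduction. */
_Static_assert((int64_t)MLDSA_L_MAX * (MLDSA_Q - 1) * (MLD_NTT_BOUND - 1) <
                   ((int64_t)1 << 52),
               "sum of products fits in 64 bits without reduction");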