Skip to content

Commit f520f98

Browse files
committed
Better explanation for Barrett division in decompose (NEON)
Adapt the new explanation to the NEON implementation. Signed-off-by: jammychiou1 <[email protected]>
1 parent d3c6070 commit f520f98

File tree

2 files changed

+58
-4
lines changed

2 files changed

+58
-4
lines changed

dev/aarch64_clean/src/poly_decompose_32_asm.S

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,37 @@
1111
.macro decompose32 a1, a, temp
1212
// range: 0 <= a <= Q-1 = 32*GAMMA2
1313

14+
/* check-magic: 523776 == 2 * intdiv(MLDSA_Q - 1, 32) */
15+
/* check-magic: 1074791425 == floor(2**49 / 523776) */
16+
/* check-magic: 575897802350002176 == 1 / (1 / 523776 - 1074791425 / 2^49) */
1417
// Compute a1 = round-(a / (2*GAMMA2)) = round-(a / 523776) ≈
1518
// round(a * 1074791425 / 2^49), where round-() denotes "round half
16-
// down". This is exact for 0 <= a < Q. Note that half is rounded down
17-
// since 1074791425 / 2^491 / 523776.
19+
// down". This is exact for 0 <= a < Q. We'll prove this in the
20+
// following paragraphs, in which we denote 2*GAMMA2 as B to avoid
21+
// clutter.
22+
//
23+
// Consider the (signed) error a * (1 / B - 1074791425 / 2^49) between
24+
// a / B and the (under-)approximation a * 1074791425 / 2^49. Because
25+
// eps := 1 / B - 1074791425 / 2^49 is 1 / 575897802350002176
26+
// 2^(-58.99) < 2^(-58), we have 0 <= a * eps < 2^23 * 2^(-58) =
27+
// 1 / 2^35 < 1 / 2^19 < 1 / B (note that a is non-negative).
28+
//
29+
// On the other hand, 1 / B is the spacing between the integral
30+
// multiples of 1 / B, which includes all rounding boundaries n + 0.5
31+
// (since B is even). Hence, if a / B is not of the form n + 0.5, then
32+
// it is at least 1 / B away from the nearest rounding boundary, so
33+
// moving from a / B to a * 1074791425 / 2^49 does not affect the
34+
// rounding result, no matter the type of rounding used in either side.
35+
// In particular, we have round-(a / B) = round(a * 1074791425 / 2^49)
36+
// as claimed.
37+
//
38+
// As for the remaining case where a / B _is_ of the form n + 0.5,
39+
// because a * 1074791425 / 2^49 is slightly but strictly below a / B =
40+
// n + 0.5 (note that a and thus the error a * eps cannot be 0 here), it
41+
// is always rounded down to n. More precisely, we have round-(a / B) =
42+
// round(a * 1074791425 / 2^49), where the round-down on the LHS is
43+
// essential, and on the RHS the type of rounding again does not matter.
44+
// This concludes the proof.
1845
sqdmulh \a1\().4s, \a\().4s, barrett_const.4s
1946
srshr \a1\().4s, \a1\().4s, #18
2047
// range: 0 <= a1 <= 16

dev/aarch64_clean/src/poly_decompose_88_asm.S

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,37 @@
1111
.macro decompose88 a1, a, temp
1212
// range: 0 <= a <= Q-1 = 88*GAMMA2
1313

14+
/* check-magic: 190464 == 2 * intdiv(MLDSA_Q - 1, 88) */
15+
/* check-magic: 1477838209 == floor(2**48 / 190464) */
16+
/* check-magic: 26177172834091008 == 35 / (1 / 190464 - 1477838209 / 2^48) */
1417
// Compute a1 = round-(a / (2*GAMMA2)) = round-(a / 190464) ≈
1518
// round(a * 1477838209 / 2^48), where round-() denotes "round half
16-
// down". This is exact for 0 <= a < Q. Note that half is rounded down
17-
// since 1477838209 / 2^481 / 190464.
19+
// down". This is exact for 0 <= a < Q. We'll prove this in the
20+
// following paragraphs, in which we denote 2*GAMMA2 as B to avoid
21+
// clutter.
22+
//
23+
// Consider the (signed) error a * (1 / B - 1477838209 / 2^48) between
24+
// a / B and the (under-)approximation a * 1477838209 / 2^48. Because
25+
// eps := 1 / B - 1477838209 / 2^48 is 35 / 26177172834091008
26+
// 2^(-49.41) < 2^(-49), we have 0 <= a * eps < 2^23 * 2^(-49) =
27+
// 1 / 2^26 < 1 / 2^18 < 1 / B (note that a is non-negative).
28+
//
29+
// On the other hand, 1 / B is the spacing between the integral
30+
// multiples of 1 / B, which includes all rounding boundaries n + 0.5
31+
// (since B is even). Hence, if a / B is not of the form n + 0.5, then
32+
// it is at least 1 / B away from the nearest rounding boundary, so
33+
// moving from a / B to a * 1477838209 / 2^48 does not affect the
34+
// rounding result, no matter the type of rounding used in either side.
35+
// In particular, we have round-(a / B) = round(a * 1477838209 / 2^48)
36+
// as claimed.
37+
//
38+
// As for the remaining case where a / B _is_ of the form n + 0.5,
39+
// because a * 1477838209 / 2^48 is slightly but strictly below a / B =
40+
// n + 0.5 (note that a and thus the error a * eps cannot be 0 here), it
41+
// is always rounded down to n. More precisely, we have round-(a / B) =
42+
// round(a * 1477838209 / 2^48), where the round-down on the LHS is
43+
// essential, and on the RHS the type of rounding again does not matter.
44+
// This concludes the proof.
1845
sqdmulh \a1\().4s, \a\().4s, barrett_const.4s
1946
srshr \a1\().4s, \a1\().4s, #17
2047
// range: 0 <= a1 <= 44

0 commit comments

Comments
 (0)