Skip to content

Commit bd64e91

Browse files
committed
asm/ct*_inverse_mod_*.pl: harmonize commentary with implementation.
1 parent 01d167c commit bd64e91

File tree

5 files changed

+24
-9
lines changed

5 files changed

+24
-9
lines changed

src/asm/ct_inverse_mod_256-armv8.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
v += mod
6464
if v < 0:
6565
v += mod
66-
elif v == 1<<512
66+
elif v == 1<<512:
6767
v -= mod
6868
6969
return v & (2**512 - 1) # to be reduced % mod

src/asm/ct_inverse_mod_256-x86_64.pl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
v += mod
6363
if v < 0:
6464
v += mod
65-
elif v == 1<<512
65+
elif v == 1<<512:
6666
v -= mod
6767
6868
return v & (2**512 - 1) # to be reduced % mod
@@ -366,10 +366,10 @@
366366
# bit-length of the |f?| and |g?| single-limb multiplicands. However!
367367
# The latter should not be taken literally, as they are always chosen so
368368
# that "bad things" don't happen. For example, there comes a point when
369-
# |v| grows beyond 383 bits, while |u| remains 383 bits wide. Yet, we
370-
# always call __smul_383x63 to perform |u|*|f0|+|v|*|g0| step. This is
369+
# |v| grows beyond 256 bits, while |u| remains 256 bits wide. Yet, we
370+
# always call __smulq_256x63 to perform |u|*|f0|+|v|*|g0| step. This is
371371
# because past that point |f0| is always 1 and |g0| is always 0. And,
372-
# since |u| never grows beyond 383 bits, __smul_767x63 doesn't have to
372+
# since |u| never grows beyond 256 bits, __smulq_512x63 doesn't have to
373373
# perform full-width |u|*|f1| multiplication, half-width one with sign
374374
# extension is sufficient...
375375
$code.=<<___;

src/asm/ct_inverse_mod_384-armv8.pl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
if b < 0:
4545
b, f1, g1 = -b, -f1, -g1
4646
47-
# __smul_767x63
47+
# __smul_768x63
4848
u, v = u*f0 + v*g0, u*f1 + v*g1
4949
5050
if 768 % k:
@@ -58,8 +58,13 @@
5858
5959
v = u*f1 + v*g1
6060
61+
mod <<= 768 - mod.bit_length() # align to the left
6162
if v < 0:
62-
v += mod << (768 - mod.bit_length()) # left aligned
63+
v += mod
64+
if v < 0:
65+
v += mod
66+
elif v == 1<<768:
67+
v -= mod
6368
6469
return v & (2**768 - 1) # to be reduced % mod
6570
___

src/asm/ctq_inverse_mod_384-x86_64.pl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,13 @@
5858
5959
v = u*f1 + v*g1
6060
61+
mod <<= 768 - mod.bit_length() # align to the left
6162
if v < 0:
62-
v += mod << (768 - mod.bit_length()) # left aligned
63+
v += mod
64+
if v < 0:
65+
v += mod
66+
elif v == 1<<768:
67+
v -= mod
6368
6469
return v & (2**768 - 1) # to be reduced % mod
6570
___

src/asm/ctx_inverse_mod_384-x86_64.pl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,13 @@
5757
5858
v = u*f1 + v*g1
5959
60+
mod <<= 768 - mod.bit_length() # align to the left
6061
if v < 0:
61-
v += mod << (768 - mod.bit_length()) # left aligned
62+
v += mod
63+
if v < 0:
64+
v += mod
65+
elif v == 1<<768:
66+
v -= mod
6267
6368
return v & (2**768 - 1) # to be reduced % mod
6469
___

0 commit comments

Comments (0)