Skip to content

Commit 220ca67

Browse files
adieneschriselrod
andauthored
improve performance of powermod (#59930)
based off of and closes #54866 differences to #54866 are: * checks `iszero(r)` in `mod(a, ::SignedMultiplicativeInverse)` for cases like `mod(0, multiplicativeinverse(-5))` * catches some cases of mismatched signed-ness of `x, m` that were failing * added a very very rough heuristic `(p > 2sizeof(mm))` so we don't bother computing the mi when the power is very small * switched to LSB multiplication loop from MSB benchmark: ``` using BenchmarkTools function setup() x,p,m = rand(NTuple{3, Int}) while iszero(m) || ((p < 0) && !isone(gcd(x, m))) x,p,m = rand(NTuple{3, Int}) end return (x, p, m) end @benchmark powermod(x, p, m) setup=((x,p,m) = setup()) #master BenchmarkTools.Trial: 10000 samples with 10 evaluations per sample. Range (min … max): 1.304 μs … 8.938 μs ┊ GC (min … max): 0.00% … 0.00% Time (median): 1.779 μs ┊ GC (median): 0.00% Time (mean ± σ): 1.803 μs ± 270.848 ns ┊ GC (mean ± σ): 0.00% ± 0.00% ▂▄▅████▇▆▄▁▁ ▁▁▁▁▁▁▁▂▂▃▅▅▇█████████████▆▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▃ 1.3 μs Histogram: frequency by time 2.76 μs < Memory estimate: 0 bytes, allocs estimate: 0. #PR julia> @benchmark powermod(x, p, m) setup=((x,p,m) = setup()) BenchmarkTools.Trial: 10000 samples with 135 evaluations per sample. Range (min … max): 667.896 ns … 1.267 μs ┊ GC (min … max): 0.00% … 0.00% Time (median): 854.015 ns ┊ GC (median): 0.00% Time (mean ± σ): 886.407 ns ± 74.610 ns ┊ GC (mean ± σ): 0.00% ± 0.00% ▂▃▆██▅▂ ▂▂▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▅▆▇████████▆▅▄▃▃▃▃▃▄▄▅▆▆▇▇████▇▆▆▅▄▃▃▃▃▂▂ ▄ 668 ns Histogram: frequency by time 1.06 μs < Memory estimate: 0 bytes, allocs estimate: 0. ``` Co-authored-by: Chris Elrod <[email protected]>
1 parent a3cf9b1 commit 220ca67

File tree

3 files changed

+51
-13
lines changed

3 files changed

+51
-13
lines changed

base/intfuncs.jl

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -535,20 +535,47 @@ function powermod(x::Integer, p::Integer, m::T) where T<:Integer
535535
end
536536
end
537537
(m == 1 || m == -1) && return zero(m)
538-
b = oftype(m,mod(x,m)) # this also checks for divide by zero
539-
540-
t = prevpow(2, p)
541-
r = 1
542-
while true
543-
if p >= t
544-
r = mod(widemul(r,b),m)
545-
p -= t
538+
539+
mm = uabs(m)
540+
rr = one(mm)
541+
bb = oftype(mm, mod(x, mm))
542+
543+
# legal && profitable
544+
if _powermod_mi_legal(mm) && (p > 2sizeof(mm))
545+
if bb == 0
546+
rr = zero(mm)
547+
else
548+
mis = MultiplicativeInverses.multiplicativeinverse(mm)
549+
Base.@assume_effects :terminates_locally while true
550+
if (p & 1) != 0
551+
rr = mod(rr * bb, mis)
552+
end
553+
p >>= 1
554+
p == 0 && break
555+
bb = mod(bb * bb, mis)
556+
end
557+
end
558+
else
559+
if bb == 0
560+
rr = zero(mm)
561+
else
562+
Base.@assume_effects :terminates_locally while true
563+
if (p & 1) != 0
564+
rr = oftype(mm, mod(widemul(rr, bb), mm))
565+
end
566+
p >>= 1
567+
p == 0 && break
568+
bb = oftype(mm, mod(widemul(bb, bb), mm))
569+
end
546570
end
547-
t >>>= 1
548-
t <= 0 && break
549-
r = mod(widemul(r,r),m)
550571
end
551-
return r
572+
r = oftype(m, rr)
573+
return (iszero(r) || (m > 0)) ? r : r + m
574+
end
575+
576+
_powermod_mi_legal(::Integer) = false
577+
function _powermod_mi_legal(mm::T) where {T<:Unsigned}
578+
return Base.hastypemax(T) && (mm <= (typemax(T) >> (sizeof(T) << 2)))
552579
end
553580

554581
# optimization: promote the modulus m to BigInt only once (cf. widemul in generic powermod above)

base/multinverses.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
module MultiplicativeInverses
44

5-
import Base: div, divrem, mul_hi, rem, unsigned
5+
import Base: div, divrem, mul_hi, rem, unsigned, mod
66
using Base: IndexLinear, IndexCartesian, tail
77
export multiplicativeinverse
88

@@ -153,6 +153,13 @@ function divrem(a::T, b::MultiplicativeInverse{T}) where T
153153
(d, a - d*b.divisor)
154154
end
155155

156+
mod(a::T, b::UnsignedMultiplicativeInverse{T}) where {T} = rem(a, b)
157+
158+
function mod(a::T, b::SignedMultiplicativeInverse{T}) where {T}
159+
r = rem(a, b)
160+
return (iszero(r) || signbit(r) == signbit(b.divisor)) ? r : r + b.divisor
161+
end
162+
156163
multiplicativeinverse(x::Signed) = SignedMultiplicativeInverse(x)
157164
multiplicativeinverse(x::Unsigned) = UnsignedMultiplicativeInverse(x)
158165

test/intfuncs.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ end
385385

386386
@test powermod(-3, 0x80, 7) === 2
387387
@test powermod(0x03, 0x80, 0x07) === 0x02
388+
389+
@test powermod(511, 1, 0x00000021) === 0x00000010
390+
@test powermod(Int8(-1), 0xff, Int8(33)) === Int8(32)
391+
@test powermod(0, 10, -5) === 0
388392
end
389393

390394
@testset "nextpow/prevpow" begin

0 commit comments

Comments
 (0)