Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Temporary
results.csv
*.DS_Store
*.swp
*.jl.cov
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ There are three classes of benchmarks:
| | linked | list.jl | Small pointer-heavy data structure. |
| | | tree.jl | Small pointer-heavy data structure. |
| | strings | strings.jl | |
| | forwarddiff | fdiff.jl| ForwardDiff matrix benchmark |
| Multithreaded | binary_tree | tree_immutable.jl | Small pointer-heavy data structure. |
| | | tree_mutable.jl | Small pointer-heavy data structure. |
| Slow | rb\_tree | rb\_tree.jl | Pointer graph whose minimum linear arrangement has cost Θ(n²). |
Expand Down
4 changes: 4 additions & 0 deletions benches/serial/forwarddiff/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[deps]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
244 changes: 244 additions & 0 deletions benches/serial/forwarddiff/fdiff.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
include(joinpath("..", "..", "..", "utils.jl"))

using LinearAlgebra, Statistics, ForwardDiff

const BENCH_OPNORMS = (66.0, 33.0, 22.0, 11.0, 6.0, 3.0, 2.0, 0.5, 0.03, 0.001)

function randmatrices(n)
A = randn(n, n)
op = opnorm(A, 1)
map(BENCH_OPNORMS) do x
(x / op) .* A
end
end

function _matevalpoly!(B, C, D, A::AbstractMatrix{T}, t::NTuple{1}) where {T}
M = size(A, 1)
te = T(last(t))
@inbounds for n = 1:M, m = 1:M
B[m, n] = ifelse(m == n, te, zero(te))
end
@inbounds for n = 1:M, k = 1:M, m = 1:M
B[m, n] = muladd(A[m, k], D[k, n], B[m, n])
end
return B
end
function _matevalpoly!(B, C, D, A::AbstractMatrix{T}, t::NTuple) where {T}
M = size(A, 1)
te = T(last(t))
@inbounds for n = 1:M, m = 1:M
C[m, n] = ifelse(m == n, te, zero(te))
end
@inbounds for n = 1:M, k = 1:M, m = 1:M
C[m, n] = muladd(A[m, k], D[k, n], C[m, n])
end
_matevalpoly!(B, D, C, A, Base.front(t))
end
function matevalpoly!(B, C, D, A::AbstractMatrix{T}, t::NTuple) where {T}
t1 = Base.front(t)
te = T(last(t))
tp = T(last(t1))
@inbounds for j in axes(A, 2), i in axes(A, 1)
D[i, j] = muladd(A[i, j], te, ifelse(i == j, tp, zero(tp)))
end
_matevalpoly!(B, C, D, A, Base.front(t1))
end
function matevalpoly!(B, C, D, A::AbstractMatrix{T}, t::NTuple{2}) where {T}
t1 = Base.front(t)
te = T(last(t))
tp = T(last(t1))
@inbounds for j in axes(A, 2), i in axes(A, 1)
B[i, j] = muladd(A[i, j], te, ifelse(i == j, tp, zero(tp)))
end
return B
end
ceillog2(x::Float64) =
(reinterpret(Int, x) - 1) >> Base.significand_bits(Float64) - 1022

naive_matmul!(C, A, B) = @inbounds for n in axes(C, 2), m in axes(C, 1)
Cmn = zero(eltype(C))
for k in axes(A, 2)
Cmn = muladd(A[m, k], B[k, n], Cmn)
end
C[m, n] = Cmn
end
naive_matmuladd!(C, A, B) = @inbounds for n in axes(C, 2), m in axes(C, 1)
Cmn = zero(eltype(C))
for k in axes(A, 2)
Cmn = muladd(A[m, k], B[k, n], Cmn)
end
C[m, n] += Cmn
end
_deval(x) = x
_deval(x::ForwardDiff.Dual) = _deval(ForwardDiff.value(x))

function opnorm1(A)
n = _deval(zero(eltype(A)))
@inbounds for j in axes(A, 2)
s = _deval(zero(eltype(A)))
@fastmath for i in axes(A, 1)
s += abs(_deval(A[i, j]))
end
n = max(n, s)
end
return n
end

function expm!(
Z::AbstractMatrix,
A::AbstractMatrix,
(matmul!)=naive_matmul!,
(matmuladd!)=naive_matmuladd!
)
# omitted: matrix balancing, i.e., LAPACK.gebal!
# nA = maximum(sum(abs.(A); dims=Val(1))) # marginally more performant than norm(A, 1)
nA = opnorm1(A)
N = LinearAlgebra.checksquare(A)
# B and C are temporaries
## For sufficiently small nA, use lower order Padé-Approximations
A2 = similar(A)
matmul!(A2, A, A)
if nA <= 2.1
U = Z
V = similar(A)
if nA <= 0.015
matevalpoly!(V, nothing, nothing, A2, (60, 1))
matmul!(U, A, V)
matevalpoly!(V, nothing, nothing, A2, (120, 12))
else
B = similar(A)
if nA <= 0.25
matevalpoly!(V, nothing, U, A2, (15120, 420, 1))
matmul!(U, A, V)
matevalpoly!(V, nothing, B, A2, (30240, 3360, 30))
else
C = similar(A)
if nA <= 0.95
matevalpoly!(V, C, U, A2, (8648640, 277200, 1512, 1))
matmul!(U, A, V)
matevalpoly!(V, B, C, A2, (17297280, 1995840, 25200, 56))
else
matevalpoly!(V, C, U, A2, (8821612800, 302702400, 2162160, 3960, 1))
matmul!(U, A, V)
matevalpoly!(
V,
B,
C,
A2,
(17643225600, 2075673600, 30270240, 110880, 90)
)
end
end
end
@inbounds for m = 1:N*N
u = U[m]
v = V[m]
U[m] = v + u
V[m] = v - u
end
ldiv!(lu!(V), U)
expA = U
# expA = (V - U) \ (V + U)
else
si = ceillog2(nA / 5.4) # power of 2 later reversed by squaring
if si > 0
A = A / exp2(si)
end
A4 = similar(A)
A6 = similar(A)
matmul!(A4, A2, A2)
matmul!(A6, A2, A4)

U = Z
B = zero(A)
@inbounds for m = 1:N
B[m, m] = 32382376266240000
end
@inbounds for m = 1:N*N
a6 = A6[m]
a4 = A4[m]
a2 = A2[m]
B[m] = muladd(
33522128640,
a6,
muladd(10559470521600, a4, muladd(1187353796428800, a2, B[m]))
)
U[m] = muladd(16380, a4, muladd(40840800, a2, a6))
end
matmuladd!(B, A6, U)
matmul!(U, A, B)

V = si > 0 ? fill!(A, 0) : zero(A)
@inbounds for m = 1:N
V[m, m] = 64764752532480000
end
@inbounds for m = 1:N*N
a6 = A6[m]
a4 = A4[m]
a2 = A2[m]
B[m] = muladd(182, a6, muladd(960960, a4, 1323241920 * a2))
V[m] = muladd(
670442572800,
a6,
muladd(129060195264000, a4, muladd(7771770303897600, a2, V[m]))
)
end
matmuladd!(V, A6, B)

@inbounds for m = 1:N*N
u = U[m]
v = V[m]
U[m] = v + u
V[m] = v - u
end
ldiv!(lu!(V), U)
expA = U
# expA = (V - U) \ (V + U)

if si > 0 # squaring to reverse dividing by power of 2
for _ = 1:si
matmul!(V, expA, expA)
expA, V = V, expA
end
if Z !== expA
copyto!(Z, expA)
expA = Z
end
end
end
expA
end
expm_bad!(Z, A) = expm!(Z, A, mul!, (C, A, B) -> mul!(C, A, B, 1.0, 1.0))

d(x, n) = ForwardDiff.Dual(x, ntuple(_ -> randn(), n))
function dualify(A, n, j)
if n > 0
A = d.(A, n)
if (j > 0)
A = d.(A, j)
end
end
A
end
struct ForEach{A,B,F}
f::F
a::A
b::B
end
ForEach(f, b) = ForEach(f, nothing, b)
(f::ForEach)() = foreach(Base.Fix1(f.f, f.a), f.b)
(f::ForEach{Nothing})() = foreach(f.f, f.b)

l = 2;
j = 2;
n = 7;
const As = map(x -> dualify(x, n, j), randmatrices(l));
const B = similar(As[1]);

function foo()
for i in 1:400000
ForEach(expm_bad!, B, As)()
end
end

@gctime foo()