diff --git a/perf/lanczos/Project.toml b/perf/lanczos/Project.toml new file mode 100644 index 0000000000..0d00c55007 --- /dev/null +++ b/perf/lanczos/Project.toml @@ -0,0 +1,7 @@ +[deps] +PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +Reactant = "3c362404-f566-11ee-1572-e11a4b42c853" +Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[sources] +Reactant = {path = "../.."} diff --git a/perf/lanczos/README.md b/perf/lanczos/README.md new file mode 100644 index 0000000000..411f06105a --- /dev/null +++ b/perf/lanczos/README.md @@ -0,0 +1,19 @@ +# Lanczos benchmark + +This benchmark implements the Lanczos algorithm for top-k eigenvalue decomposition of a Hermitian matrix. + +> **require** $A, b$ +> +> $\beta_1 v_1 = b$ s.t. $||v_1|| = 1$ +> +> **for** $k = 1, 2, \dots$ +> +> $\quad\alpha_k = v_k^\dagger A v_k$ +> +> $\quad\beta_{k+1} v_{k+1} = A v_k - \alpha_k v_k - \beta_k^* v_{k-1}$ +> +> **end** + +$A V_k = V_k T_k + \beta_{k+1,k} v_{k+1}e_k^T = V_{k+1}T_{k+1,k}$ + +$V_k^\dagger V_k = \mathbb{I}_k$ diff --git a/perf/lanczos/main.jl b/perf/lanczos/main.jl new file mode 100644 index 0000000000..d3f3279190 --- /dev/null +++ b/perf/lanczos/main.jl @@ -0,0 +1,138 @@ +using Reactant +using Reactant: Ops, TracedRNumber +using LinearAlgebra +using Random +using Statistics +using BenchmarkTools +using PrettyTables +using Unitful + +# setup +Random.seed!(0) + +A = rand(Float64, 512, 512) +A = A' * A # make it hermitian +@assert ishermitian(A) + +b = normalize!(rand(Float64, 512)) + +A_re = Reactant.to_rarray(A) +b_re = Reactant.to_rarray(b) + +# fixes +# TODO move to Reactant +function Base.zeros(::Type{TracedRNumber{T}}, dims::NTuple{N,<:Integer}) where {T,N} + _zero = Ops.constant(zero(T)) + return Ops.broadcast_in_dim(_zero, Int[], collect(dims)) +end + +# algorithm +# - A: matrix to (partially) decompose. lanczos requires it to be symmetric/hermitian. +# - v0: initial vector, should be normalized. +# - m: decomposition rank +function lanczos(A, v0, m) + n = size(A, 1) + V = zeros(eltype(A), n, m + 1) + T = zeros(eltype(A), m, m) + + v = v0 / norm(v0) + V[:, 1] = v + beta = 0.0 + w = similar(v) + + @allowscalar for j in 1:m + w .= A * v + if j > 1 + w .-= beta * V[:, j - 1] + end + alpha = dot(w, v) + w .-= alpha * v + beta = norm(w) + + T[j, j] = alpha + if j < m + T[j, j + 1] = beta + T[j + 1, j] = beta + end + + # early termination if Krylov subspace is reached + # TODO Reactant.@trace doesn't support return statements yet + # @trace if beta < eps(eltype(A)) + # return V[:, 1:j], T[1:j, 1:j] + # end + + # full reorthogonalization via modified Gram-Schmidt to avoid spurious duplicate eigenvalues + # TODO implicitly restarted Lanczos? available at KrylovKit + for k in 1:j + w .-= dot(V[:, k], w) * V[:, k] + end + + v = w / beta + V[:, j + 1] = v + end + + return V, T +end + +V, T = lanczos(A, b, 512) +eigvals(T) + +l1_error = sum(abs.(eigvals(A) - eigvals(T))) +l2_error = sqrt(sum(abs2.(eigvals(A) - eigvals(T)))) +linf_error = maximum(abs.(eigvals(A) - eigvals(T))) +@info "Error" l1 = l1_error l2 = l2_error linf = linf_error + +# benchmarking +krylovdim = 16 # considered constant + +T = typeof(1.0u"ns") +results = Vector{Tuple{String,String,T,T,Float64}}() + +# primal +## only XLA +f_xla = @compile compile_options = Reactant.DefaultXLACompileOptions(; sync=true) lanczos( + A_re, b_re, krylovdim +) +b = @benchmark $f_xla($A_re, $b_re, krylovdim) setup = (GC.gc(true)) +baseline = median(b).time +push!( + results, ("Primal", "Only XLA", median(b).time * 1.0u"ns", std(b).time * 1.0u"ns", 1.0) +) + +## default +f_default = @compile sync = true lanczos(A_re, b_re, krylovdim) +b = @benchmark $f_default($A_re, $b_re, krylovdim) setup = (GC.gc(true)) +push!( + results, + ( + "Primal", + "Default", + median(b).time * 1u"ns", + std(b).time * 1u"ns", + median(b).time / baseline, + ), +) + +# print results +header = ( + ["Mode", "Optimization Passes", "Median Time", "Std. Dev. Time", "Relative Timing"], + ["", "", "μs", "μs", "Time / XLA Time"], +) + +let results = copy(results) + results = permutedims(stack(collect.(results)), (2, 1)) + results[:, 3] .= uconvert.(u"μs", results[:, 3]) + results[:, 4] .= uconvert.(u"μs", results[:, 4]) + + hl_r = Highlighter((data, i, j) -> j == 5 && data[i, j] > 1.0, crayon"bold red") + hl_g = Highlighter((data, i, j) -> j == 5 && data[i, j] < 1.0, crayon"bold green") + display( + pretty_table( + results; + header, + header_crayon=crayon"yellow bold", + highlighters=(hl_r, hl_g), + tf=tf_unicode_rounded, + ), + ) +end \ No newline at end of file