Skip to content

Commit 7aea254

Browse files
more fixes
1 parent b1b2e6b commit 7aea254

File tree

3 files changed

+81
-5
lines changed

3 files changed

+81
-5
lines changed

Project.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
88
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
99
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
1010
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
11-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1211
SparseBandedMatrices = "bd59d7e1-4699-4102-944e-d05209cb92aa"
1312
StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
1413
TriangularSolve = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
@@ -19,7 +18,6 @@ LinearAlgebra = "1.5"
1918
LoopVectorization = "0.10,0.11, 0.12"
2019
Polyester = "0.3.2,0.4.1, 0.5, 0.6, 0.7"
2120
PrecompileTools = "1"
22-
SparseArrays = "1.11.0"
2321
SparseBandedMatrices = "0.1.0"
2422
StrideArraysCore = "0.5.5"
2523
TriangularSolve = "0.2"

perf/lu.jl

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
using BenchmarkTools, Random
2+
using LinearAlgebra, RecursiveFactorization, VectorizationBase
3+
nc = min(Int(VectorizationBase.num_cores()), Threads.nthreads())
4+
BLAS.set_num_threads(nc)
5+
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.5
6+
7+
function luflop(m, n = m; innerflop = 2)
8+
sum(1:min(m, n)) do k
9+
invflop = 1
10+
scaleflop = isempty((k + 1):m) ? 0 : sum((k + 1):m)
11+
updateflop = isempty((k + 1):n) ? 0 :
12+
sum((k + 1):n) do j
13+
isempty((k + 1):m) ? 0 : sum((k + 1):m) do i
14+
innerflop
15+
end
16+
end
17+
invflop + scaleflop + updateflop
18+
end
19+
end
20+
21+
bas_mflops = Float64[]
22+
rec_mflops = Float64[]
23+
rec4_mflops = Float64[]
24+
rec800_mflops = Float64[]
25+
ref_mflops = Float64[]
26+
ns = 4:8:500
27+
for n in ns
28+
@info "$n × $n"
29+
rng = MersenneTwister(123)
30+
global A = rand(rng, n, n)
31+
bt = @belapsed LinearAlgebra.lu!(B) setup=(B = copy(A))
32+
push!(bas_mflops, luflop(n) / bt / 1e9)
33+
34+
rt = @belapsed RecursiveFactorization.lu!(B) setup=(B = copy(A))
35+
push!(rec_mflops, luflop(n) / rt / 1e9)
36+
37+
rt4 = @belapsed RecursiveFactorization.lu!(B; threshold = 4) setup=(B = copy(A))
38+
push!(rec4_mflops, luflop(n) / rt4 / 1e9)
39+
40+
rt800 = @belapsed RecursiveFactorization.lu!(B; threshold = 800) setup=(B = copy(A))
41+
push!(rec800_mflops, luflop(n) / rt800 / 1e9)
42+
43+
ref = @belapsed LinearAlgebra.generic_lufact!(B) setup=(B = copy(A))
44+
push!(ref_mflops, luflop(n) / ref / 1e9)
45+
end
46+
47+
using DataFrames, VegaLite
48+
blaslib = if VERSION >= v"1.7.0-beta2"
49+
config = BLAS.get_config().loaded_libs
50+
occursin("mkl_rt", config[1].libname) ? :MKL : :OpenBLAS
51+
else
52+
BLAS.vendor() === :mkl ? :MKL : :OpenBLAS
53+
end
54+
df = DataFrame(Size = ns,
55+
Reference = ref_mflops)
56+
setproperty!(df, blaslib, bas_mflops)
57+
setproperty!(df, Symbol("RF with default threshold"), rec_mflops)
58+
setproperty!(df, Symbol("RF fully recursive"), rec4_mflops)
59+
setproperty!(df, Symbol("RF fully iterative"), rec800_mflops)
60+
df = stack(df,
61+
[Symbol("RF with default threshold"),
62+
Symbol("RF fully recursive"),
63+
Symbol("RF fully iterative"),
64+
blaslib,
65+
:Reference], variable_name = :Library, value_name = :GFLOPS)
66+
plt = df |> @vlplot(:line, color={:Library, scale = {scheme = "category10"}},
67+
x={:Size}, y={:GFLOPS},
68+
width=1000, height=600)
69+
save(joinpath(homedir(), "Pictures",
70+
"lu_float64_$(VERSION)_$(Sys.CPU_NAME)_$(nc)cores_$blaslib.png"), plt)
71+
72+
#=
73+
using Plots
74+
plt = plot(ns, bas_mflops, legend=:bottomright, lab="OpenBLAS", title="LU Factorization Benchmark", marker=:auto, dpi=300)
75+
plot!(plt, ns, rec_mflops, lab="RecursiveFactorization", marker=:auto)
76+
plot!(plt, ns, ref_mflops, lab="Reference", marker=:auto)
77+
xaxis!(plt, "size (N x N)")
78+
yaxis!(plt, "GFLOPS")
79+
savefig("lubench.png")
80+
savefig("lubench.pdf")
81+
=#

src/butterflylu.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,17 +160,14 @@ function materializeUV(A, (uv,))
160160
🦋2!(view(Bu2, 1 : Mh, 1 : Nh), U₁u, U₁l)
161161
🦋2!(view(Bu2, Mh + 1: M, Nh + 1: N), U₂u, U₂l)
162162

163-
#Bu1 = spzeros(M, N)
164163
Bu1 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
165164
🦋!(A, Bu1, Uu, Ul)
166165

167-
#Bv2 = spzeros(M, N)
168166
Bv2 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
169167

170168
🦋2!(view(Bv2, 1 : Mh, 1 : Nh), V₁u, V₁l)
171169
🦋2!(view(Bv2, Mh + 1: M, Nh + 1: N), V₂u, V₂l)
172170

173-
#Bv1 = spzeros(M, N)
174171
Bv1 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
175172
🦋!(A, Bv1, Vu, Vl)
176173

0 commit comments

Comments
 (0)