Skip to content

Commit 02bd68f

Browse files
committed
Change threshold for OpenBLAS/Haswell and update perf.jl
1 parent a031648 commit 02bd68f

File tree

2 files changed

+13
-10
lines changed

2 files changed

+13
-10
lines changed

perf/lu.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using BenchmarkTools
1+
using BenchmarkTools, Random
22
using LinearAlgebra, RecursiveFactorization
33

44
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.5
@@ -24,7 +24,8 @@ ref_mflops = Float64[]
2424
ns = 4:8:500
2525
for n in ns
2626
@info "$n × $n"
27-
global A = rand(n, n)
27+
rng = MersenneTwister(123)
28+
global A = rand(rng, n, n)
2829
bt = @belapsed LinearAlgebra.lu!(B) setup=(B = copy(A))
2930
push!(bas_mflops, luflop(n)/bt/1e9)
3031

@@ -44,14 +45,14 @@ end
4445
using DataFrames, VegaLite
4546
blaslib = BLAS.vendor() === :mkl ? :MKL : :OpenBLAS
4647
df = DataFrame(Size = ns,
47-
RecursiveFactorization = rec_mflops,
48-
RecursiveFactorization4 = rec4_mflops,
49-
RecursiveFactorization800 = rec800_mflops,
5048
Reference = ref_mflops)
5149
setproperty!(df, blaslib, bas_mflops)
52-
df = stack(df, [:RecursiveFactorization,
53-
:RecursiveFactorization4,
54-
:RecursiveFactorization800,
50+
setproperty!(df, Symbol("RecursiveFactorization with default recursion threshold"), rec_mflops)
51+
setproperty!(df, Symbol("RecursiveFactorization fully recursive"), rec4_mflops)
52+
setproperty!(df, Symbol("RecursiveFactorization fully iterative"), rec800_mflops)
53+
df = stack(df, [Symbol("RecursiveFactorization with default recursion threshold"),
54+
Symbol("RecursiveFactorization fully recursive"),
55+
Symbol("RecursiveFactorization fully iterative"),
5556
blaslib,
5657
:Reference], variable_name = :Library, value_name = :GFLOPS)
5758
plt = df |> @vlplot(

src/lu.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ function lu!(A, pivot::Union{Val{false}, Val{true}} = Val(true); check=true, kwa
1616
return F
1717
end
1818

19-
# Use a function here to make sure it gets optimized away
19+
const RECURSION_THRESHOLD = Ref(-1)
20+
2021
# AVX512 needs a smaller recursion limit
2122
function pick_threshold()
2223
blasvendor = BLAS.vendor()
24+
RECURSION_THRESHOLD[] >= 0 && return RECURSION_THRESHOLD[]
2325
if blasvendor === :openblas || blasvendor === :openblas64
24-
LoopVectorization.VectorizationBase.AVX512F ? 110 : 192
26+
LoopVectorization.VectorizationBase.AVX512F ? 110 : 72
2527
else
2628
LoopVectorization.VectorizationBase.AVX512F ? 48 : 72
2729
end

0 commit comments

Comments
 (0)