1
+ using BenchmarkTools, Random
2
+ using LinearAlgebra, RecursiveFactorization, VectorizationBase
3
+ nc = min (Int (VectorizationBase. num_cores ()), Threads. nthreads ())
4
+ BLAS. set_num_threads (nc)
5
+ BenchmarkTools. DEFAULT_PARAMETERS. seconds = 0.5
6
+
7
+ function luflop (m, n = m; innerflop = 2 )
8
+ sum (1 : min (m, n)) do k
9
+ invflop = 1
10
+ scaleflop = isempty ((k + 1 ): m) ? 0 : sum ((k + 1 ): m)
11
+ updateflop = isempty ((k + 1 ): n) ? 0 :
12
+ sum ((k + 1 ): n) do j
13
+ isempty ((k + 1 ): m) ? 0 : sum ((k + 1 ): m) do i
14
+ innerflop
15
+ end
16
+ end
17
+ invflop + scaleflop + updateflop
18
+ end
19
+ end
20
+
21
+ bas_mflops = Float64[]
22
+ rec_mflops = Float64[]
23
+ rec4_mflops = Float64[]
24
+ rec800_mflops = Float64[]
25
+ ref_mflops = Float64[]
26
+ ns = 4 : 8 : 500
27
+ for n in ns
28
+ @info " $n × $n "
29
+ rng = MersenneTwister (123 )
30
+ global A = rand (rng, n, n)
31
+ bt = @belapsed LinearAlgebra. lu! (B) setup= (B = copy (A))
32
+ push! (bas_mflops, luflop (n) / bt / 1e9 )
33
+
34
+ rt = @belapsed RecursiveFactorization. lu! (B) setup= (B = copy (A))
35
+ push! (rec_mflops, luflop (n) / rt / 1e9 )
36
+
37
+ rt4 = @belapsed RecursiveFactorization. lu! (B; threshold = 4 ) setup= (B = copy (A))
38
+ push! (rec4_mflops, luflop (n) / rt4 / 1e9 )
39
+
40
+ rt800 = @belapsed RecursiveFactorization. lu! (B; threshold = 800 ) setup= (B = copy (A))
41
+ push! (rec800_mflops, luflop (n) / rt800 / 1e9 )
42
+
43
+ ref = @belapsed LinearAlgebra. generic_lufact! (B) setup= (B = copy (A))
44
+ push! (ref_mflops, luflop (n) / ref / 1e9 )
45
+ end
46
+
47
+ using DataFrames, VegaLite
48
+ blaslib = if VERSION ≥ v " 1.7.0-beta2"
49
+ config = BLAS. get_config (). loaded_libs
50
+ occursin (" mkl_rt" , config[1 ]. libname) ? :MKL : :OpenBLAS
51
+ else
52
+ BLAS. vendor () === :mkl ? :MKL : :OpenBLAS
53
+ end
54
+ df = DataFrame (Size = ns,
55
+ Reference = ref_mflops)
56
+ setproperty! (df, blaslib, bas_mflops)
57
+ setproperty! (df, Symbol (" RF with default threshold" ), rec_mflops)
58
+ setproperty! (df, Symbol (" RF fully recursive" ), rec4_mflops)
59
+ setproperty! (df, Symbol (" RF fully iterative" ), rec800_mflops)
60
+ df = stack (df,
61
+ [Symbol (" RF with default threshold" ),
62
+ Symbol (" RF fully recursive" ),
63
+ Symbol (" RF fully iterative" ),
64
+ blaslib,
65
+ :Reference ], variable_name = :Library , value_name = :GFLOPS )
66
+ plt = df |> @vlplot (:line , color= {:Library , scale = {scheme = " category10" }},
67
+ x= {:Size }, y= {:GFLOPS },
68
+ width= 1000 , height= 600 )
69
+ save (joinpath (homedir (), " Pictures" ,
70
+ " lu_float64_$(VERSION ) _$(Sys. CPU_NAME) _$(nc) cores_$blaslib .png" ), plt)
71
+
72
+ #=
73
+ using Plot
74
+ plt = plot(ns, bas_mflops, legend=:bottomright, lab="OpenBLAS", title="LU Factorization Benchmark", marker=:auto, dpi=300)
75
+ plot!(plt, ns, rec_mflops, lab="RecursiveFactorization", marker=:auto)
76
+ plot!(plt, ns, ref_mflops, lab="Reference", marker=:auto)
77
+ xaxis!(plt, "size (N x N)")
78
+ yaxis!(plt, "GFLOPS")
79
+ savefig("lubench.png")
80
+ savefig("lubench.pdf")
81
+ =#
0 commit comments