Skip to content

Commit 0f87264

Browse files
committed
Actually benchmark g++ and clang++-compiled Eigen.
1 parent 32f9011 commit 0f87264

15 files changed

+20
-18
lines changed

benchmark/benchmarkflops.jl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -284,10 +284,10 @@ function benchmark_dot3(sizes)
284284
pmap(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
285285
br
286286
end
287-
BLAS.set_num_threads(1)
287+
# BLAS.set_num_threads(1)
288288
function sse!(Xβ, y, X, β)
289-
mul!(copyto!(Xβ, y), X, β, 1.0, -1.0)
290-
dot(Xβ, Xβ)
289+
dgemvmkl!(copyto!(Xβ, y), X, β, 1.0, -1.0)
290+
jdot(Xβ, Xβ)
291291
end
292292
sse_totwotuple(s::NTuple{2}) = s
293293
sse_totwotuple(s::Integer) = ((3s) >> 1, s >> 1)
@@ -318,7 +318,7 @@ function sse_bench!(br, s, i)
318318
br[9,i] = n_gflop / @belapsed sse!($Xβ, $y, $X, $β)
319319
end
320320
function benchmark_sse(sizes)
321-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", BLAS.vendor() === :mkl ? "MKL" : "OpenBLAS"]
321+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "MKL"]
322322
br = BenchmarkResult(tests, sizes)
323323
sm = br.sizedresults.results
324324
pmap(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))

benchmark/driver.jl

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,16 @@ AtmulB_bench = benchmark_AtmulB(sizes); println("A' * B benchmark results:"); pr
3434
Amulvb_bench = benchmark_Amulvb(sizes); println("A * b benchmark results:"); println(Amulvb_bench)
3535
Atmulvb_bench = benchmark_Atmulvb(sizes); println("A' * b benchmark results:"); println(Atmulvb_bench)
3636

37-
filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes); println("Benchmark results for dynamically sized 3x3 convolution:"); println(filter2d_dynamic_bench)
38-
filter2d_3x3_bench = benchmark_filter2d3x3(sizes); println("Benchmark results for statically sized 3x3 convolution:"); println(filter2d_3x3_bench)
39-
filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes); println("Benchmark results for unrolled 3x3 convolution:"); println(filter2d_unrolled_bench)
40-
4137
dot_bench = benchmark_dot(longsizes); println("a' * b benchmark results:"); println(dot_bench)
4238
selfdot_bench = benchmark_selfdot(longsizes); println("a' * a benchmark results:"); println(selfdot_bench)
4339
sse_bench = benchmark_sse(sizes); println("Benchmark resutls of summing squared error:"); println(sse_bench)
4440
aplusBc_bench = benchmark_aplusBc(sizes); println("Benchmark results of a .+ B .* c':"); println(aplusBc_bench)
4541
AplusAt_bench = benchmark_AplusAt(sizes); println("Benchmark results of A * A':"); println(AplusAt_bench)
42+
43+
filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes); println("Benchmark results for dynamically sized 3x3 convolution:"); println(filter2d_dynamic_bench)
44+
filter2d_3x3_bench = benchmark_filter2d3x3(sizes); println("Benchmark results for statically sized 3x3 convolution:"); println(filter2d_3x3_bench)
45+
filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes); println("Benchmark results for unrolled 3x3 convolution:"); println(filter2d_unrolled_bench)
46+
4647
vexp_bench = benchmark_exp(sizes); println("Benchmark results of exponentiating a vector:"); println(vexp_bench)
4748
randomaccess_bench = benchmark_random_access(sizes); println("Benchmark results from using a vector of indices:"); println(randomaccess_bench)
4849

@@ -51,24 +52,27 @@ using Cairo, Fontconfig
5152
const PICTURES = joinpath(pkgdir(LoopVectorization), "docs", "src", "assets")
5253
saveplot(f, br) = draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
5354

54-
saveplot("bench_logdettriangle_v", logdettriangle_bench);
55-
saveplot("bench_filter2d_dynamic_v", filter2d_dynamic_bench);
56-
saveplot("bench_filter2d_3x3_v", filter2d_3x3_bench);
57-
saveplot("bench_filter2d_unrolled_v", filter2d_unrolled_bench);
55+
saveplot("bench_dot3_v", dot3_bench);
5856
saveplot("bench_dot_v", dot_bench);
5957
saveplot("bench_selfdot_v", selfdot_bench);
60-
saveplot("bench_dot3_v", dot3_bench);
6158
saveplot("bench_sse_v", sse_bench);
6259
saveplot("bench_aplusBc_v", aplusBc_bench);
6360
saveplot("bench_AplusAt_v", AplusAt_bench);
64-
saveplot("bench_exp_v", vexp_bench);
65-
saveplot("bench_random_access_v", randomaccess_bench);
6661
saveplot("bench_AmulB_v", AmulB_bench);
6762
saveplot("bench_AmulBt_v", AmulBt_bench);
6863
saveplot("bench_AtmulB_v", AtmulB_bench);
6964
saveplot("bench_AtmulBt_v", AtmulBt_bench);
7065
saveplot("bench_Amulvb_v", Amulvb_bench);
7166
saveplot("bench_Atmulvb_v", Atmulvb_bench);
7267

68+
saveplot("bench_logdettriangle_v", logdettriangle_bench);
69+
saveplot("bench_filter2d_dynamic_v", filter2d_dynamic_bench);
70+
saveplot("bench_filter2d_3x3_v", filter2d_3x3_bench);
71+
saveplot("bench_filter2d_unrolled_v", filter2d_unrolled_bench);
72+
saveplot("bench_exp_v", vexp_bench);
73+
saveplot("bench_random_access_v", randomaccess_bench);
74+
75+
# @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench vexp_bench randomaccess_bench
76+
7377
@save "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
7478

benchmark/loadsharedlibs.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ mkl_set_num_threads(N::Integer) = ccall(MKL_SET_NUM_THREADS, Cvoid, (Int32,), N
116116
mkl_set_num_threads(1)
117117
openblas_set_num_threads(N::Integer) = ccall(OPENBLAS_SET_NUM_THREADS, Cvoid, (Int64,), N)
118118
openblas_set_num_threads(1)
119-
function dgemvmkl!(y::AbstractVector{Float64}, A::AbstractMatrix{Float64}, x::AbstractVector{Float64})
119+
function dgemvmkl!(y::AbstractVector{Float64}, A::AbstractMatrix{Float64}, x::AbstractVector{Float64}, α = 1.0, β = 0.0)
120120
transA = istransposed(A)
121121
pA = parent(A)
122122
M, N = size(pA)
@@ -125,8 +125,6 @@ function dgemvmkl!(y::AbstractVector{Float64}, A::AbstractMatrix{Float64}, x::Ab
125125
ldA = stride(pA, 2) % Int32
126126
incx = LinearAlgebra.stride1(x) % Int32
127127
incy = LinearAlgebra.stride1(y) % Int32
128-
α = 1.0
129-
β = 0.0
130128
ccall(
131129
DGEMV_MKL, Cvoid,
132130
(Ref{UInt8}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Float64}, Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Int32}),

docs/src/assets/bench_AmulB_v1.png

13.6 KB
Loading

docs/src/assets/bench_AmulBt_v1.png

14.5 KB
Loading

docs/src/assets/bench_Amulvb_v1.png

3.04 KB
Loading

docs/src/assets/bench_AplusAt_v1.png

13.8 KB
Loading

docs/src/assets/bench_AtmulB_v1.png

3.54 KB
Loading

docs/src/assets/bench_AtmulBt_v1.png

11.8 KB
Loading

docs/src/assets/bench_Atmulvb_v1.png

-38.8 KB
Loading

0 commit comments

Comments
 (0)