Skip to content

Commit baf8f6a

Browse files
committed
Remove assets
1 parent c2d2a43 commit baf8f6a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+395
-472
lines changed

benchmark/Manifest.toml

Lines changed: 358 additions & 207 deletions
Large diffs are not rendered by default.

benchmark/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
[deps]
2+
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
23
Cairo = "159f3aea-2a34-519c-b102-8c37f9878175"
34
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
45
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
@@ -12,5 +13,6 @@ MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
1213
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
1314
OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
1415
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
16+
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
1517
SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
1618
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

benchmark/driver.jl

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,18 @@
22
# const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
33
# includet(joinpath(LOOPVECBENCHDIR, "driver.jl"))
44

5-
using Distributed, LoopVectorization, JLD2
6-
5+
using Distributed, LoopVectorization, JLD2, ProgressMeter
76
const LOOPVECBENCHDIR = joinpath(pkgdir(LoopVectorization), "benchmark")
87
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
98
include(joinpath(LOOPVECBENCHDIR, "plotbenchmarks.jl"))
109

1110

12-
# nprocs_to_add() = (Sys.CPU_THREADS >> 1) - 1
13-
nprocs_to_add() = (Sys.CPU_THREADS >> 1)
11+
nprocs_to_add() = ((Sys.CPU_THREADS)::Int >> 1)
12+
# nprocs_to_add() = ((Sys.CPU_THREADS)::Int >> 1) - 1
1413
start_worker(wid) = remotecall(include, wid, joinpath(LOOPVECBENCHDIR, "setup_worker.jl"))
15-
function start_workers()
16-
addprocs(nprocs_to_add())
17-
foreach(wait, map(start_worker, workers()))
14+
function start_workers(nprocs=nprocs_to_add())
15+
addprocs(nprocs, exeflags="--project=$(Base.active_project())")
16+
foreach(wait, map(start_worker, workers()))
1817
end
1918
stop_workers() = rmprocs(workers())
2019

@@ -29,36 +28,36 @@ function blastests()
2928
end
3029
function benchmark_AmulB(sizes)
3130
tests = blastests()
32-
start_workers()
31+
start_workers(nprocs_to_add()>>1)
3332
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
34-
pmap(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
33+
@showprogress pmap(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
3534
br = BenchmarkResult(Matrix(sm), tests, sizes)
3635
stop_workers()
3736
br
3837
end
3938
function benchmark_AmulBt(sizes)
4039
tests = blastests()
41-
start_workers()
40+
start_workers(nprocs_to_add()>>1)
4241
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
43-
pmap(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
42+
@showprogress pmap(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
4443
br = BenchmarkResult(Matrix(sm), tests, sizes)
4544
stop_workers()
4645
br
4746
end
4847
function benchmark_AtmulB(sizes)
4948
tests = blastests()
50-
start_workers()
49+
start_workers(nprocs_to_add()>>1)
5150
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
52-
pmap(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
51+
@showprogress pmap(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
5352
br = BenchmarkResult(Matrix(sm), tests, sizes)
5453
stop_workers()
5554
br
5655
end
5756
function benchmark_AtmulBt(sizes)
5857
tests = blastests()
59-
start_workers()
58+
start_workers(nprocs_to_add()>>1)
6059
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
61-
pmap(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
60+
@showprogress pmap(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
6261
br = BenchmarkResult(Matrix(sm), tests, sizes)
6362
stop_workers()
6463
br
@@ -73,7 +72,7 @@ function benchmark_dot(sizes)
7372
tests = dot_tests()
7473
start_workers()
7574
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
76-
pmap(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
75+
@showprogress pmap(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
7776
br = BenchmarkResult(Matrix(sm), tests, sizes)
7877
stop_workers()
7978
br
@@ -82,7 +81,7 @@ function benchmark_selfdot(sizes)
8281
tests = dot_tests()
8382
start_workers()
8483
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
85-
pmap(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
84+
@showprogress pmap(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
8685
br = BenchmarkResult(Matrix(sm), tests, sizes)
8786
stop_workers()
8887
br
@@ -91,7 +90,7 @@ function benchmark_Amulvb(sizes)
9190
tests = blastests()
9291
start_workers()
9392
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
94-
pmap(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
93+
@showprogress pmap(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
9594
br = BenchmarkResult(Matrix(sm), tests, sizes)
9695
stop_workers()
9796
br
@@ -100,31 +99,31 @@ function benchmark_Atmulvb(sizes)
10099
tests = blastests()
101100
start_workers()
102101
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
103-
pmap(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
102+
@showprogress pmap(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
104103
br = BenchmarkResult(Matrix(sm), tests, sizes)
105104
stop_workers()
106105
br
107106
end
108107
function benchmark_dot3(sizes)
109108
tests = ["LoopVectorization", "Julia", "Clang", "GFortran"]
110109
INTEL_BENCH && push!(tests, "icc", "ifort")
111-
push!(test, "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra")
110+
push!(tests, "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra")
112111
start_workers()
113112
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
114-
pmap(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
113+
@showprogress pmap(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
115114
br = BenchmarkResult(Matrix(sm), tests, sizes)
116115
stop_workers()
117116
br
118117
end
119118
function benchmark_sse(sizes)
120119
tests = ["LoopVectorization", "Julia", "Clang", "GFortran"]
121120
INTEL_BENCH && push!(tests, "icc", "ifort")
122-
push!(test, "g++ & Eigen-3", "clang++ & Eigen-3", "MKL")
121+
push!(tests, "g++ & Eigen-3", "clang++ & Eigen-3")
123122
MKL_BENCH && push!(tests, "MKL")
124123

125124
start_workers()
126125
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
127-
pmap(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
126+
@showprogress pmap(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
128127
br = BenchmarkResult(Matrix(sm), tests, sizes)
129128
stop_workers()
130129
br
@@ -134,7 +133,7 @@ function benchmark_exp(sizes)
134133
INTEL_BENCH && push!(tests, "icc", "ifort")
135134
start_workers()
136135
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
137-
pmap(is -> exp_bench!(sm, is[2], is[1]), enumerate(sizes))
136+
@showprogress pmap(is -> exp_bench!(sm, is[2], is[1]), enumerate(sizes))
138137
br = BenchmarkResult(Matrix(sm), tests, sizes)
139138
stop_workers()
140139
br
@@ -145,7 +144,7 @@ function benchmark_aplusBc(sizes)
145144
push!(tests, "g++ & Eigen-3", "clang++ & Eigen-3")
146145
start_workers()
147146
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
148-
pmap(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
147+
@showprogress pmap(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
149148
br = BenchmarkResult(Matrix(sm), tests, sizes)
150149
stop_workers()
151150
br
@@ -157,7 +156,7 @@ function benchmark_AplusAt(sizes)
157156
INTEL_BENCH && push!(tests, "ifort-builtin")
158157
start_workers()
159158
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
160-
pmap(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
159+
@showprogress pmap(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
161160
br = BenchmarkResult(Matrix(sm), tests, sizes)
162161
stop_workers()
163162
br
@@ -167,7 +166,7 @@ function benchmark_random_access(sizes)
167166
INTEL_BENCH && push!(tests, "icc", "ifort")
168167
start_workers()
169168
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
170-
pmap(is -> randomaccess_bench!(sm, is[2], is[1]), enumerate(sizes))
169+
@showprogress pmap(is -> randomaccess_bench!(sm, is[2], is[1]), enumerate(sizes))
171170
br = BenchmarkResult(Matrix(sm), tests, sizes)
172171
stop_workers()
173172
br
@@ -179,7 +178,7 @@ function benchmark_logdettriangle(sizes)
179178
push!(tests, "LinearAlgebra")
180179
start_workers()
181180
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
182-
pmap(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))
181+
@showprogress pmap(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))
183182
br = BenchmarkResult(Matrix(sm), tests, sizes)
184183
stop_workers()
185184
br
@@ -189,7 +188,7 @@ function benchmark_filter2d(sizes, K)
189188
INTEL_BENCH && push!(tests, "icc", "ifort")
190189
start_workers()
191190
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
192-
pmap(is -> filter2d_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
191+
@showprogress pmap(is -> filter2d_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
193192
br = BenchmarkResult(Matrix(sm), tests, sizes)
194193
stop_workers()
195194
br
@@ -208,7 +207,7 @@ function benchmark_filter2dunrolled(sizes)
208207
start_workers()
209208
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
210209
K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3,3))
211-
pmap(is -> filter2dunrolled_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
210+
@showprogress pmap(is -> filter2dunrolled_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
212211
br = BenchmarkResult(Matrix(sm), tests, sizes)
213212
stop_workers()
214213
br
@@ -234,21 +233,22 @@ println("A' * b benchmark results:"); Atmulvb_bench = benchmark_Atmulvb(sizes);
234233
println("a' * b benchmark results:"); dot_bench = benchmark_dot(longsizes); println(dot_bench)
235234
println("a' * a benchmark results:"); selfdot_bench = benchmark_selfdot(longsizes); println(selfdot_bench)
236235

237-
println("Benchmark resutls of summing squared error:"); sse_bench = benchmark_sse(sizes); println(sse_bench)
238236
println("Benchmark results of a .+ B .* c':"); aplusBc_bench = benchmark_aplusBc(sizes); println(aplusBc_bench)
239237
println("Benchmark results of A .+ A':"); AplusAt_bench = benchmark_AplusAt(sizes); println(AplusAt_bench)
240238

241239
println("Benchmark results for dynamically sized 3x3 convolution:"); filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes); println(filter2d_dynamic_bench)
242240
println("Benchmark results for statically sized 3x3 convolution:"); filter2d_3x3_bench = benchmark_filter2d3x3(sizes); println(filter2d_3x3_bench)
243241
println("Benchmark results for unrolled 3x3 convolution:"); filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes); println(filter2d_unrolled_bench)
244242

243+
println("Benchmark resutls of summing squared error:"); sse_bench = benchmark_sse(sizes); println(sse_bench)
245244
println("Benchmark results of exponentiating a vector:"); vexp_bench = benchmark_exp(sizes); println(vexp_bench)
246245
println("Benchmark results from using a vector of indices:"); randomaccess_bench = benchmark_random_access(sizes); println(randomaccess_bench)
247246

248247
const v = 2
249-
using Cairo, Fontconfig
248+
# using Cairo, Fontconfig
250249
const PICTURES = joinpath(pkgdir(LoopVectorization), "docs", "src", "assets")
251-
saveplot(f, br) = draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
250+
# saveplot(f, br) = draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
251+
saveplot(f, br) = draw(SVG(joinpath(PICTURES, f * "$v.svg"), 12inch, 8inch), plot(br))
252252

253253
# If only rerunning a few, remove them from load.
254254
# @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench

benchmark/looptests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ function ArrayInterface.strides(A::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR
2020
(Static{1}(), (Static{UR}() - Static{LR}() + Static{1}()))
2121
end
2222
ArrayInterface.offsets(A::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} = (Static{LR}(), Static{LC}())
23+
ArrayInterface.parent_type(::Type{<:SizedOffsetMatrix{T}}) where {T} = Matrix{T}
2324
Base.getindex(A::SizedOffsetMatrix, i, j) = LoopVectorization.vload(LoopVectorization.stridedpointer(A), (i,j))
2425

2526

benchmark/setup_worker.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using LoopVectorization
1+
using LoopVectorization, ProgressMeter
22
const LOOPVECBENCHDIR = joinpath(pkgdir(LoopVectorization), "benchmark")
33
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
44

docs/src/assets/bench_AmulB_v1.png

-516 KB
Binary file not shown.

docs/src/assets/bench_AmulB_v2.png

-501 KB
Binary file not shown.

docs/src/assets/bench_AmulBt_v1.png

-489 KB
Binary file not shown.

docs/src/assets/bench_AmulBt_v2.png

-486 KB
Binary file not shown.

docs/src/assets/bench_Amulvb_v1.png

-360 KB
Binary file not shown.

0 commit comments

Comments
 (0)