Skip to content

Commit 7cba99f

Browse files
committed
Some tweaks.
1 parent fffb6e5 commit 7cba99f

29 files changed

+133
-82
lines changed

benchmark/driver.jl

Lines changed: 22 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,32 @@ end
2222
# sizes = 23:23
2323
sizes = 256:-1:2
2424

25-
AmulB_bench = benchmark_AmulB(sizes)
26-
AmulBt_bench = benchmark_AmulBt(sizes)
27-
AtmulBt_bench = benchmark_AtmulBt(sizes)
28-
AtmulB_bench = benchmark_AtmulB(sizes)
29-
30-
Amulvb_bench = benchmark_Amulvb(sizes)
31-
Atmulvb_bench = benchmark_Atmulvb(sizes)
32-
33-
filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes)#512:-1:2)
34-
filter2d_3x3_bench = benchmark_filter2d3x3(sizes)#512:-1:2)
35-
filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes)#512:-1:2)
36-
37-
dot3_bench = benchmark_dot3(sizes)
38-
dot_bench = benchmark_dot(sizes)
39-
selfdot_bench = benchmark_selfdot(sizes)
40-
sse_bench = benchmark_sse(sizes)
41-
aplusBc_bench = benchmark_aplusBc(sizes)
42-
AplusAt_bench = benchmark_AplusAt(sizes)
43-
vexp_bench = benchmark_exp(sizes)
44-
randomaccess_bench = benchmark_random_access(sizes)
45-
logdettriangle_bench = benchmark_logdettriangle(sizes)
25+
@show AmulB_bench = benchmark_AmulB(sizes);
26+
@show AmulBt_bench = benchmark_AmulBt(sizes);
27+
@show AtmulBt_bench = benchmark_AtmulBt(sizes);
28+
@show AtmulB_bench = benchmark_AtmulB(sizes);
29+
30+
@show Amulvb_bench = benchmark_Amulvb(sizes);
31+
@show Atmulvb_bench = benchmark_Atmulvb(sizes);
32+
33+
@show filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes);
34+
@show filter2d_3x3_bench = benchmark_filter2d3x3(sizes);
35+
@show filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes);
36+
37+
@show dot3_bench = benchmark_dot3(sizes);
38+
@show dot_bench = benchmark_dot(sizes);
39+
@show selfdot_bench = benchmark_selfdot(sizes);
40+
@show sse_bench = benchmark_sse(sizes);
41+
@show aplusBc_bench = benchmark_aplusBc(sizes);
42+
@show AplusAt_bench = benchmark_AplusAt(sizes);
43+
@show vexp_bench = benchmark_exp(sizes);
44+
@show randomaccess_bench = benchmark_random_access(sizes);
45+
@show logdettriangle_bench = benchmark_logdettriangle(sizes);
4646

4747
const v = 1
4848
using Cairo, Fontconfig
4949
const PICTURES = joinpath(pkgdir(LoopVectorization), "docs", "src", "assets")
50-
function saveplot(f, br)
51-
draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
52-
end
50+
saveplot(f, br) = draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
5351

5452
saveplot("bench_filter2d_dynamic_v", filter2d_dynamic_bench);
5553
saveplot("bench_filter2d_3x3_v", filter2d_3x3_bench);
@@ -70,19 +68,3 @@ saveplot("bench_AtmulBt_v", AtmulBt_bench);
7068
saveplot("bench_Amulvb_v", Amulvb_bench);
7169
saveplot("bench_Atmulvb_v", Atmulvb_bench);
7270

73-
74-
75-
76-
# plot(gemm_bench)
77-
# plot(AtmulB_bench)
78-
# plot(dot_bench)
79-
# plot(selfdot_bench)
80-
# plot(gemv_bench)
81-
# plot(dot3_bench)
82-
# plot(sse_bench)
83-
# plot(vexp_bench)
84-
# plot(aplusBc_bench)
85-
# plot(AplusAt_bench)
86-
87-
88-

benchmark/looptests.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
using LoopVectorization, LinearAlgebra, OffsetArrays
22
BLAS.set_num_threads(1)
33

4+
using LoopVectorization.VectorizationBase: StaticUnitRange
45
struct SizedOffsetMatrix{T,LR,UR,LC,RC} <: DenseMatrix{T}
56
data::Matrix{T}
67
end
7-
using LoopVectorization.VectorizationBase: StaticUnitRange
88
Base.axes(::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} = (StaticUnitRange{LR,UR}(),StaticUnitRange{LC,UC}())
9+
Base.parent(A::SizedOffsetMatrix) = A.data
910
@generated function LoopVectorization.stridedpointer(A::SizedOffsetMatrix{T,LR,UR,LC,RC}) where {T,LR,UR,LC,RC}
1011
quote
1112
$(Expr(:meta,:inline))
1213
LoopVectorization.OffsetStridedPointer(
13-
LoopVectorization.StaticStridedPointer{$T,Tuple{1,$(UR-LR+1)}}(pointer(A.data)),
14-
($(LR-2), $(LC-2))
14+
LoopVectorization.StaticStridedPointer{$T,Tuple{1,$(UR-LR+1)}}(pointer(parent(A))),
15+
($(LR-1), $(LC-1))
1516
)
1617
end
1718
end
19+
Base.getindex(A::SizedOffsetMatrix, i, j) = LoopVectorization.vload(LoopVectorization.stridedpointer(A), (i-1,j-1))
20+
Base.axes(::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} = (StaticUnitRange{LR,UR}(),StaticUnitRange{LC,UC}())
1821
Base.size(A::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} = (1 + UR-LR, 1 + UC-LC)
19-
Base.getindex(A::SizedOffsetMatrix, i, j) = LoopVectorization.vload(LoopVectorization.stridedpointer(A), (i,j)) # only needed to print
2022
Base.unsafe_convert(::Type{Ptr{Float64}}, A::SizedOffsetMatrix) = Base.unsafe_convert(Ptr{Float64}, A.data)
2123

2224

docs/src/assets/bench_AmulB_v1.png

21.4 KB
Loading

docs/src/assets/bench_AmulBt_v1.png

27 KB
Loading

docs/src/assets/bench_Amulvb_v1.png

-2.07 KB
Loading

docs/src/assets/bench_AplusAt_v1.png

-151 KB
Loading

docs/src/assets/bench_AtmulB_v1.png

33.3 KB
Loading

docs/src/assets/bench_AtmulBt_v1.png

15.7 KB
Loading

docs/src/assets/bench_Atmulvb_v1.png

-10.5 KB
Loading

docs/src/assets/bench_aplusBc_v1.png

5.39 KB
Loading

0 commit comments

Comments
 (0)