Skip to content

Commit a3393e7

Browse files
committed
Misc updates, bumping minor version due to new assumption that loop iterables are not isempty.
1 parent 023cba0 commit a3393e7

32 files changed

+144
-51
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.7.8"
4+
version = "0.8.0"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -15,7 +15,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1515
[compat]
1616
DocStringExtensions = "0.8"
1717
OffsetArrays = "1"
18-
SIMDPirates = "0.7.23"
18+
SIMDPirates = "0.7.24"
1919
SLEEFPirates = "0.4.8"
2020
UnPack = "0"
2121
VectorizationBase = "0.11.3"

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ Pkg.add("LoopVectorization")
1414
```
1515
LoopVectorization is supported on Julia 1.1 and later. It is tested on Julia 1.1, 1.3, and nightly.
1616

17+
## Warning
18+
19+
Misusing LoopVectorization can have [serious consequences](http://catb.org/jargon/html/N/nasal-demons.html). Like `@inbounds`, misusing it can lead to segfaults and memory corruption.
20+
Whenever you apply the `@avx` macro to a block of code, we expect that you:
21+
1. Are not indexing an array out of bounds. `@avx` does not perform any bounds checking.
22+
2. Are not iterating over an empty collection. Iterating over an empty loop such as `for i ∈ eachindex(Float64[])` is undefined behavior, and will likely result in out-of-bounds memory accesses. Ensure that every loop iterates at least once.
23+
3. Are not relying on a specific execution order. `@avx` can and will re-order operations and loops inside its scope, so the correctness cannot depend on a particular order. You cannot implement `cumsum` with `@avx`.
24+
1725
## Usage
1826

1927
This library provides the `@avx` macro, which may be used to prefix a `for` loop or broadcast statement.

benchmark/benchmarkflops.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,10 @@ function matmul_bench!(br, C, A, B, i)
6161
@assert C ≈ Cblas "Fort builtin gemm wrong?"; fill!(C, NaN)
6262
br[10,i] = n_gflop / @belapsed ifgemm_builtin!($C, $A, $B)
6363
@assert C ≈ Cblas "ifort builtin gemm wrong?"; fill!(C, NaN)
64-
br[11,i] = n_gflop / @belapsed mul!($C, $A, $B)
64+
br[11,i] = n_gflop / @belapsed mul!($C, $A, $B);
65+
fill!(C, NaN)
6566
br[12,i] = n_gflop / @belapsed dgemmmkl!($C, $A, $B)
66-
@assert C ≈ Cblas "MKL JIT gemm wrong?"; fill!(C, NaN)
67+
@assert C ≈ Cblas "MKL JIT gemm wrong?"
6768
# br[12,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
6869
end
6970
function A_mul_B_bench!(br, s, i)
@@ -222,7 +223,7 @@ function gemv_bench!(br, x, A, y, i)
222223
br[10,i] = n_gflop / @belapsed ifgemv_builtin!($x, $A, $y)
223224
@assert x ≈ xblas "ifort wrong?"; fill!(x, NaN);
224225
br[11,i] = n_gflop / @belapsed mul!($x, $A, $y)
225-
br[11,i] = n_gflop / @belapsed dgemvmkl!($x, $A, $y)
226+
br[12,i] = n_gflop / @belapsed dgemvmkl!($x, $A, $y)
226227
@assert x ≈ xblas "gemvmkl wrong?"; fill!(x, NaN);
227228
end
228229
function A_mul_vb_bench!(br, s, i)
@@ -455,7 +456,7 @@ function logdettriangle_bench!(br, s, i)
455456
br[7,i] = n_gflop / @belapsed logdet($U)
456457
end
457458
function benchmark_logdettriangle(sizes)
458-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "Julia-builtin"]
459+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "LinearAlgebra"]
459460
br = BenchmarkResult(tests, sizes)
460461
sm = br.sizedresults.results
461462
pmap(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))

benchmark/looptests.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ function jgemm!(𝐂, 𝐀ᵀ::Adjoint, 𝐁ᵀ::Adjoint)
6161
end
6262
end
6363
function gemmavx!(𝐂, 𝐀, 𝐁)
64-
@avx for m ∈ 1:size(𝐀,1), n ∈ 1:size(𝐁,2)
64+
@avx for m ∈ axes(𝐀,1), n ∈ axes(𝐁,2)
6565
𝐂ₘₙ = zero(eltype(𝐂))
66-
for k ∈ 1:size(𝐀,2)
66+
for k ∈ axes(𝐀,2)
6767
𝐂ₘₙ += 𝐀[m,k] * 𝐁[k,n]
6868
end
6969
𝐂[m,n] = 𝐂ₘₙ

benchmark/plotbenchmarks.jl

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,43 @@ function Base.show(io::IO, br::BenchmarkResult)
1111
end
1212

1313

14-
using Colors, Gadfly
15-
const COLORS = distinguishable_colors(21, [RGB(1,1,1), RGB(0,0,0)])
16-
const COLOR_MAP = Dict{String,RGB{Colors.N0f8}}()
14+
using Colors, ColorSchemes, Gadfly
15+
const COLORS = [RGB(0.0,0.0,0.0),RGB(1.0,0.0,0.0)]
16+
# const COLORS = [RGB(0.0,0.0,0.0),RGB(0.0,1.0,0.0)]
17+
# const COLORS = distinguishable_colors(14, pushfirst!(get(ColorSchemes.Paired_12, (0.5:11.5) ./ 12), RGB(0.0,0.0,0.0)))
18+
for i ∈ 1:12 # 11 is number of tested libs - 2
19+
push!(COLORS, get(ColorSchemes.cyclic_mygbm_30_95_c78_n256_s25, i/12))
20+
# push!(COLORS, get(ColorSchemes.vikO, (i-0.5)/12))
21+
end
22+
# const COLOR_MAP = Dict{String,RGB{Float64}}()
23+
# const COLOR_MAP = Dict{String,RGB{Colors.N0f8}}()
24+
const COLOR_MAP64 = Dict{String,RGB{Float64}}()
1725
function getcolor(s::String)
18-
get!(COLOR_MAP, s) do
19-
COLORS[length(COLOR_MAP) + 2]
26+
get!(COLOR_MAP64, s) do
27+
COLORS[length(COLOR_MAP64) + 1]
2028
end
2129
end
22-
30+
replace_and(str) = replace(str, '&' => "with")
2331

2432
function Gadfly.plot(br::BenchmarkResult)
2533
res = br.sizedresults.results
2634
sizes = br.sizedresults.sizes
2735
# sizes = Vector{eltype(brsizes)}(undef, length(res))
28-
tests = @view(br.tests[2:end])
29-
ntests = length(tests)
36+
tests = replace_and.(@view(br.tests[2:end]))
3037
colors = getcolor.(tests)
3138

3239
xt = 0:20:260
3340
maxres = maximum(res)
3441
maxtick = 10round(Int, 0.1maxres)
3542
yt = if iszero(maxtick)
3643
maxtick = 10round(0.1maxres)
37-
range(0, maxtick, length = 20)
38-
elseif maxtick < 50
39-
0:5:maxtick
40-
elseif maxtick < 20
41-
0:2:maxtick
44+
range(0, maxres, length = 20)
4245
elseif maxtick < 10
4346
0:1:maxtick
47+
elseif maxtick < 20
48+
0:2:maxtick
49+
elseif maxtick < 50
50+
0:5:maxtick
4451
else
4552
0:10:maxtick
4653
end

docs/src/assets/bench_AmulB_v1.png

-6.46 KB
Loading

docs/src/assets/bench_AmulBt_v1.png

-12.7 KB
Loading

docs/src/assets/bench_Amulvb_v1.png

35.8 KB
Loading

docs/src/assets/bench_AplusAt_v1.png

73 KB
Loading

docs/src/assets/bench_AtmulB_v1.png

-8.64 KB
Loading

0 commit comments

Comments
 (0)