Skip to content

Commit 9e347ff

Browse files
committed
Added tests, removed some dead code.
1 parent 1f67453 commit 9e347ff

File tree

9 files changed

+46
-62
lines changed

9 files changed

+46
-62
lines changed

.appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ environment:
44
- julia_version: 1.4
55
- julia_version: nightly
66
platform:
7-
# - x86
7+
- x86
88
- x64
99
matrix:
1010
allow_failures:

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1515
[compat]
1616
DocStringExtensions = "0.8"
1717
OffsetArrays = "1"
18-
SIMDPirates = "0.7.20"
19-
SLEEFPirates = "0.4.4"
18+
SIMDPirates = "0.7.23"
19+
SLEEFPirates = "0.4.8"
2020
UnPack = "0"
21-
VectorizationBase = "0.11.2"
21+
VectorizationBase = "0.11.3"
2222
julia = "1.1"
2323

2424
[extras]

benchmark/driver.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ end
2121

2222
# sizes = 23:23
2323
sizes = 256:-1:2
24-
selfdot_bench = benchmark_selfdot(sizes)
2524

2625
AmulB_bench = benchmark_AmulB(sizes)
2726
AmulBt_bench = benchmark_AmulBt(sizes)
28-
AtmulB_bench = benchmark_AtmulB(sizes)
2927
AtmulBt_bench = benchmark_AtmulBt(sizes)
28+
AtmulB_bench = benchmark_AtmulB(sizes)
29+
3030
Amulvb_bench = benchmark_Amulvb(sizes)
3131
Atmulvb_bench = benchmark_Atmulvb(sizes)
3232

@@ -36,14 +36,15 @@ filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes)#512:-1:2)
3636

3737
dot3_bench = benchmark_dot3(sizes)
3838
dot_bench = benchmark_dot(sizes)
39+
selfdot_bench = benchmark_selfdot(sizes)
3940
sse_bench = benchmark_sse(sizes)
4041
aplusBc_bench = benchmark_aplusBc(sizes)
4142
AplusAt_bench = benchmark_AplusAt(sizes)
4243
vexp_bench = benchmark_exp(sizes)
4344
randomaccess_bench = benchmark_random_access(sizes)
4445
logdettriangle_bench = benchmark_logdettriangle(sizes)
4546

46-
v = 1
47+
const v = 1
4748
using Cairo, Fontconfig
4849
const PICTURES = joinpath(pkgdir(LoopVectorization), "docs", "src", "assets")
4950
function saveplot(f, br)

src/condense_loopset.jl

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,6 @@ function add_external_functions!(q::Expr, ls::LoopSet)
195195
end
196196
end
197197

198-
@inline unwrap_array(A) = A
199-
@inline unwrap_array(A::Union{SubArray,Transpose,Adjoint}) = parent(A)
200-
@inline array_wrapper(A) = nothing
201-
@inline array_wrapper(A::Transpose) = Transpose
202-
@inline array_wrapper(A::Adjoint) = Adjoint
203-
@inline array_wrapper(A::SubArray) = A.indices
204-
205198
# Try to condense in type stable manner
206199
function generate_call(ls::LoopSet, inline_unroll::NTuple{3,Int8}, debug::Bool = false)
207200
operation_descriptions = Expr(:curly, :Tuple)
@@ -235,16 +228,6 @@ function generate_call(ls::LoopSet, inline_unroll::NTuple{3,Int8}, debug::Bool =
235228
add_external_functions!(q, ls)
236229
q
237230
end
238-
concat_vals() = Val{()}()
239-
# @generated concat_vals(::Val{N}) where {N} = Val{(N,)}()
240-
# @generated concat_vals(::Val{M}, ::Val{N}) where {M, N} = Val{(M,N)}()
241-
@generated function concat_vals(args...)
242-
tup = Expr(:tuple)
243-
for n in eachindex(args)
244-
push!(tup.args, args[n].parameters[1])
245-
end
246-
Expr(:call, Expr(:curly, :Val, tup))
247-
end
248231

249232

250233
"""

src/lowering.jl

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -146,45 +146,42 @@ function lower_block(
146146
blockq
147147
end
148148

149-
function lower_llvm_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, loop::Loop)
150-
loopsym = names(ls)[n]
151-
loop = getloop(ls, loopsym)
152-
# loopsym = mangletiledsym(loopsym, us, n)
153-
nisvectorized = false#isvectorized(us, n)
154-
sl = startloop(loop, nisvectorized, loopsym)
155-
# tc = terminatecondition(loop, us, n, loopsym, inclmask, 1)
156-
looprange = if loop.startexact
157-
if loop.stopexact
158-
Expr(:(=), loopsym, Expr(:call, :(:), loop.starthint, loop.stophint))
159-
else
160-
Expr(:(=), loopsym, Expr(:call, :(:), loop.starthint, loop.stopsym))
161-
end
162-
elseif loop.stopexact
163-
Expr(:(=), loopsym, Expr(:call, :(:), loop.startsym, loop.stophint))
164-
else
165-
Expr(:(=), loopsym, Expr(:call, :(:), loop.startsym, loop.stopsym))
166-
end
167-
body = lower_block(ls, us, n, false, 1)
168-
push!(body.args, Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 4)))
169-
# q = Expr( :block, sl, Expr(:while, tc, body))
170-
q = Expr(:for, looprange, body)
171-
# if nisvectorized
172-
# tc = terminatecondition(loop, us, n, loopsym, true, 1)
173-
# body = lower_block(ls, us, n, true, 1)
174-
# push!(q.args, Expr(:if, tc, body))
175-
# end
176-
q
177-
end
178-
# tiledsym(s::Symbol) = Symbol("##outer##", s, "##outer##")
179-
# mangletiledsym(s::Symbol, us::UnrollSpecification, n::Int) = isunrolled2(us, n) ? tiledsym(s) : s
149+
# function lower_llvm_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, loop::Loop)
150+
# loopsym = names(ls)[n]
151+
# loop = getloop(ls, loopsym)
152+
# # loopsym = mangletiledsym(loopsym, us, n)
153+
# nisvectorized = false#isvectorized(us, n)
154+
# sl = startloop(loop, nisvectorized, loopsym)
155+
# # tc = terminatecondition(loop, us, n, loopsym, inclmask, 1)
156+
# looprange = if loop.startexact
157+
# if loop.stopexact
158+
# Expr(:(=), loopsym, Expr(:call, :(:), loop.starthint, loop.stophint))
159+
# else
160+
# Expr(:(=), loopsym, Expr(:call, :(:), loop.starthint, loop.stopsym))
161+
# end
162+
# elseif loop.stopexact
163+
# Expr(:(=), loopsym, Expr(:call, :(:), loop.startsym, loop.stophint))
164+
# else
165+
# Expr(:(=), loopsym, Expr(:call, :(:), loop.startsym, loop.stopsym))
166+
# end
167+
# body = lower_block(ls, us, n, false, 1)
168+
# push!(body.args, Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 4)))
169+
# q = Expr(:for, looprange, body)
170+
# # if nisvectorized
171+
# # tc = terminatecondition(loop, us, n, loopsym, true, 1)
172+
# # body = lower_block(ls, us, n, true, 1)
173+
# # push!(q.args, Expr(:if, tc, body))
174+
# # end
175+
# q
176+
# end
180177
function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask::Bool)
181178
usorig = ls.unrollspecification[]
182179
nisvectorized = isvectorized(us, n)
183180
loopsym = names(ls)[n]
184181
loop = getloop(ls, loopsym)
185-
if VERSION ≥ v"1.4" && !nisvectorized && !inclmask && isone(n) && !ls.loadelimination[] && (us.u₁ > 1) && (usorig.u₁ == us.u₁) && (usorig.u₂ == us.u₂) && length(loop) > 7
186-
return lower_llvm_unroll(ls, us, n, loop)
187-
end
182+
# if VERSION ≥ v"1.4" && !nisvectorized && !inclmask && isone(n) && !ls.loadelimination[] && (us.u₁ > 1) && (usorig.u₁ == us.u₁) && (usorig.u₂ == us.u₂) && length(loop) > 7
183+
# return lower_llvm_unroll(ls, us, n, loop)
184+
# end
188185
sl = startloop(loop, nisvectorized, loopsym)
189186
tc = terminatecondition(loop, us, n, loopsym, inclmask, 1)
190187
body = lower_block(ls, us, n, inclmask, 1)

src/operations.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ function sameref(x::ArrayReference, y::ArrayReference)
2525
yinds = y.indices
2626
nrefs = length(xinds)
2727
nrefs == length(yinds) || return false
28-
xoffs = x.offsets; yoffs = y.offsets
2928
for n ∈ 1:nrefs
3029
xinds[n] === yinds[n] || return false
3130
end

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ Execute an `@avx` block. The block's code is represented via the arguments:
461461
- `vargs...` holds the encoded pointers of all the arrays (see `VectorizationBase`'s various pointer types).
462462
"""
463463
@generated function _avx_!(::Val{UNROLL}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB, vargs...) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
464-
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
464+
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
465465
ls = _avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, vargs)
466466
# @show avx_body(ls, UNROLL)
467467
avx_body(ls, UNROLL)

test/broadcast.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@
6666
@test D1 ≈ D2
6767
fill!(D2, -999999); D2 = @avx C .+ At' *ˡ B;
6868
@test D1 ≈ D2
69+
D1 .= view(C, 1, :) .+ A * B;
70+
fill!(D2, -999999);
71+
@avx D2 .= view(C1, 1, :) .+ A *ˡ B;
72+
@test D1 ≈ D2
6973

7074
if VERSION > v"1.2"
7175
b = rand(T,K); x = rand(R,N);

test/dot.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ using Test
3838
function myselfdot(a)
3939
s = zero(eltype(a))
4040
@inbounds @simd for i ∈ eachindex(a)
41-
s += a[i]*a[i]
41+
s += getindex(a, i) * a[i]
4242
end
4343
s
4444
end
@@ -142,7 +142,7 @@ using Test
142142
function complex_mul_with_index_offset_avx!(c_re, c_im, a_re, a_im, b_re, b_im)
143143
@_avx for i = 1:length(a_re) - 1
144144
c_re[i] = b_re[i] * a_re[i + 1] - b_im[i] * a_im[i + 1]
145-
c_im[i] = b_re[i] * a_im[i + 1] + b_im[i] * a_re[i + 1]
145+
setindex!(c_im, b_re[i] * a_im[i + 1] + b_im[i] * a_re[i + 1], i)
146146
end
147147
end
148148

0 commit comments

Comments
 (0)