Skip to content

Commit 9cabbf6

Browse files
committed
Bug fixes
1 parent 2d81972 commit 9cabbf6

File tree

9 files changed

+115
-17
lines changed

9 files changed

+115
-17
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.5"
4+
version = "0.12.6"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -28,7 +28,7 @@ SLEEFPirates = "0.6.12"
2828
Static = "0.2"
2929
ThreadingUtilities = "0.4.1"
3030
UnPack = "1"
31-
VectorizationBase = "0.19.9"
31+
VectorizationBase = "0.19.14"
3232
julia = "1.5"
3333

3434
[extras]

src/LoopVectorization.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,17 @@ using VectorizationBase: register_size, register_count, cache_linesize, cache_si
1111
vfma_fast, vmuladd_fast, vdiv_fast, vadd_fast, vsub_fast, vmul_fast,
1212
relu, stridedpointer, StridedPointer, StridedBitPointer, AbstractStridedPointer, _vload, _vstore!,
1313
reduced_add, reduced_prod, reduce_to_add, reduce_to_prod, reduced_max, reduced_min, reduce_to_max, reduce_to_min,
14-
vsum, vprod, vmaximum, vminimum, unwrap, Unroll, VecUnroll,
14+
reduced_all, reduced_any, reduce_to_all, reduce_to_any,
15+
vsum, vprod, vmaximum, vminimum, vany, vall, unwrap, Unroll, VecUnroll,
1516
preserve_buffer, zero_vecunroll, vbroadcast_vecunroll, _vzero, _vbroadcast,
1617
contract_add, collapse_add,
1718
contract_mul, collapse_mul,
1819
contract_max, collapse_max,
1920
contract_min, collapse_min,
2021
contract_and, collapse_and,
2122
contract_or, collapse_or,
22-
num_threads, num_cores
23+
num_threads, num_cores,
24+
max_mask
2325

2426

2527
using IfElse: ifelse

src/codegen/loopstartstopmanager.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
117117
# Not doing normal offset indexing
118118
uliv[i] = -findfirst(Base.Fix2(===,ind), looporder)::Int
119119
# push!(gespinds.args, Expr(:call, lv(:Zero)))
120-
# push!(gespinds.args, staticexpr(1))
121-
push!(gespinds.args, staticexpr(convert(Int, strides[i])))
120+
push!(gespinds.args, staticexpr(1))
121+
# push!(gespinds.args, staticexpr(convert(Int, strides[i])))
122122

123123
push!(offsetprecalc_descript.args, 0) # not doing offset indexing, so push 0
124124
else

src/codegen/lower_memory_common.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,13 @@ function mem_offset(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vect
135135
indvectorized = _mm & (ind === vloopsym)
136136
offset = offsets[n] % Int
137137
stride = strides[n] % Int
138+
ind_by_offset = inds_calc_by_ptr_offset[n] | (ind === CONSTANTZEROINDEX)
139+
if !ind_by_offset
140+
offset += (stride - 1)
141+
end
138142
@unpack vstep = td
139143
if loopedindex[n]
140-
addoffset!(ret, indvectorized, vstep, stride, ind, offset, inds_calc_by_ptr_offset[n] | (ind === CONSTANTZEROINDEX)) # 7 arg
144+
addoffset!(ret, indvectorized, vstep, stride, ind, offset, ind_by_offset) # 7 arg
141145
else
142146
offset -= 1
143147
newname, parent = symbolind(ind, op, td)
@@ -279,6 +283,9 @@ function mem_offset_u(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Ve
279283
stride = convert(Int, strides[n])
280284
indvectorized = ind === vloopsym
281285
indvectorizedmm = _mm & indvectorized
286+
if !ind_by_offset
287+
offset += (stride - 1)
288+
end
282289
if ind === u₁loopsym
283290
addvectoroffset!(ret, indvectorizedmm, incr₁, u₁step, vstep, stride, ind, offset, ind_by_offset, indvectorized) # 9 arg
284291
elseif ind === uβ‚‚loopsym

src/codegen/lower_store.jl

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,12 @@ function lower_store!(
114114
# isunrolledβ‚‚ = isuβ‚‚unrolled(op)
115115
falseexpr = Expr(:call, lv(:False)); trueexpr = Expr(:call, lv(:True)); rs = staticexpr(reg_size(ls));
116116
opp = first(parents(op))
117-
if (opp.instruction.instr === reductfunc) && isone(length(parents(opp)))
117+
if ((opp.instruction.instr === reductfunc) || (opp.instruction.instr === :identity)) && isone(length(parents(opp)))
118118
opp = only(parents(opp))
119119
end
120120
# __uβ‚‚max = ls.unrollspecification[].uβ‚‚
121121
isu₁, isuβ‚‚ = isunrolled_sym(opp, u₁loopsym, uβ‚‚loopsym, vloopsym)#, __uβ‚‚max)
122+
# @show isu₁, isuβ‚‚, opp, u₁loopsym, uβ‚‚loopsym, vloopsym
122123
u = isu₁ ? u₁ : 1
123124
mvar = Symbol(variable_name(opp, ifelse(isuβ‚‚, suffix, -1)), '_', u)
124125
if all(op.ref.loopedindex)
@@ -133,14 +134,21 @@ function lower_store!(
133134
push!(q.args, storeexpr)
134135
elseif u₁ > 1
135136
mvard = Symbol(mvar, "##data##")
136-
push!(q.args, Expr(:(=), mvard, Expr(:call, lv(:data), mvar)))
137+
isu₁ && push!(q.args, Expr(:(=), mvard, Expr(:call, lv(:data), mvar)))
137138
for u ∈ 1:u₁
138139
mvaru = :(getfield($mvard, $u, false))
139140
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, u-1)
140-
storeexpr = if reductfunc === Symbol("")
141-
Expr(:call, lv(:_vstore!), vptr(op), mvaru, inds)
141+
# @show isu₁unrolled(opp), opp
142+
storeexpr = if isu₁
143+
if reductfunc === Symbol("")
144+
Expr(:call, lv(:_vstore!), vptr(op), mvaru, inds)
145+
else
146+
Expr(:call, lv(:_vstore!), lv(reductfunc), vptr(op), mvaru, inds)
147+
end
148+
elseif reductfunc === Symbol("")
149+
Expr(:call, lv(:_vstore!), vptr(op), mvar, inds)
142150
else
143-
Expr(:call, lv(:_vstore!), lv(reductfunc), vptr(op), mvaru, inds)
151+
Expr(:call, lv(:_vstore!), lv(reductfunc), vptr(op), mvar, inds)
144152
end
145153
domask = mask && (isvectorized(op) & ((u == u₁) | (vloopsym !== u₁loopsym)))
146154
add_memory_mask!(storeexpr, op, ua, domask)# & ((u == u₁) | isvectorized(op)))

src/condense_loopset.jl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,14 +172,17 @@ function argmeta_and_consts_description(ls::LoopSet, arraysymbolinds)
172172
tuple_expr(ls.preamble_funcofeltypes)
173173
)
174174
end
175+
# `vdata` unwraps a value: for a `Vec` or a `VecUnroll` it returns the object's
# `data` field via `getfield`; any other argument is returned unchanged.
@inline vdata(v::Vec) = getfield(v, :data)
176+
@inline vdata(v::VecUnroll) = getfield(v, :data)
177+
@inline vdata(x) = x
175178

176179
function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
177180
@assert !iszero(length(ls.outer_reductions))
178181
if isone(length(ls.outer_reductions))
179182
op = getop(ls, ls.outer_reductions[1])
180183
if extract
181184
# if (isu₁unrolled(op) | isuβ‚‚unrolled(op))
182-
Expr(:call, :data, Symbol(mangledvar(op), "##onevec##"))
185+
Expr(:call, :vdata, Symbol(mangledvar(op), "##onevec##"))
183186
# else
184187
# Expr(:call, :data, mangledvar(op))
185188
# end
@@ -192,7 +195,7 @@ function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
192195
for or ∈ ls.outer_reductions
193196
op = ops[or]
194197
if extract
195-
push!(ret.args, Expr(:call, :data, Symbol(mangledvar(op), "##onevec##")))
198+
push!(ret.args, Expr(:call, :vdata, Symbol(mangledvar(op), "##onevec##")))
196199
else
197200
push!(ret.args, Symbol(mangledvar(ops[or]), "##onevec##"))
198201
end
@@ -421,6 +424,7 @@ make_crashy(q) = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,
421424
@inline vecmemaybe(x::NativeTypes) = x
422425
@inline vecmemaybe(x::VectorizationBase._Vec) = Vec(x)
423426
@inline vecmemaybe(x::Tuple) = VectorizationBase.VecUnroll(x)
427+
@inline vecmemaybe(x::Mask) = x
424428

425429
function gc_preserve(call::Expr, preserve::Vector{Symbol})
426430
q = Expr(:gc_preserve, call)

src/modeling/operations.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,16 @@ function ArrayReference(array, indices)
3131
ArrayReference(array, indices, zeros(Int8, ninds), ones(Int8, ninds))
3232
end
3333
function sameref(x::ArrayReference, y::ArrayReference)
34-
(x.array === y.array) && (x.indices == y.indices)
34+
(x.array === y.array) && (x.indices == y.indices) && (x.strides == y.strides)
3535
end
3636
function Base.isequal(x::ArrayReference, y::ArrayReference)
3737
sameref(x, y) || return false
3838
xoffs = x.offsets; yoffs = y.offsets
39-
xmult = x.strides; ymult = y.strides
39+
# xmult = x.strides; ymult = y.strides
4040
length(xoffs) == length(yoffs) || return false
4141
for n ∈ eachindex(xoffs)
42-
((xoffs[n] == yoffs[n]) & (xmult[n] == ymult[n])) || return false
42+
# ((xoffs[n] == yoffs[n]) & (xmult[n] == ymult[n])) || return false
43+
(xoffs[n] == yoffs[n]) || return false
4344
end
4445
true
4546
end

test/runtests.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ const START_TIME = time()
6060
@time include("gemm.jl")
6161

6262
@time include("threading.jl")
63+
64+
@time include("tullio.jl")
6365
end
6466

6567
const ELAPSED_MINUTES = (time() - START_TIME)/60

test/tullio.jl

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
using LoopVectorization, Test
2+
# Tests for Tullio
3+
4+
@testset "Tullio Tests" begin
5+
A = (1:10) .^ 2; K = rand(10);
6+
7+
# `@avx` version of the "act" kernel: writes `ℛ[i, j] = A[i] / j` for every
# `i` in `𝒶𝓍i` and `j` in `𝒶𝓍j`, then returns `ℛ`.
# `𝒶𝓍i` defaults to the first axis of `A`, `𝒶𝓍j` to the second axis of `ℛ`.
function act!(ℛ::AbstractArray, A, 𝒶𝓍i = axes(A,1), 𝒶𝓍j = axes(ℛ,2))
    @avx for j in 𝒶𝓍j
        for i in 𝒶𝓍i
            ℛ[i, j] = A[i] / j
        end
    end
    ℛ
end
15+
# Plain-Julia reference for `act!`: writes `ℛ[i, j] = A[i] / j` for every
# `i` in `𝒶𝓍i` and `j` in `𝒶𝓍j`, then returns `ℛ`.
# `𝒶𝓍i` defaults to the first axis of `A`, `𝒶𝓍j` to the second axis of `ℛ`.
function act_noavx!(ℛ::AbstractArray, A, 𝒶𝓍i = axes(A,1), 𝒶𝓍j = axes(ℛ,2))
    for j in 𝒶𝓍j
        for i in 𝒶𝓍i
            ℛ[i, j] = A[i] / j
        end
    end
    ℛ
end
23+
# Every entry of both freshly allocated outputs is overwritten, so the random
# initial contents do not affect the comparison.
@test act!(rand(10,10), A) ≈ act_noavx!(rand(10,10), A)
24+
25+
D = similar(A, 10, 10) .= 999;
26+
27+
inds = [2,3,5,2];
28+
29+
# `@avx` version of the indirect-index store: for each `i` in `𝒶𝓍i` and `j` in
# `𝒶𝓍j`, writes `A[j]` into row `inds[i]`, column `j` of `ℛ`; returns `ℛ`.
# `𝒶𝓍j` defaults to the second axis of `ℛ`, `𝒶𝓍i` to the first axis of `inds`.
function two!(ℛ::AbstractArray, inds, A, 𝒶𝓍j = axes(ℛ,2), 𝒶𝓍i = axes(inds,1))
    @avx for i = 𝒶𝓍i
        for j = 𝒶𝓍j
            ℛ[inds[i], j] = A[j]
        end
    end
    ℛ
end
37+
# Plain-Julia reference for `two!`: for each `i` in `𝒶𝓍i` and `j` in `𝒶𝓍j`,
# writes `A[j]` into row `inds[i]`, column `j` of `ℛ`; returns `ℛ`.
# Rows not named in `inds` are left untouched.
function two_noavx!(ℛ::AbstractArray, inds, A, 𝒶𝓍j = axes(ℛ,2), 𝒶𝓍i = axes(inds,1))
    for i = 𝒶𝓍i
        for j = 𝒶𝓍j
            ℛ[inds[i], j] = A[j]
        end
    end
    ℛ
end
45+
# Compare the `@avx` kernel against the plain-Julia reference. Comparing `two!`
# with itself would pass even if the vectorized result were wrong.
@test two!(copy(D), inds, A) == two_noavx!(copy(D), inds, A)
46+
47+
# `@avx` version of the strided-read kernel: writes `ℛ[i] = A[2i + 1] + A[i]`
# for every `i` in `𝒶𝓍i` (default: first axis of `ℛ`); returns `ℛ`.
function three!(ℛ::AbstractArray, A, 𝒶𝓍i = axes(ℛ,1))
    @avx for i = 𝒶𝓍i
        ℛ[i] = A[2i + 1] + A[i]
    end
    ℛ
end
53+
# Plain-Julia reference for `three!`: writes `ℛ[i] = A[2i + 1] + A[i]`
# for every `i` in `𝒶𝓍i` (default: first axis of `ℛ`); returns `ℛ`.
function three_noavx!(ℛ::AbstractArray, A, 𝒶𝓍i = axes(ℛ,1))
    for i = 𝒶𝓍i
        ℛ[i] = A[2i + 1] + A[i]
    end
    ℛ
end
59+
@test three!(rand(4), A) == three_noavx!(rand(4), A)
60+
61+
# `@avx` reduction with `&`: starts from `true` and folds in `A[i] > 0` for
# every `i` in `𝒶𝓍i` (default: first axis of `A`); returns the accumulator.
function and(A, 𝒶𝓍i = axes(A,1))
    𝒜𝒸𝒸 = true
    @avx for i = 𝒶𝓍i
        𝒜𝒸𝒸 = 𝒜𝒸𝒸 & (A[i] > 0)
    end
    𝒜𝒸𝒸
end
68+
@test and(A)
69+
A[3] = -1
70+
@test !and(A)
71+
72+
end
73+
74+

0 commit comments

Comments
Β (0)