Skip to content

Commit 649817b

Browse files
committed
Fix second example from #295
1 parent e620a8c commit 649817b

File tree

4 files changed

+76
-7
lines changed

4 files changed

+76
-7
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.48"
4+
version = "0.12.49"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_compute.jl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,21 +176,19 @@ function ifelselastexpr(hasf::Bool, M::Int, vargtypes, K::Int, S::Int, maskearly
176176
q
177177
end
178178
@generated function ifelselast(f::F, m::AbstractMask{W}, ::StaticInt{M}, ::StaticInt{S}, vargs::Vararg{Any,K}) where {F,W,K,M,S}
179-
1+1
180179
ifelselastexpr(true, M, vargs, K, S, false)
181180
end
182181
@generated function ifelselast(m::AbstractMask{W}, ::StaticInt{M}, ::StaticInt{S}, varg_1::V1, varg_2::V2) where {W,V1,V2,M,S}
183-
1+1
184182
ifelselastexpr(false, M, (V1,V2), 2, S, false)
185183
end
186184
@generated function ifelsepartial(f::F, m::AbstractMask{W}, ::StaticInt{M}, ::StaticInt{S}, vargs::Vararg{Any,K}) where {F,W,K,M,S}
187-
1+1
188185
ifelselastexpr(true, M, vargs, K, S, true)
189186
end
190187
@generated function ifelsepartial(m::AbstractMask{W}, ::StaticInt{M}, ::StaticInt{S}, varg_1::V1, varg_2::V2) where {W,V1,V2,M,S}
191-
1+1
192188
ifelselastexpr(false, M, (V1,V2), 2, S, true)
193189
end
190+
# @inline ifelselast(f::F, m::AbstractMask{W}, ::StaticInt{M}, ::StaticInt{S}, vargs::Vararg{NativeTypes,K}) where {F,W,K,M,S} = f(vargs...)
191+
# @inline ifelsepartial(f::F, m::AbstractMask{W}, ::StaticInt{M}, ::StaticInt{S}, vargs::Vararg{NativeTypes,K}) where {F,W,K,M,S} = f(vargs...)
194192
@generated function subset_vec_unroll(vu::VecUnroll{N}, ::StaticInt{S}) where {N,S}
195193
(1 ≤ S ≤ N + 1) || throw(ArgumentError("`vu` isa `VecUnroll` of `$(N+1)` elements, but trying to subset $S of them."))
196194
t = Expr(:tuple)

src/modeling/determinestrategy.jl

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,26 @@ function choose_tile(ls::LoopSet, sld::Vector{Vector{Symbol}} = store_load_deps(
13181318
shouldinline = (looplengthprod(ls) < 4097.0) #|| any(op -> iscompute(op) && iszero(length(loopdependencies(op))), operations(ls))
13191319
best_order, bestu₁, bestu₂, best_vec, u₁, u₂, lowest_cost, shouldinline
13201320
end
1321+
function mismatchedstorereductions(ls::LoopSet)
1322+
reduceddeps = Vector{Symbol}[]
1323+
nreduceddeps = 0
1324+
for op ∈ operations(ls)
1325+
isstore(op) || continue
1326+
rd = reduceddependencies(first(parents(op)))
1327+
if nreduceddeps ≠ 0
1328+
length(rd) == nreduceddeps || return true
1329+
else
1330+
nreduceddeps = length(rd)
1331+
end
1332+
for rdo ∈ reduceddeps
1333+
for s ∈ rdo
1334+
s ∈ rd || return true
1335+
end
1336+
end
1337+
push!(reduceddeps, rd)
1338+
end
1339+
false
1340+
end
13211341
# Last in order is the inner most loop
13221342
function choose_order_cost(ls::LoopSet)
13231343
resize!(ls.loop_order, length(ls.loopsymbols))
@@ -1331,15 +1351,16 @@ function choose_order_cost(ls::LoopSet)
13311351
tc = Inf
13321352
end
13331353
uorder, uvec, uc = choose_unroll_order(ls, tc, sld)
1354+
mismatched = mismatchedstorereductions(ls)
13341355
if num_loops(ls) > 1 && tc ≤ uc
13351356
@assert ls.loop_order.bestorder === torder
13361357
# copyto!(ls.loop_order.bestorder, torder)
1337-
return torder, tunroll, ttile, tvec, tU, tT, tc, shouldinline
1358+
return torder, tunroll, ttile, tvec, tU, tT, Core.ifelse(mismatched, Inf, tc), shouldinline
13381359
# return torder, tvec, 4, 4#5, 5
13391360
else
13401361
copyto!(ls.loop_order.bestorder, uorder)
13411362
UF, uunroll = determine_unroll_factor(ls, uorder, uvec)
1342-
return uorder, uunroll, Symbol("##undefined##"), uvec, UF, -1, uc, true
1363+
return uorder, uunroll, Symbol("##undefined##"), uvec, UF, -1, Core.ifelse(mismatched, Inf, uc), true
13431364
end
13441365
end
13451366
function choose_order(ls::LoopSet)

test/manyloopreductions.jl

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,43 @@
11

2+
function mismatchedreductions_noturbo!(𝛥r392, 𝛥x923, 𝛥ℛ, ℛ, r392, x923, 𝒶𝓍k=1:2, 𝒶𝓍n=1:3, 𝒶𝓍j=1:9, 𝒶𝓍m=1:9, 𝒶𝓍i=1:3)
3+
@inbounds @fastmath for k = 𝒶𝓍k
4+
for i = 𝒶𝓍i
5+
for m = 𝒶𝓍m
6+
for j = 𝒶𝓍j
7+
for n = 𝒶𝓍n
8+
ℰ𝓍1 = conj(x923[m, k, n])
9+
ℰ𝓍2 = 𝛥ℛ[n, j, m, i] * ℰ𝓍1
10+
ℰ𝓍3 = conj(r392[i, j, k])
11+
ℰ𝓍4 = 𝛥ℛ[n, j, m, i] * ℰ𝓍3
12+
𝛥r392[i, j, k] = 𝛥r392[i, j, k] + ℰ𝓍2
13+
𝛥x923[m, k, n] = 𝛥x923[m, k, n] + ℰ𝓍4
14+
end
15+
end
16+
end
17+
end
18+
end
19+
𝛥r392, 𝛥x923
20+
end
21+
function mismatchedreductions!(𝛥r392, 𝛥x923, 𝛥ℛ, ℛ, r392, x923, 𝒶𝓍k=1:2, 𝒶𝓍n=1:3, 𝒶𝓍j=1:9, 𝒶𝓍m=1:9, 𝒶𝓍i=1:3)
22+
@turbo for k = 𝒶𝓍k
23+
for i = 𝒶𝓍i
24+
for m = 𝒶𝓍m
25+
for j = 𝒶𝓍j
26+
for n = 𝒶𝓍n
27+
ℰ𝓍1 = conj(x923[m, k, n])
28+
ℰ𝓍2 = 𝛥ℛ[n, j, m, i] * ℰ𝓍1
29+
ℰ𝓍3 = conj(r392[i, j, k])
30+
ℰ𝓍4 = 𝛥ℛ[n, j, m, i] * ℰ𝓍3
31+
𝛥r392[i, j, k] = 𝛥r392[i, j, k] + ℰ𝓍2
32+
𝛥x923[m, k, n] = 𝛥x923[m, k, n] + ℰ𝓍4
33+
end
34+
end
35+
end
36+
end
37+
end
38+
𝛥r392, 𝛥x923
39+
end
40+
241
@testset "Many Loop Reductions" begin
342
A = rand((2:6)...);
443
N = ndims(A)
@@ -53,5 +92,16 @@
5392
end
5493
@test B ≈ sum(A, dims = dims)
5594
end
95+
96+
r392 = rand(3,9,2);
97+
x923 = rand(9,2,3);
98+
K = rand(3,9,9,3);
99+
𝛥r392_1, 𝛥x923_1, 𝛥r392_2, 𝛥x923_2, 𝛥ℛ = similar(r392), similar(x923), similar(r392), similar(x923), copy(K);
100+
𝛥r392_1 .= -1; 𝛥x923_1 .= -1; 𝛥r392_2 .= -1; 𝛥x923_2 .= -1;
101+
102+
mismatchedreductions_noturbo!(𝛥r392_1, 𝛥x923_1, 𝛥ℛ, K, r392, x923)
103+
@time mismatchedreductions!(𝛥r392_2, 𝛥x923_2, 𝛥ℛ, K, r392, x923)
104+
@test 𝛥r392_1 ≈ 𝛥r392_2
105+
@test 𝛥x923_1 ≈ 𝛥x923_2
56106
end
57107

0 commit comments

Comments
(0)