Skip to content

Commit 3a98d25

Browse files
committed
Don't consider loops statically known to have a single iteration as valid for threading.
1 parent 3a7dc65 commit 3a98d25

File tree

3 files changed

+56
-48
lines changed

3 files changed

+56
-48
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.31"
4+
version = "0.12.32"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_threads.jl

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -644,26 +644,32 @@ function thread_two_loops_expr(
644644
end
645645

646646
function valid_thread_loops(ls::LoopSet)
647-
order, u₁loop, u₂loop, vectorized, u₁, u₂, c, shouldinline = choose_order_cost(ls)
648-
# NOTE: `names` are being placed in the opposite order here versus normal lowering!
649-
copyto!(names(ls), order); init_loop_map!(ls)
650-
u₁loop = getloop(ls, u₁loop)
651-
_u₂loop = getloopid_or_nothing(ls, u₂loop)
652-
u₂loop = _u₂loop === nothing ? u₁loop : getloop_from_id(ls, _u₂loop)
653-
ua = UnrollArgs(u₁loop, u₂loop, getloop(ls, vectorized), u₁, u₂, u₂)
654-
valid_thread_loop = fill(true, length(order))
655-
for op ∈ operations(ls)
656-
if isstore(op) && (length(reduceddependencies(op)) > 0)
657-
for reduceddep ∈ reduceddependencies(op)
658-
for (i,o) ∈ enumerate(order)
659-
if o === reduceddep
660-
valid_thread_loop[i] = false
661-
end
662-
end
663-
end
647+
order, u₁loop, u₂loop, vectorized, u₁, u₂, c, shouldinline = choose_order_cost(ls)
648+
# NOTE: `names` are being placed in the opposite order here versus normal lowering!
649+
copyto!(names(ls), order); init_loop_map!(ls)
650+
u₁loop = getloop(ls, u₁loop)
651+
_u₂loop = getloopid_or_nothing(ls, u₂loop)
652+
u₂loop = _u₂loop === nothing ? u₁loop : getloop_from_id(ls, _u₂loop)
653+
ua = UnrollArgs(u₁loop, u₂loop, getloop(ls, vectorized), u₁, u₂, u₂)
654+
valid_thread_loop = fill(true, length(order))
655+
for op ∈ operations(ls)
656+
if isstore(op) && (length(reduceddependencies(op)) > 0)
657+
for reduceddep ∈ reduceddependencies(op)
658+
for (i,o) ∈ enumerate(order)
659+
if o === reduceddep
660+
valid_thread_loop[i] = false
661+
end
664662
end
663+
end
665664
end
666-
valid_thread_loop, ua, c
665+
end
666+
for (i,o) ∈ enumerate(order)
667+
loop = getloop(ls, o)
668+
if isstaticloop(loop) & length(loop) ≤ 1
669+
valid_thread_loop[i] = false
670+
end
671+
end
672+
valid_thread_loop, ua, c
667673
end
668674
function avx_threads_expr(
669675
ls::LoopSet, UNROLL::Tuple{Bool,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt},

test/threading.jl

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -96,40 +96,42 @@ function convlayer_direct!(
9696
end
9797

9898
@testset "Threading" begin
99-
@show @__LINE__
100-
dcd = DenseConvDims{2,(5,5),3,6}()
101-
kern4 = rand(Float32, 5, 5, 3, 6);
102-
@time for M ∈ 17:50:267
103-
img = rand(Float32, M, M, 3, 100);
104-
outimage1 = Array{Float32}(undef, size(img,1)+1-size(kern4,1), size(img,2)+1-size(kern4,2), size(kern4,4), size(img,4));
105-
outimage2 = similar(outimage1);
99+
@show @__LINE__
100+
dcd = DenseConvDims{2,(5,5),3,6}()
101+
kern4 = rand(Float32, 5, 5, 3, 6);
102+
@time for M ∈ 17:50:267
103+
img = rand(Float32, M, M, 3, 100);
104+
outimage1 = Array{Float32}(undef, size(img,1)+1-size(kern4,1), size(img,2)+1-size(kern4,2), size(kern4,4), size(img,4));
105+
outimage2 = similar(outimage1);
106106

107-
convlayer!(outimage1, img, kern4, dcd);
108-
convlayer_direct!(outimage2, img, kern4, dcd);
109-
@test outimage1 ≈ outimage2
110-
end
111-
112-
@time for M ∈ 17:399
113-
# @show M
114-
K = M; N = M;
115-
A = rand(M,K); B = rand(K,N);
116-
@test dot(A,B) ≈ mydotavx(A,B)
107+
convlayer!(outimage1, img, kern4, dcd);
108+
convlayer_direct!(outimage2, img, kern4, dcd);
109+
@test outimage1 ≈ outimage2
110+
end
117111

118-
C1 = A * B; C0 = similar(C1);
119-
@test AmulB!(C0, A, B) ≈ C1
112+
@time for M ∈ 17:399
113+
# @show M
114+
K = M; N = M;
115+
A = rand(M,K); B = rand(K,N); b = rand(K);
116+
@test dot(A,B) ≈ mydotavx(A,B)
120117

121-
if VERSION ≥ v"1.6"
122-
x = randn(Complex{Float64}, 3M-1);
123-
W = randn(Complex{Float64}, 3M-1, 3M+1);
124-
y = randn(Complex{Float64}, 3M+1);
125-
@test dot(x,W,y) ≈ dot3(x,W,y)
126-
end
118+
C1 = A * B;
119+
@test AmulB!(similar(C1), A, B) ≈ C1
120+
c1 = A * b;
121+
@test AmulB!(similar(c1), A, b) ≈ c1
127122

128-
kern = OffsetArray(randn(3,3),-2,-2)
129-
out1 = OffsetArray(randn(size(A) .- 2), 1, 1)
130-
out2 = similar(out1);
131-
@test conv!(out1, A, kern) ≈ conv_baseline!(out2, A, kern)
123+
if VERSION ≥ v"1.6"
124+
x = randn(Complex{Float64}, 3M-1);
125+
W = randn(Complex{Float64}, 3M-1, 3M+1);
126+
y = randn(Complex{Float64}, 3M+1);
127+
@test dot(x,W,y) ≈ dot3(x,W,y)
132128
end
129+
130+
kern = OffsetArray(randn(3,3),-2,-2)
131+
out1 = OffsetArray(randn(size(A) .- 2), 1, 1)
132+
out2 = similar(out1);
133+
@test conv!(out1, A, kern) ≈ conv_baseline!(out2, A, kern)
134+
end
133135
end
134136

135137

0 commit comments

Comments
 (0)