Skip to content

Commit 4461f3a

Browse files
committed
Fix conv accidentally broken just before release, fixes #207
1 parent e64841b commit 4461f3a

File tree

3 files changed

+56
-4
lines changed

3 files changed

+56
-4
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.0"
4+
version = "0.12.1"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -26,9 +26,9 @@ OffsetArrays = "1.4.1, 1.5"
2626
Requires = "1"
2727
SLEEFPirates = "0.6.12"
2828
Static = "0.2"
29-
ThreadingUtilities = "0.4"
29+
ThreadingUtilities = "0.4.1"
3030
UnPack = "1"
31-
VectorizationBase = "0.19.8"
31+
VectorizationBase = "0.19.9"
3232
julia = "1.5"
3333

3434
[extras]

src/codegen/lower_threads.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ function define_thread_blocks(threadedloop1, threadedloop2, vloop, u₁loop, u
437437
elseif vloop === threadedloop2
438438
define_vthread_blocks(threadedloop2, u₁loop, u₂loop, u₁, u₂, ntmax, 1)
439439
else
440-
:(choose_num_blocks(var"#nthreads#", StaticInt{$(Int(ntmax))}()))
440+
:((var"#thread#factor#0#", var"#thread#factor#1#") = choose_num_blocks(var"#nthreads#", StaticInt{$(Int(ntmax))}()))
441441
end
442442
end
443443
function thread_two_loops_expr(

test/threading.jl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,49 @@ function conv_baseline!(out, A, kern)
5555
out
5656
end
5757

58+
59+
struct DenseConvDims{N,K,C_in,C_out} end
60+
61+
function kernaxes(::DenseConvDims{2,K,C_in, C_out}) where {K,C_in, C_out}
62+
K₁ = LoopVectorization.StaticInt(1):LoopVectorization.StaticInt(K[1])
63+
K₂ = LoopVectorization.StaticInt(1):LoopVectorization.StaticInt(K[2])
64+
Cᵢₙ = LoopVectorization.StaticInt(1):LoopVectorization.StaticInt(C_in)
65+
Cₒᵤₜ = LoopVectorization.StaticInt(1):LoopVectorization.StaticInt(C_out)
66+
(K₁, K₂, Cᵢₙ, Cₒᵤₜ)
67+
end
68+
69+
function convlayer!(
70+
out::AbstractArray{<:Any,4}, img, kern,
71+
dcd::DenseConvDims{2, <:Any, <:Any, <:Any}
72+
)
73+
(K₁, K₂, Cᵢₙ, Cₒᵤₜ) = kernaxes(dcd)
74+
@avxt for j₁ ∈ axes(out,1), j₂ ∈ axes(out,2), d ∈ axes(out,4), o ∈ Cₒᵤₜ
75+
s = zero(eltype(out))
76+
for k₁ ∈ K₁, k₂ ∈ K₂, i ∈ Cᵢₙ
77+
s += img[j₁ + k₁ - 1, j₂ + k₂ - 1, i, d] * kern[k₁, k₂, i, o]
78+
end
79+
out[j₁, j₂, o, d] = s
80+
end
81+
out
82+
end
83+
function convlayer_direct!(
84+
out::AbstractArray{<:Any,4}, img, kern,
85+
dcd::DenseConvDims{2, <:Any, <:Any, <:Any}
86+
)
87+
(K₁, K₂, Cᵢₙ, Cₒᵤₜ) = kernaxes(dcd)
88+
@inbounds @fastmath for j₁ ∈ axes(out,1), j₂ ∈ axes(out,2), d ∈ axes(out,4), o ∈ Cₒᵤₜ
89+
s = zero(eltype(out))
90+
for k₁ ∈ K₁, k₂ ∈ K₂, i ∈ Cᵢₙ
91+
s += img[j₁ + k₁ - 1, j₂ + k₂ - 1, i, d] * kern[k₁, k₂, i, o]
92+
end
93+
out[j₁, j₂, o, d] = s
94+
end
95+
out
96+
end
97+
5898
@testset "Threading" begin
99+
dcd = DenseConvDims{2,(5,5),3,6}()
100+
kern4 = rand(Float32, 5, 5, 3, 6);
59101
for M ∈ 17:399
60102
# @show M
61103
K = M; N = M;
@@ -74,6 +116,16 @@ end
74116
out1 = OffsetArray(randn(size(A) .- 2), 1, 1)
75117
out2 = similar(out1);
76118
@test conv!(out1, A, kern) ≈ conv_baseline!(out2, A, kern)
119+
120+
121+
img = rand(Float32, M, M, 3, 100);
122+
out1 = Array{Float32}(undef, size(img,1)+1-size(kern4,1), size(img,2)+1-size(kern4,2), size(kern4,4), size(img,4));
123+
out2 = similar(out1);
124+
125+
convlayer!(out1, img, kern4, dcd);
126+
convlayer_direct!(out2, img, kern4, dcd);
127+
@test out1 ≈ out2
128+
77129
end
78130
end
79131

0 commit comments

Comments
 (0)