Merge pull request #54 from tejank10/conv_transpose

staticfloat · web-flow · commit 8546f3caa452 · 2019-02-01T09:09:27.000+08:00
2D Transpose Convolutions
diff --git a/src/conv.jl b/src/conv.jl
@@ -17,30 +17,74 @@ function cdims(x::NTuple{N}, w::NTuple{N}, pad, stride) where N
   end
 end
 
+
+# Conv Transpose dims
+
+function ctdims(x::NTuple{N}, w::NTuple{N}, pad, stride, dilation) where N
+  # Output dims of a transposed convolution with input dims `x` and kernel dims `w`.
+  ntuple(Val(N)) do d
+    # Last dim is copied from the input, second-to-last from the kernel.
+    d == N && return x[N]
+    d == N-1 && return w[N-1]
+    # Every earlier dim: (x - 1) * stride, plus the dilated kernel extent,
+    # minus the padding on both sides, plus one.
+    return (x[d] - 1) * stride[d] + dilation[d] * (w[d] - 1) - 2*pad[d] + 1
+  end
+end
+
+
+# Kernel dims
+
+function wdims(x::NTuple{N}, y::NTuple{N}, pad, stride, dilation) where N
+  # Kernel dims implied by input dims `x` and output dims `y`.
+  ntuple(Val(N)) do d
+    # Last two dims come from the second-to-last dims of `y` and `x`.
+    d == N && return y[N-1]
+    d == N-1 && return x[N-1]
+    # Every earlier dim: 1 + (x + 2*pad - 1 - (y - 1) * stride) ÷ dilation,
+    # using truncating integer division.
+    return 1 + div((1 - y[d]) * stride[d] + x[d] + 2pad[d] - 1, dilation[d])
+  end
+end
+
 # Interface
 
 head(x) = reverse(Base.tail(reverse(x)))
 padtuple(x::Tuple,p::Integer) = map(_->p, head(head(x)))
 padtuple(x::Tuple,p::Tuple) = p
 padtuple(x::AbstractArray,p) = padtuple(size(x),p)
 
-function conv(x::A, w::A; pad = 0, stride = 1, dilation = 1) where A<:AbstractArray
+function conv(x::A, w::A; size=nothing, pad = 0, stride = 1, dilation = 1) where A<:AbstractArray
   pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
-  conv!(similar(x, cdims(size(x), dilation_dims(w, dilation), pad_, stride_)),
-        x, w, pad = pad_, stride = stride_, dilation = dilation)
+  if size === nothing  # no explicit output size given: compute it with cdims
+    size = cdims(Base.size(x), dilation_dims(w, dilation), pad_, stride_)  # kwarg `size` shadows Base.size, hence the qualifier
+  end
+  conv!(similar(x, size), x, w, pad = pad_, stride = stride_, dilation = dilation)  # result buffer has x's array type
 end
 
-function crosscor(x::A, w::A; pad = 0, stride = 1, dilation = 1) where A<:AbstractArray
+function crosscor(x::A, w::A; size=nothing, pad = 0, stride = 1, dilation = 1) where A<:AbstractArray
   pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
-  crosscor!(similar(x, cdims(size(x), dilation_dims(w, dilation), pad_, stride_)),
-        x, w, pad = pad_, stride = stride_, dilation = dilation)
+  if size === nothing  # no explicit output size given: compute it with cdims
+    size = cdims(Base.size(x), dilation_dims(w, dilation), pad_, stride_)  # kwarg `size` shadows Base.size, hence the qualifier
+  end
+  crosscor!(similar(x, size), x, w, pad = pad_, stride = stride_, dilation = dilation)  # result buffer has x's array type
 end
 
-∇conv_data(dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, flipkernel = 0) where A<:AbstractArray =
-  ∇conv_data!(zero(x), dy, x, w; pad = pad, stride = stride, dilation = dilation, flipkernel=flipkernel)
+function ∇conv_data(dy::A, w::A; size=nothing, pad = 0, stride = 1, dilation = 1, flipkernel = 0) where A<:AbstractArray
+  pad_, stride_, dilation_ = padtuple(dy, pad), padtuple(dy, stride), padtuple(dy, dilation)  # integers become one entry per dim except the last two
+  if size === nothing  # no explicit result size given: derive it from dy and w via ctdims
+    size = ctdims(Base.size(dy), Base.size(w), pad_, stride_, dilation_)  # kwarg `size` shadows Base.size, hence the qualifier
+  end  # NOTE(review): the buffer below comes from similar(), i.e. uninitialised; confirm ∇conv_data! overwrites every element
+  ∇conv_data!(similar(dy, size), dy, w, pad = pad_, stride = stride_, dilation = dilation_, flipkernel=flipkernel)
+end
 
-∇conv_filter(dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, flipkernel=0) where A<:AbstractArray =
-  ∇conv_filter!(zero(w), dy, x, w; pad = pad, stride = stride, dilation = dilation, flipkernel=flipkernel)
+function ∇conv_filter(dy::A, x::A; size = nothing, pad = 0, stride = 1, dilation = 1, flipkernel=0) where A<:AbstractArray
+  pad_, stride_, dilation_ = padtuple(dy, pad), padtuple(dy, stride), padtuple(dy, dilation)  # integers become one entry per dim except the last two
+  if size === nothing  # no explicit kernel size given: infer it from x and dy via wdims
+    size = wdims(Base.size(x), Base.size(dy), pad_, stride_, dilation_)  # kwarg `size` shadows Base.size, hence the qualifier
+  end
+  # (next line) zero-filled result buffer, allocated once; the normalised tuples are forwarded, as ∇conv_data does
+  ∇conv_filter!(fill!(similar(dy, size), 0), dy, x; pad = pad_, stride = stride_, dilation = dilation_, flipkernel=flipkernel)
+end
 
 # N-D dispatch
 
@@ -56,18 +100,16 @@ function crosscor!(y::AbstractArray, x::AbstractArray, w::AbstractArray;
     conv!(y, x, w, pad=pad, stride=stride, dilation=dilation, flipkernel=1)
 end
 
-function ∇conv_filter!(dw::AbstractArray{T,3}, dy::AbstractArray{T,3},
-                       x::AbstractArray{T,3}, w::AbstractArray{T,3};
+function ∇conv_filter!(dw::AbstractArray{T,3}, dy::AbstractArray{T,3}, x::AbstractArray{T,3};
                        pad = 0, stride = 1, dilation = 1, flipkernel=0) where T
-    args = map(x -> reshape(x, size(x,1),1,size(x,2),size(x,3)), (dw, dy, x, w))
+    args = map(x -> reshape(x, size(x,1),1,size(x,2),size(x,3)), (dw, dy, x))
     ∇conv_filter!(args..., pad = (pad...,0), stride = (stride...,1), dilation = (dilation...,1), flipkernel=flipkernel)
     return dw
 end
 
-function ∇conv_data!(dx::AbstractArray{T,3}, dy::AbstractArray{T,3},
-                     x::AbstractArray{T,3}, w::AbstractArray{T,3};
-                     pad = 0, stride = 1, dilation = 1, flipkernel = 0) where T
-    args = map(x -> reshape(x, size(x,1),1,size(x,2),size(x,3)), (dx, dy, x, w))
+function ∇conv_data!(dx::AbstractArray{T,3}, dy::AbstractArray{T,3}, w::AbstractArray{T,3};
+                     pad = 0, stride = 1, dilation = 1, flipkernel = 0) where T
+    args = map(a -> reshape(a, size(a, 1), 1, size(a, 2), size(a, 3)), (dx, dy, w))  # insert a singleton 2nd dim so the arrays are 4-D
     ∇conv_data!(args..., pad = (pad...,0), stride = (stride...,1), dilation = (dilation..., 1), flipkernel = flipkernel)
     return dx
 end
@@ -76,25 +118,25 @@ conv!(y::AbstractArray{T,4}, x::AbstractArray{T,4}, w::AbstractArray{T,4};
       pad = 0, stride = 1, dilation = 1, flipkernel=0) where T =
   conv2d!(y, x, w, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
 
-∇conv_filter!(dw::AbstractArray{T,4}, dy::AbstractArray{T,4}, x::AbstractArray{T,4}, w::AbstractArray{T,4};
+∇conv_filter!(dw::AbstractArray{T,4}, dy::AbstractArray{T,4}, x::AbstractArray{T,4};
               pad = 0, stride = 1, dilation = 1, flipkernel=0) where T =
-  conv2d_grad_w!(dw, x, w, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
+  conv2d_grad_w!(dw, x, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
 
-∇conv_data!(dx::AbstractArray{T,4}, dy::AbstractArray{T,4}, x::AbstractArray{T,4}, w::AbstractArray{T,4};
+∇conv_data!(dx::AbstractArray{T,4}, dy::AbstractArray{T,4}, w::AbstractArray{T,4};
             pad = 0, stride = 1, dilation = 1, flipkernel=0) where T =
-  conv2d_grad_x!(dx, x, w, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
+  conv2d_grad_x!(dx, w, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
 
 conv!(y::AbstractArray{T,5}, x::AbstractArray{T,5}, w::AbstractArray{T,5};
       pad = 0, stride = 1, dilation = 1, flipkernel=0) where T =
   conv3d!(y, x, w, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
 
-∇conv_filter!(dw::AbstractArray{T,5}, dy::AbstractArray{T,5}, x::AbstractArray{T,5}, w::AbstractArray{T,5};
+∇conv_filter!(dw::AbstractArray{T,5}, dy::AbstractArray{T,5}, x::AbstractArray{T,5};
               pad = 0, stride = 1, dilation = 1, flipkernel=0) where T =
-  conv3d_grad_w!(dw, x, w, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
+  conv3d_grad_w!(dw, x, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
 
-∇conv_data!(dx::AbstractArray{T,5}, dy::AbstractArray{T,5}, x::AbstractArray{T,5}, w::AbstractArray{T,5};
+∇conv_data!(dx::AbstractArray{T,5}, dy::AbstractArray{T,5}, w::AbstractArray{T,5};
             pad = 0, stride = 1, dilation = 1, flipkernel=0) where T =
-  conv3d_grad_x!(dx, x, w, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
+  conv3d_grad_x!(dx, w, dy, padding = pad, stride = stride, dilation = dilation, mode=flipkernel)
 
   # Depthwise Conv
 
@@ -216,3 +258,9 @@ meanpool_cpu!(y::AbstractArray{<:Real,5}, x::AbstractArray{<:Real,5}, k::Dims{3}
               k::Dims{3}; pad = (0,0), stride = k) =
   meanpool3d_grad!(dx, dy, y, x,
                    window = k, padding = pad, stride = stride)
+
+# Deprecated
+
+# 0.4.2
+@deprecate ∇conv_data(dy::A, x::A, w::A; kw...) where A<:AbstractArray ∇conv_data(dy, w; size=size(x), kw...)
+@deprecate ∇conv_filter(dy::A, x::A, w::A; kw...) where A<:AbstractArray ∇conv_filter(dy, x; size=size(w), kw...)
diff --git a/src/impl/conv.jl b/src/impl/conv.jl
@@ -278,15 +278,15 @@ function conv2d!(y::AbstractArray{T,4}, x::AbstractArray{T,4}, w::AbstractArray{
     return y
 end
 
-function conv2d_grad_w!(dw::AbstractArray{T,4}, x::AbstractArray{T,4}, w::AbstractArray{T,4}, dy::AbstractArray{T,4};
+function conv2d_grad_w!(dw::AbstractArray{T,4}, x::AbstractArray{T,4}, dy::AbstractArray{T,4};
                    padding=0, stride=1, dilation=1, mode=0, alpha=1) where T
     # dw = x'*dy
     Wx,Hx,Cx,Nx = size(x)
-    Ww,Hw,C1,C2 = size(w)
+    Ww,Hw,C1,C2 = size(dw)
     Wy,Hy,Cy,Ny = size(dy)
     # if mode != 0 && mode != 1; throw(ArgumentError("conv2d only supports mode=0 or 1.")); end
-    # @assert Cx==C1 && Cy==C2 && Ny==Nx
-    x2dims = im2col_dims(w,dy)
+    @assert Cx==C1 && Cy==C2 && Ny==Nx
+    x2dims = im2col_dims(dw,dy)
     x2 = similar(x, x2dims)
     # op(A) is an m-by-k matrix, op(B) is a k-by-n matrix, C is an m-by-n matrix.
     Y,M,N,K = Wy*Hy*Cy,Ww*Hw*Cx,Cy,Wy*Hy
@@ -296,29 +296,29 @@ function conv2d_grad_w!(dw::AbstractArray{T,4}, x::AbstractArray{T,4}, w::Abstra
     (d1,d2) = psize(dilation,x)
     dyi = 1
     @inbounds for n in 1:Nx
-        im2col2d!(w, x, x2, n, p1, p2, s1, s2, d1, d2, mode)
+        im2col2d!(dw, x, x2, n, p1, p2, s1, s2, d1, d2, mode)
         gemm!('T','N',M,N,K,alpha,pointer(x2),pointer(dy,dyi),beta,pointer(dw))
         dyi += Y
     end
     return dw
 end
 
-function conv2d_grad_x!(dx::AbstractArray{T,4}, x::AbstractArray{T,4}, w::AbstractArray{T,4}, dy::AbstractArray{T,4};
+function conv2d_grad_x!(dx::AbstractArray{T,4}, w::AbstractArray{T,4}, dy::AbstractArray{T,4};
                    padding=0, stride=1, dilation=1, mode=0, alpha=1) where T
     # dx = dy*w'
-    Wx,Hx,Cx,Nx = size(x)
+    Wx,Hx,Cx,Nx = size(dx)
     Ww,Hw,C1,C2 = size(w)
     Wy,Hy,Cy,Ny = size(dy)
     # if mode != 0 && mode != 1; throw(ArgumentError("conv2d only supports mode=0 or 1.")); end
     @assert Cx==C1 && Cy==C2 && Ny==Nx
     x2dims = im2col_dims(w,dy)
-    x2 = similar(x, x2dims)
+    x2 = similar(dx, x2dims)
     # op(A) is an m-by-k matrix, op(B) is a k-by-n matrix, C is an m-by-n matrix.
     Y,M,N,K = Wy*Hy*Cy,Wy*Hy,Ww*Hw*Cx,Cy
     alpha,beta = T(alpha),T(0)
-    (p1,p2) = psize(padding,x)
-    (s1,s2) = psize(stride,x)
-    (d1,d2) = psize(dilation,x)
+    (p1,p2) = psize(padding,dx)
+    (s1,s2) = psize(stride,dx)
+    (d1,d2) = psize(dilation,dx)
     dyi = 1
     @inbounds for n in 1:Nx
         gemm!('N','T',M,N,K,alpha,pointer(dy,dyi),pointer(w),beta,pointer(x2))
@@ -352,7 +352,7 @@ function col2im2d!(w::NTuple{4,Int}, x::AbstractArray{T,4}, x2::AbstractArray{T,
     Ww,Hw,C1,C2 = w
     xn = x[:, :, :, n]
     col2im_2d!(x2,xn,Wx,Hx,Cx,Ww,Hw,p1,p2,s1,s2,1,1,mode)
-    x[:, :, :, n] = xn
+    x[:, :, :, n] .= xn
     return x
 end
 
@@ -362,7 +362,7 @@ function col2im2d!(w::AbstractArray{T,4}, x::AbstractArray{T,4}, x2::AbstractArr
     Ww,Hw,C1,C2 = size(w)
     xn = x[:, :, :, n]
     col2im_2d!(x2,xn,Wx,Hx,Cx,Ww,Hw,p1,p2,s1,s2,d1,d2,mode)
-    x[:, :, :, n] = xn
+    x[:, :, :, n] .= xn
     return x
 end
 
@@ -390,15 +390,15 @@ function conv3d!(y::AbstractArray{T,5}, x::AbstractArray{T,5}, w::AbstractArray{
     return y
 end
 
-function conv3d_grad_w!(dw::AbstractArray{T,5}, x::AbstractArray{T,5}, w::AbstractArray{T,5}, dy::AbstractArray{T,5};
+function conv3d_grad_w!(dw::AbstractArray{T,5}, x::AbstractArray{T,5}, dy::AbstractArray{T,5};
                    padding=0, stride=1, dilation = 1, mode=0, alpha=1) where T
     # dw = x'*dy
     Wx,Hx,Dx,Cx,Nx = size(x)
-    Ww,Hw,Dw,C1,C2 = size(w)
+    Ww,Hw,Dw,C1,C2 = size(dw)
     Wy,Hy,Dy,Cy,Ny = size(dy)
     # if mode != 0 && mode != 1; throw(ArgumentError("conv2d only supports mode=0 or 1.")); end
-    # @assert Cx==C1 && Cy==C2 && Ny==Nx
-    x2dims = im2col_dims(w,dy)
+    @assert Cx==C1 && Cy==C2 && Ny==Nx
+    x2dims = im2col_dims(dw,dy)
     x2 = similar(x, x2dims)
     # op(A) is an m-by-k matrix, op(B) is a k-by-n matrix, C is an m-by-n matrix.
     Y,M,N,K = Wy*Hy*Dy*Cy,Ww*Hw*Dw*Cx,Cy,Wy*Hy*Dy
@@ -408,29 +408,29 @@ function conv3d_grad_w!(dw::AbstractArray{T,5}, x::AbstractArray{T,5}, w::Abstra
     (d1,d2,d3) = psize(dilation,x)
     dyi = 1
     @inbounds for n in 1:Nx
-        im2col3d!(w, x, x2, n, p1, p2, p3, s1, s2, s3, d1, d2, d3, mode)
+        im2col3d!(dw, x, x2, n, p1, p2, p3, s1, s2, s3, d1, d2, d3, mode)
         gemm!('T','N',M,N,K,alpha,pointer(x2),pointer(dy,dyi),beta,pointer(dw))
         dyi += Y
     end
     return dw
 end
 
-function conv3d_grad_x!(dx::AbstractArray{T,5}, x::AbstractArray{T,5}, w::AbstractArray{T,5}, dy::AbstractArray{T,5};
+function conv3d_grad_x!(dx::AbstractArray{T,5}, w::AbstractArray{T,5}, dy::AbstractArray{T,5};
                    padding=0, stride=1, dilation = 1, mode=0, alpha=1) where T
     # dx = dy*w'
-    Wx,Hx,Dx,Cx,Nx = size(x)
+    Wx,Hx,Dx,Cx,Nx = size(dx)
     Ww,Hw,Dw,C1,C2 = size(w)
     Wy,Hy,Dy,Cy,Ny = size(dy)
     # if mode != 0 && mode != 1; throw(ArgumentError("conv2d only supports mode=0 or 1.")); end
     @assert Cx==C1 && Cy==C2 && Ny==Nx
     x2dims = im2col_dims(w,dy)
-    x2 = similar(x, x2dims)
+    x2 = similar(dx, x2dims)
     # op(A) is an m-by-k matrix, op(B) is a k-by-n matrix, C is an m-by-n matrix.
     Y,M,N,K = Wy*Hy*Dy*Cy,Wy*Hy*Dy,Ww*Hw*Dw*Cx,Cy
     alpha,beta = T(alpha),T(0)
-    (p1,p2,p3) = psize(padding,x)
-    (s1,s2,s3) = psize(stride,x)
-    (d1,d2,d3) = psize(dilation,x)
+    (p1,p2,p3) = psize(padding,dx)
+    (s1,s2,s3) = psize(stride,dx)
+    (d1,d2,d3) = psize(dilation,dx)
     dyi = 1
     @inbounds for n in 1:Nx
         gemm!('N','T',M,N,K,alpha,pointer(dy,dyi),pointer(w),beta,pointer(x2))
diff --git a/test/conv.jl b/test/conv.jl
@@ -1,4 +1,4 @@
-using NNlib: conv, ∇conv_filter, ∇conv_data, ∇maxpool, maxpool, depthwiseconv, ∇depthwiseconv_filter, ∇depthwiseconv_data
+using NNlib: conv, crosscor, ∇conv_filter, ∇conv_data, ∇maxpool, maxpool, depthwiseconv, ∇depthwiseconv_filter, ∇depthwiseconv_data
 
 @testset "conv2d" begin
     x = reshape(Float64[1:20;], 5, 4, 1, 1)
@@ -10,6 +10,12 @@ using NNlib: conv, ∇conv_filter, ∇conv_data, ∇maxpool, maxpool, depthwisec
         49 99 149;
         59 109 159.]
 
+    @test dropdims(crosscor(x, w), dims = (3,4)) == [
+	 51  101  151;
+	 61  111  161;
+ 	 71  121  171;
+	 81  131  181.]
+
     @test dropdims(conv(Float32.(x), Float32.(w)), dims=(3,4)) == Float32.([
         29 79 129;
         39 89 139;
@@ -59,26 +65,26 @@ using NNlib: conv, ∇conv_filter, ∇conv_data, ∇maxpool, maxpool, depthwisec
     # correctness of gradients is cross-checked with CUDNN.jl
     # (it's assumed convolution code won't change often)
 
-    @test size(∇conv_filter(reshape(rand(4,3), 4, 3, 1, 1), x, w)) == size(w)
-    @test size(∇conv_data(reshape(rand(4,3), 4, 3, 1, 1), x, w)) == size(x)
+    @test size(∇conv_filter(reshape(rand(4,3), 4, 3, 1, 1), x)) == size(w)
+    @test size(∇conv_data(reshape(rand(4,3), 4, 3, 1, 1), w)) == size(x)
 
     # Test that stride/pad work backward as well
     y = conv(x, w; stride=2, pad=1, dilation=2)
     @test size(y) == (3, 2, 1, 1)
-    @test size(∇conv_filter(y, x, w; stride=2, pad=1, dilation=2)) == size(w)
-    @test size(∇conv_data(y, x, w; stride=2, pad=1, dilation=2)) == size(x)
+    @test size(∇conv_filter(y, x; size=size(w), stride=2, pad=1, dilation=2)) == size(w)
+    @test size(∇conv_data(y, w; size=size(x), stride=2, pad=1, dilation=2)) == size(x)
 
 	# NaN tests for dilation backward pass: filters
 	dy = randn(size(ys[1]))
 	dws = []
 	for idx in 1:1000
-	    push!(dws, ∇conv_filter(dy, x, w; dilation=2))
+	    push!(dws, ∇conv_filter(dy, x; size=size(w), dilation=2))
 	end
 
 	# NaN tests for dilation backward pass: input
 	dxs = []
 	for idx in 1:1000
-	    push!(dxs, ∇conv_data(dy, x, w; dilation=2))
+	    push!(dxs, ∇conv_data(dy, w; size=size(x), dilation=2))
 	end
 
 	@test !any([any(isnan.(dws[idx])) for idx in 1:1000])
@@ -107,7 +113,7 @@ end
         X = copy(x[:,:,i:i,:]);
         W = copy(permutedims(w[:,:,:,i:i],[1,2,4,3]));
         DY = copy(dy[:,:,2i-1:2i,:]);
-        res = ∇conv_data(DY,X,W)
+        res = ∇conv_data(DY,W;size=size(X))
         @test dropdims(z[:,:,i:i,:], dims=(3,4)) == dropdims(res, dims=(3,4))
     end
 
@@ -116,7 +122,7 @@ end
         X = copy(x[:,:,i:i,:]);
         W = copy(permutedims(w[:,:,:,i:i],[1,2,4,3]))
         DY = copy(dy[:,:,2i-1:2i,:])
-        res = ∇conv_filter(DY,X,W)
+        res = ∇conv_filter(DY,X; size=size(W))
         @test dropdims(z[:,:,:,i:i]; dims=(4)) == dropdims(res; dims=(3))
     end
 
@@ -236,20 +242,20 @@ end
     # correctness of gradients is cross-checked with CUDNN.jl
     # (it's assumed convolution code won't change often)
 
-    @test size(∇conv_filter(reshape(rand(4,3,2), 4, 3, 2, 1, 1), x, w)) == size(w)
-    @test size(∇conv_data(reshape(rand(4,3,2), 4, 3, 2, 1, 1), x, w)) == size(x)
+    @test size(∇conv_filter(reshape(rand(4,3,2), 4, 3, 2, 1, 1), x; size=size(w))) == size(w)
+    @test size(∇conv_data(reshape(rand(4,3,2), 4, 3, 2, 1, 1), w; size=size(x))) == size(x)
 
 	# NaN tests for dilation backward pass: filters
 	dy = randn(size(ys[1]))
 	dws = []
 	for idx in 1:1000
-	    push!(dws, ∇conv_filter(dy, x, w; dilation=2))
+	    push!(dws, ∇conv_filter(dy, x; size=size(w), dilation=2))
 	end
 
 	# NaN tests for dilation backward pass: input
 	dxs = []
 	for idx in 1:1000
-	    push!(dxs, ∇conv_data(dy, x, w; dilation=2))
+	    push!(dxs, ∇conv_data(dy, w; size=size(x), dilation=2))
 	end
 
 	@test !any([any(isnan.(dws[idx])) for idx in 1:1000])