@@ -10,9 +10,42 @@ using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims,
 
 const CUDNNFloat = Union{Float16,Float32,Float64}
 
-function cudnnConvolutionDescriptor(cdims::DenseConvDims, x::DenseCuArray{T}) where T
+function cudnnConvolutionDescriptorAndPaddedInput(cdims::DenseConvDims, x::DenseCuArray{T}) where T
+    # The main purpose of this function is to catch asymmetric padding, which cudnn does not support.
+    # If we find asymmetric padding we'll make a copy of x which is manually padded so that we can
+    # call cudnn with symmetric padding.
+    pad = NNlib.padding(cdims)
+    sdims = NNlib.spatial_dims(cdims)
+    all(i -> pad[i] .== pad[i+1], 1:2:2sdims) && return (cudnnConvolutionDescriptor(cdims, x), x, identity)
+
+    # Naive implementation, is there a faster way?
+    # How much we need to pad x manually: the absolute difference between pad_left and pad_right, pad_top
+    # and pad_bottom etc. respectively. We keep the sign here though because we use it below to figure out
+    # which side of x to pad. Oh, and we use a CartesianIndex as we will mainly use this to index into x.
+    pad_manual = CartesianIndex(ntuple(i -> i > sdims ? 0 : pad[2(i-1)+1] - pad[2(i-1)+2], ndims(x)))
+
+    # How much we can let cudnn pad: the smallest padding amount between pad_left and pad_right, pad_top
+    # and pad_bottom etc. respectively.
+    pad_cudnn = ntuple(i -> min(pad[2(i-1)+1], pad[2(i-1)+2]), sdims)
+
+    x_padded_size = ntuple(i -> i <= sdims ? size(x, i) + abs(pad_manual[i]) : size(x, i), ndims(x))
+    x_padded = similar(x, x_padded_size)
+    fill!(x_padded, 0)
+    # This is a bit yucky, but we are basically figuring out where in x_padded we shall insert x.
+    # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim
+    # by dim to an array in a loop.
+    xIs = CartesianIndices(x)
+    xI_first = first(xIs)
+    xI_last = last(xIs)
+    xIs_pad = max(xI_first, xI_first + pad_manual):max(xI_last, xI_last + pad_manual)
+    x_padded[xIs_pad] = x
+
+    return cudnnConvolutionDescriptor(cdims, x_padded, pad_cudnn), x_padded, _x -> _x[xIs_pad]
+end
+
+function cudnnConvolutionDescriptor(cdims::DenseConvDims, x::DenseCuArray{T}, pad = nnlibPadding(cdims)) where T
     mode=(NNlib.flipkernel(cdims) ? CUDNN_CROSS_CORRELATION : CUDNN_CONVOLUTION)
-    cudnnConvolutionDescriptor(convdims(nnlibPadding(cdims), size(x),0),
+    cudnnConvolutionDescriptor(convdims(pad, size(x),0),
                                convdims(NNlib.stride(cdims),size(x),1),
                                convdims(NNlib.dilation(cdims),size(x),1),
                                mode,
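
To make the padding split concrete, here is a small CPU-only sketch (not part of the diff; the 2-D layout, sizes and padding values are made up for illustration) that runs the same arithmetic for a hypothetical convolution with pad = (left, right, top, bottom) = (2, 1, 0, 3) on a 5×5×1×1 input:

# Hypothetical example values; any asymmetric padding behaves analogously.
pad   = (2, 1, 0, 3)   # (pad_w_lo, pad_w_hi, pad_h_lo, pad_h_hi)
sdims = 2              # number of spatial dimensions
nd    = 4              # W × H × C × N layout, as NNlib uses

# Signed surplus per spatial dimension that has to be padded manually.
pad_manual = CartesianIndex(ntuple(i -> i > sdims ? 0 : pad[2(i-1)+1] - pad[2(i-1)+2], nd))
# Symmetric part that cudnn can be asked to apply itself.
pad_cudnn = ntuple(i -> min(pad[2(i-1)+1], pad[2(i-1)+2]), sdims)

x = reshape(collect(Float32, 1:25), 5, 5, 1, 1)
x_padded = zeros(Float32, ntuple(i -> i <= sdims ? size(x, i) + abs(pad_manual[i]) : size(x, i), nd))
xIs = CartesianIndices(x)
xIs_pad = max(first(xIs), first(xIs) + pad_manual):max(last(xIs), last(xIs) + pad_manual)
x_padded[xIs_pad] = x

pad_manual      # CartesianIndex(1, -3, 0, 0): one extra element at the front of dim 1, three at the end of dim 2
pad_cudnn       # (1, 0): the symmetric padding handed to cudnn
size(x_padded)  # (6, 8, 1, 1)
xIs_pad         # CartesianIndices((2:6, 1:5, 1:1, 1:1)): where x lands inside x_padded
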
@@ -30,7 +63,7 @@ function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, cdims
     if algo != -1
         @warn "algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
     end
-    d = cudnnConvolutionDescriptor(cdims, x)
+    d, x, _ = cudnnConvolutionDescriptorAndPaddedInput(cdims, x)
     cudnnConvolutionForward!(y, w, x, d; alpha, beta, z=y)
 end
 
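
As a hedged usage sketch of what this enables at the NNlib level (the names and sizes are made up, and a CUDA-capable GPU with these NNlibCUDA methods loaded is assumed): callers no longer need to reject or work around asymmetric padding, because conv! now pads a copy of the input itself and hands cudnn only the symmetric part.

using CUDA, NNlib

x = CUDA.rand(Float32, 7, 7, 3, 2)                  # W × H × C × N
w = CUDA.rand(Float32, 3, 3, 3, 4)
cdims = DenseConvDims(x, w; padding = (2, 1, 0, 3)) # asymmetric: left ≠ right, top ≠ bottom
y = NNlib.conv(x, w, cdims)                         # ends up in the conv! method above
size(y)  # (8, 8, 4, 2): spatial size (7+2+1-3+1, 7+0+3-3+1)
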
@@ -43,7 +76,7 @@ function conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
     end
-    d = cudnnConvolutionDescriptor(cdims, x)
+    d, x, _ = cudnnConvolutionDescriptorAndPaddedInput(cdims, x)
     # only relu and identity are supported by cudnnConvolutionForward!
     activation = (σ == NNlib.relu ? CUDNN_ACTIVATION_RELU : CUDNN_ACTIVATION_IDENTITY)
     cudnnConvolutionForward!(y, w, x, d; z, bias, activation, alpha, beta)
@@ -62,13 +95,13 @@ function ∇conv_data!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, w::DenseCuArray
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
     end
     alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
+    convDesc, dx, depad = cudnnConvolutionDescriptorAndPaddedInput(cdims, dx)
     xDesc, yDesc, wDesc = cudnnTensorDescriptor(dx), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(w)
-    convDesc = cudnnConvolutionDescriptor(cdims, dx)
     p = cudnnConvolutionBwdDataAlgoPerf(wDesc, w, yDesc, dy, convDesc, xDesc, dx)
     with_workspace(p.memory) do workspace
         cudnnConvolutionBackwardData(handle(), alpha, wDesc, w, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, xDesc, dx)
     end
-    return dx
+    return depad(dx)
 end
 
 function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArray{T},
@@ -80,8 +113,8 @@ function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArr
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
     end
     alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
+    convDesc, x, _ = cudnnConvolutionDescriptorAndPaddedInput(cdims, x)
     xDesc, yDesc, wDesc = cudnnTensorDescriptor(x), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(dw)
-    convDesc = cudnnConvolutionDescriptor(cdims, x)
     p = cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, yDesc, dy, convDesc, wDesc, dw);
     with_workspace(p.memory) do workspace
         cudnnConvolutionBackwardFilter(handle(), alpha, xDesc, x, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, wDesc, dw);
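
Since ∇conv_data! now computes into the padded buffer and crops the result back via depad, while ∇conv_filter! only consumes the padded copy of x, one way to sanity-check the change is to compare the GPU gradients against NNlib's CPU reference. A minimal sketch, assuming a CUDA-capable GPU and made-up sizes:

using CUDA, NNlib

x = rand(Float32, 7, 7, 3, 2)
w = rand(Float32, 3, 3, 3, 4)
cdims = DenseConvDims(x, w; padding = (2, 1, 0, 3))
dy = rand(Float32, 8, 8, 4, 2)   # spatial output size: (7+2+1-3+1, 7+0+3-3+1) = (8, 8)

# Data gradient: written into the padded dx on the GPU, then cropped by depad back to size(x).
dx_gpu = NNlib.∇conv_data(cu(dy), cu(w), cdims)
dx_cpu = NNlib.∇conv_data(dy, w, cdims)
isapprox(Array(dx_gpu), dx_cpu; rtol = 1f-4)   # expected to hold

# Filter gradient: the padded copy of x is only an input here, so nothing needs cropping.
dw_gpu = NNlib.∇conv_filter(cu(x), cu(dy), cdims)
dw_cpu = NNlib.∇conv_filter(x, dy, cdims)
isapprox(Array(dw_gpu), dw_cpu; rtol = 1f-4)   # expected to hold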