@@ -14,29 +14,26 @@ function cudnnConvolutionDescriptorAndPaddedInput(cdims::DenseConvDims, x::Dense
14
14
# The main purpose of this function is to catch asymmetric padding which cudnn does not support
15
15
# If we find asymmetric padding we'll make a copy of x which is manually padded so that we can
16
16
# call cudnn with symmetric padding.
17
- pad = collect (NNlib. padding (cdims)) # work with an array to make things more type stable
18
- all (pad[1 : 2 : end ] .== pad[2 : 2 : end ]) && return (cudnnConvolutionDescriptor (cdims, x), x, identity)
19
-
20
- # Maybe we should warn the user that this copies data, but other ML libs generally don't warn
17
+ pad = NNlib. padding (cdims)
21
18
sdims = NNlib. spatial_dims (cdims)
22
-
19
+ all (i -> pad[i] .== pad[i+ 1 ], 1 : 2 : 2 sdims) && return (cudnnConvolutionDescriptor (cdims, x), x, identity)
20
+
23
21
# Naive implementation, is there a faster way?
24
22
# How much we need to pad x manually: The absolute difference between pad_left and pad_right, pad_top
25
23
# and pad_bottom etc. respectively. We keep the sign here though because we use it below to figure out
26
24
# which side of x to pad.
27
- pad_manual = pad[1 : 2 : 2 sdims] . - pad[2 : 2 : 2 sdims]
25
+ pad_manual = ntuple (i -> pad[2 (i - 1 ) + 1 ] - pad[2 (i - 1 ) + 2 ], sdims)
28
26
# How much we can let cudnn pad: The smallest padding amount between pad_left and pad_right, pad_top
29
27
# and pad_bottom etc. respectively
30
- pad_cudnn = min . (pad[1 : 2 : 2 sdims ], pad[2 : 2 : 2 sdims])
28
+ pad_cudnn = ntuple (i -> min (pad[2 (i - 1 ) + 1 ], pad[2 (i - 1 ) + 2 ]), sdims)
31
29
30
+ x_padded_size = ntuple (i -> i <= sdims ? size (x, i) + abs (pad_manual[i]) : size (x ,i), ndims (x))
32
31
33
- x_padded = similar (x, (size (x)[1 : sdims] .+ abs .(pad_manual)). .. , size (x)[end - 1 : end ]. .. )
34
- # We could do the same yucky indexing stuff for the zeros too so we don't have to write zeros in the whole array.
35
- # Not sure if it is worth it though...
32
+ x_padded = similar (x, x_padded_size)
36
33
fill! (x_padded, 0 )
37
34
# This is a bit yucky, but we are basically figuring out the index ranges (x_inds) in x_padded where x shall be inserted
38
- # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim by dim in a loop
39
- x_inds = range . (1 . + max . (0 , pad_manual), size (x)[ 1 : sdims] . - min . (0 , . - pad_manual) )
35
+ # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim by dim in a loop
36
+ x_inds = ntuple (i -> range (1 + max (0 , pad_manual[i] ), size (x,i) - min (0 , - pad_manual[i])), sdims )
40
37
x_padded[x_inds... , :, :] = x
41
38
return cudnnConvolutionDescriptor (cdims, x_padded, pad_cudnn), x_padded, _x -> _x[x_inds... ,:,:]
42
39
end
0 commit comments