@@ -21,21 +21,26 @@ function cudnnConvolutionDescriptorAndPaddedInput(cdims::DenseConvDims, x::Dense
21
21
# Naive implementation, is there a faster way?
22
22
# How much we need to pad x manually: The absolute difference between pad_left and pad_right, pad_top
23
23
# and pad_bottom etc. respectively. We keep the sign here though because we use it below to figure out
24
- # which side of x to pad.
25
- pad_manual = ntuple (i -> pad[2 (i- 1 )+ 1 ] - pad[2 (i- 1 )+ 2 ], sdims)
24
+ # which side of x to pad. pad_manual is stored as a CartesianIndex because it is added as an offset to the first/last indices of x below to locate x inside x_padded
25
+ pad_manual = CartesianIndex (ntuple (i -> i > sdims ? 0 : pad[2 (i- 1 )+ 1 ] - pad[2 (i- 1 )+ 2 ], ndims (x)))
26
+
26
27
# How much we can let cudnn pad: The smallest padding amount between pad_left and pad_right, pad_top
27
28
# and pad_bottom etc. respectively
28
29
pad_cudnn = ntuple (i -> min (pad[2 (i- 1 )+ 1 ], pad[2 (i- 1 )+ 2 ]), sdims)
29
30
30
31
x_padded_size = ntuple (i -> i <= sdims ? size (x, i) + abs (pad_manual[i]) : size (x ,i), ndims (x))
31
-
32
32
x_padded = similar (x, x_padded_size)
33
33
fill! (x_padded, 0 )
34
- # This is a bit yucky, but we are basically figuring out where in x_padded we shall insert x_inds
35
- # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim by dim in a loop
36
- x_inds = ntuple (i -> range (1 + max (0 , pad_manual[i]), size (x,i) - min (0 , - pad_manual[i])), sdims)
37
- x_padded[x_inds... , :, :] = x
38
- return cudnnConvolutionDescriptor (cdims, x_padded, pad_cudnn), x_padded, _x -> _x[x_inds... ,:,:]
34
+ # This is a bit yucky, but we are basically figuring out where in x_padded we shall insert x
35
+ # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim
36
+ # by dim to an array in a loop
37
+ xIs = CartesianIndices (x)
38
+ xI_first = first (xIs)
39
+ xI_last = last (xIs)
40
+ xIs_pad = max (xI_first, xI_first + pad_manual) : max (xI_last, xI_last + pad_manual)
41
+ x_padded[xIs_pad] = x
42
+
43
+ return cudnnConvolutionDescriptor (cdims, x_padded, pad_cudnn), x_padded, _x -> _x[xIs_pad]
39
44
end
40
45
41
46
function cudnnConvolutionDescriptor (cdims:: DenseConvDims , x:: DenseCuArray{T} , pad = nnlibPadding (cdims)) where T
0 commit comments