Skip to content

Commit fa5ae79

Browse files
committed
Use CartesianIndex for inserting x in x_padded
1 parent f3edb3b commit fa5ae79

File tree

1 file changed

+13
-8
lines changed

1 file changed

+13
-8
lines changed

ext/NNlibCUDA/src/cudnn/conv.jl

Lines changed: 13 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -21,21 +21,26 @@ function cudnnConvolutionDescriptorAndPaddedInput(cdims::DenseConvDims, x::Dense
2121
# Naive implementation, is there a faster way?
2222
# How much we need to pad x manually: The absolute difference between pad_left and pad_right, pad_top
2323
# and pad_bottom etc. respectively. We keep the sign here though because we use it below to figure out
24-
# which side of x to pad.
25-
pad_manual = ntuple(i -> pad[2(i-1)+1] - pad[2(i-1)+2], sdims)
24+
# which side of x to pad. Oh, and we use a CartesianIndex as we will mainly use this to index in x
25+
pad_manual = CartesianIndex(ntuple(i -> i > sdims ? 0 : pad[2(i-1)+1] - pad[2(i-1)+2], ndims(x)))
26+
2627
# How much we can let cudnn pad: The smallest padding amount between pad_left and pad_right, pad_top
2728
# and pad_bottom etc. respectively
2829
pad_cudnn = ntuple(i -> min(pad[2(i-1)+1], pad[2(i-1)+2]), sdims)
2930

3031
x_padded_size = ntuple(i -> i <= sdims ? size(x, i) + abs(pad_manual[i]) : size(x ,i), ndims(x))
31-
3232
x_padded = similar(x, x_padded_size)
3333
fill!(x_padded, 0)
34-
# This is a bit yucky, but we are basically figuring out where in x_padded we shall insert x_inds
35-
# Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim by dim in a loop
36-
x_inds = ntuple(i -> range(1 + max(0, pad_manual[i]), size(x,i) - min(0, -pad_manual[i])), sdims)
37-
x_padded[x_inds..., :, :] = x
38-
return cudnnConvolutionDescriptor(cdims, x_padded, pad_cudnn), x_padded, _x -> _x[x_inds...,:,:]
34+
# This is a bit yucky, but we are basically figuring out where in x_padded we shall insert x
35+
# Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim
36+
# by dim to an array in a loop
37+
xIs = CartesianIndices(x)
38+
xI_first = first(xIs)
39+
xI_last = last(xIs)
40+
xIs_pad = max(xI_first, xI_first + pad_manual) : max(xI_last, xI_last + pad_manual)
41+
x_padded[xIs_pad] = x
42+
43+
return cudnnConvolutionDescriptor(cdims, x_padded, pad_cudnn), x_padded, _x -> _x[xIs_pad]
3944
end
4045

4146
function cudnnConvolutionDescriptor(cdims::DenseConvDims, x::DenseCuArray{T}, pad = nnlibPadding(cdims)) where T

0 commit comments

Comments (0)