Skip to content

Commit 267ff62

Browse files
maxfreu (Max Freudenberg)
authored and committed
save allocs during algorithm search
Sets `allocateTmpBuf` to `false` in the `cudnnConvolutionXXAlgoPerf` calls when `beta` is zero, which saves an allocation in that case.
1 parent 24cd95d commit 267ff62

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

ext/NNlibCUDA/src/cudnn/conv.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ function ∇conv_data!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, w::DenseCuArray
9797
alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
9898
convDesc, dx, depad = cudnnConvolutionDescriptorAndPaddedInput(cdims, dx)
9999
xDesc, yDesc, wDesc = cudnnTensorDescriptor(dx), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(w)
100-
p = cudnnConvolutionBwdDataAlgoPerf(wDesc, w, yDesc, dy, convDesc, xDesc, dx)
100+
p = cudnnConvolutionBwdDataAlgoPerf(wDesc, w, yDesc, dy, convDesc, xDesc, dx, beta!=0)
101101
with_workspace(p.memory) do workspace
102102
cudnnConvolutionBackwardData(handle(), alpha, wDesc, w, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, xDesc, dx)
103103
end
@@ -115,7 +115,7 @@ function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArr
115115
alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
116116
convDesc, x, _ = cudnnConvolutionDescriptorAndPaddedInput(cdims, x)
117117
xDesc, yDesc, wDesc = cudnnTensorDescriptor(x), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(dw)
118-
p = cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, yDesc, dy, convDesc, wDesc, dw);
118+
p = cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, yDesc, dy, convDesc, wDesc, dw, beta!=0);
119119
with_workspace(p.memory) do workspace
120120
cudnnConvolutionBackwardFilter(handle(), alpha, xDesc, x, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, wDesc, dw);
121121
end

0 commit comments

Comments
 (0)