Commit c7c18a9

Merge pull request #58 from maleadt/cuda4
Updates for CUDA.jl 4.
2 parents bf49b9d + 0a970a5 · commit c7c18a9

File tree

10 files changed: +42 −40 lines

10 files changed

+42
-40
lines changed
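
The substance of the change set: as of CUDA.jl 4 the CUDNN wrappers live in the standalone cuDNN.jl package rather than the CUDA.CUDNN submodule, so the extension gains a cuDNN dependency and every "using CUDA.CUDNN: ..." becomes "using cuDNN: ...". A minimal sketch of the resulting import pattern, assuming CUDA.jl 4.x and cuDNN.jl are installed (the identifiers are taken from the diffs below):

    # Sketch of the CUDA.jl 4 import pattern this PR adopts.
    using CUDA, cuDNN                            # was: using CUDA, with wrappers under CUDA.CUDNN
    using cuDNN: cudnnSoftmaxForward!, handle    # was: using CUDA.CUDNN: cudnnSoftmaxForward!, handle

    cudnnversion() = cuDNN.version()             # was: cudnnversion() = CUDA.CUDNN.version()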

ext/NNlibCUDA/Project.toml

Lines changed: 3 additions & 1 deletion
@@ -9,10 +9,12 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

 [compat]
 Adapt = "3.3"
-CUDA = "3.11"
+cuDNN = "1"
+CUDA = "4"
 NNlib = "0.8.15"
 julia = "1.6"

ext/NNlibCUDA/src/NNlibCUDA.jl

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 module NNlibCUDA

 using NNlib
-using CUDA
+using CUDA, cuDNN
 using Random, Statistics

 const IntOrIntTuple = Union{Integer, NTuple{N,<:Integer} where N}

ext/NNlibCUDA/src/cudnn/activations.jl

Lines changed: 5 additions & 5 deletions
@@ -2,10 +2,10 @@
 # Activation

 using Base.Broadcast
-using CUDA.CUDNN: cudnnActivationForward!, cudnnOpTensor!,
-    CUDNN_ACTIVATION_TANH,CUDNN_ACTIVATION_SIGMOID,CUDNN_ACTIVATION_ELU,
-    CUDNN_ACTIVATION_RELU,CUDNN_ACTIVATION_CLIPPED_RELU,CUDNN_OP_TENSOR_MAX,
-    CUDNN_ACTIVATION_IDENTITY
+using cuDNN: cudnnActivationForward!, cudnnOpTensor!,
+    CUDNN_ACTIVATION_TANH, CUDNN_ACTIVATION_SIGMOID, CUDNN_ACTIVATION_ELU,
+    CUDNN_ACTIVATION_RELU, CUDNN_ACTIVATION_CLIPPED_RELU, CUDNN_OP_TENSOR_MAX,
+    CUDNN_ACTIVATION_IDENTITY

 for (f, op) in [
     CUDA.tanh => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_TANH),

@@ -15,7 +15,7 @@ for (f, op) in [
     # NNlib.relu6 => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_CLIPPED_RELU, coef=6.0),
     # NNlib.leakyrelu => (src,dst)->cudnnOpTensor!(dst, src, src; op=CUDNN_OP_TENSOR_MAX, alpha1=0.01),
 ]
-
+
 @eval begin
     # in-place
     function Base.materialize!(dst::DenseCuArray{<:CUDNNFloat},

ext/NNlibCUDA/src/cudnn/batchnorm.jl

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
-using CUDA.CUDNN: CUDNN_BN_MIN_EPSILON, cudnnBatchNormalizationBackward,
-    cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL,
-    cudnnBatchNormalizationForwardTraining
+using cuDNN: CUDNN_BN_MIN_EPSILON, cudnnBatchNormalizationBackward,
+    cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL,
+    cudnnBatchNormalizationForwardTraining


 # TODO: replace with new cudnn normalization interface

@@ -116,7 +116,7 @@ function ∇batchnorm(g::DenseCuArray{T}, b::DenseCuArray{T}, x::DenseCuArray{T}
     if affine
         (dg, db, dx)
     else
-        # CUDNN always calculates dg and db, therefore we just have to drop them
+        # cuDNN always calculates dg and db, therefore we just have to drop them
         (nothing, nothing, dx)
     end
 end

ext/NNlibCUDA/src/cudnn/conv.jl

Lines changed: 11 additions & 11 deletions
@@ -2,11 +2,11 @@
 using NNlib: DenseConvDims
 import NNlib: conv!, ∇conv_filter!, ∇conv_data!, conv_bias_act!

-using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims,
-    cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf,
-    cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf,
-    cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter,
-    cudnnConvolutionBackwardBias
+using cuDNN: scalingParameter, CUDNN_CONVOLUTION, convdims,
+    cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf,
+    cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf,
+    cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter,
+    cudnnConvolutionBackwardBias

 const CUDNNFloat = Union{Float16,Float32,Float64}

@@ -24,22 +24,22 @@ function cudnnConvolutionDescriptorAndPaddedInput(cdims::DenseConvDims, x::Dense
     # which side of x to pad. Oh, and we use a CartesianIndex as we will mainly use this to index in x
     pad_manual = CartesianIndex(ntuple(i -> i > sdims ? 0 : pad[2(i-1)+1] - pad[2(i-1)+2], ndims(x)))

-    # How much we can let cudnn pad: The smallest padding amount between pad_left and pad_right, pad_top
+    # How much we can let cudnn pad: The smallest padding amount between pad_left and pad_right, pad_top
     # and pad_bottom etc. respectively
-    pad_cudnn = ntuple(i -> min(pad[2(i-1)+1], pad[2(i-1)+2]), sdims)
+    pad_cudnn = ntuple(i -> min(pad[2(i-1)+1], pad[2(i-1)+2]), sdims)

     x_padded_size = ntuple(i -> i <= sdims ? size(x, i) + abs(pad_manual[i]) : size(x ,i), ndims(x))
     x_padded = similar(x, x_padded_size)
     fill!(x_padded, 0)
     # This is a bit yucky, but we are basically figuring out where in x_padded we shall insert x
-    # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim
+    # Haven't benchmarked if this has any advantages over a more readable solution, e.g. writing dim
     # by dim to an array in a loop
     xIs = CartesianIndices(x)
     xI_first = first(xIs)
     xI_last = last(xIs)
     xIs_pad = max(xI_first, xI_first + pad_manual) : max(xI_last, xI_last + pad_manual)
-    x_padded[xIs_pad] = x
-
+    x_padded[xIs_pad] = x
+
     return cudnnConvolutionDescriptor(cdims, x_padded, pad_cudnn), x_padded, _x -> _x[xIs_pad]
 end

@@ -101,7 +101,7 @@ function ∇conv_data!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, w::DenseCuArray
     with_workspace(p.memory) do workspace
         cudnnConvolutionBackwardData(handle(), alpha, wDesc, w, yDesc, dy, convDesc, p.algo, workspace, sizeof(workspace), beta, xDesc, dx)
     end
-    return depad(dx)
+    return depad(dx)
 end

 function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArray{T},
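
The padding hunk above splits asymmetric padding into a part applied manually and a symmetric part delegated to cuDNN. A tiny illustration of that arithmetic for a single spatial dimension (pure shape math with hypothetical variable names, no GPU required):

    # Hypothetical one-dimensional illustration of the split done in
    # cudnnConvolutionDescriptorAndPaddedInput: pad = (pad_left, pad_right).
    pad = (2, 1)
    pad_manual = pad[1] - pad[2]        # +1: the asymmetric excess, handled by copying x into a padded buffer
    pad_cudnn  = min(pad[1], pad[2])    # 1: the symmetric amount cuDNN itself can apply on both sides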

ext/NNlibCUDA/src/cudnn/cudnn.jl

Lines changed: 4 additions & 4 deletions
@@ -1,8 +1,8 @@
-using CUDA.CUDNN: handle, with_workspace, cudnnTensorDescriptor, cudnnFilterDescriptor,
-    cudnnDataType, math_mode, CUDNN_DEFAULT_REORDER, CUDNN_CROSS_CORRELATION,
-    CUDNN_NOT_PROPAGATE_NAN, CUDNN_TENSOR_NCHW, dim4
+using cuDNN: handle, with_workspace, cudnnTensorDescriptor, cudnnFilterDescriptor,
+    cudnnDataType, math_mode, CUDNN_DEFAULT_REORDER, CUDNN_CROSS_CORRELATION,
+    CUDNN_NOT_PROPAGATE_NAN, CUDNN_TENSOR_NCHW, dim4

-cudnnversion() = CUDA.CUDNN.version()
+cudnnversion() = cuDNN.version()

 function nnlibPadding(dims)
     pd = NNlib.padding(dims)

ext/NNlibCUDA/src/cudnn/pooling.jl

Lines changed: 5 additions & 5 deletions
@@ -1,9 +1,9 @@
-using CUDA.CUDNN: cudnnPoolingMode_t, CUDNN_POOLING_MAX,
-    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING,
-    cudnnPoolingForward!, pooldims, cudnnPoolingBackward
-
+using cuDNN: cudnnPoolingMode_t, CUDNN_POOLING_MAX,
+    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING,
+    cudnnPoolingForward!, pooldims, cudnnPoolingBackward
+
 import NNlib: maxpool!, ∇maxpool!, meanpool!, ∇meanpool!
-import CUDA.CUDNN: cudnnPoolingDescriptor
+import cuDNN: cudnnPoolingDescriptor

 function cudnnPoolingDescriptor(pdims::PoolDims, x::DenseCuArray{T}, mode::cudnnPoolingMode_t) where T
     window, padding, stride = NNlib.kernel_size(pdims), nnlibPadding(pdims), NNlib.stride(pdims)

ext/NNlibCUDA/src/cudnn/softmax.jl

Lines changed: 7 additions & 7 deletions
@@ -1,9 +1,9 @@
 import NNlib: softmax, softmax!, ∇softmax, ∇softmax!,
     logsoftmax, logsoftmax!, ∇logsoftmax, ∇logsoftmax!

-using CUDA.CUDNN: CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL,
-    CUDNN_SOFTMAX_FAST, CUDNN_SOFTMAX_ACCURATE, cudnnSoftmaxForward!,
-    cudnnSoftmaxBackward
+using cuDNN: CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL,
+    CUDNN_SOFTMAX_FAST, CUDNN_SOFTMAX_ACCURATE, cudnnSoftmaxForward!,
+    cudnnSoftmaxBackward

 # Softmax

@@ -43,8 +43,8 @@ function _∇logsoftmax!(dx::T, dy::T, x::T, y::T; dims) where {T<:DenseCuArray}
     dx .= dy .- sum(dy; dims) .* exp.(y)
 end

-# Trick by @norci to use cudnn for softmax dims args that are contiguous:
-# If dims=(dmin:dmax) then CUDNN_SOFTMAX_MODE_CHANNEL does the trick with reshape
+# Trick by @norci to use cudnn for softmax dims args that are contiguous:
+# If dims=(dmin:dmax) then CUDNN_SOFTMAX_MODE_CHANNEL does the trick with reshape
 # (1, prod(size(x)[1:dmin-1]), prod(size(x)[dmin:dmax]), :)
 # softmaxdims returns nothing when the backup implementation should be used.

@@ -79,7 +79,7 @@ function ∇softmax!(dx::T, dy::T, x::T, y::T; dims=1) where {R,T<:DenseCuArray{
     s === nothing && return _∇softmax!(dx, dy, x, y; dims)
     xDesc = cudnnTensorDescriptor(reshape(x,s))
     alpha, beta = scalingParameter(R,1), scalingParameter(R,0)
-    cudnnSoftmaxBackward(handle(), softmaxalgo(), CUDNN_SOFTMAX_MODE_CHANNEL,
+    cudnnSoftmaxBackward(handle(), softmaxalgo(), CUDNN_SOFTMAX_MODE_CHANNEL,
        alpha, xDesc, y, xDesc, dy, beta, xDesc, dx)
     return dx
 end

@@ -96,7 +96,7 @@ function ∇logsoftmax!(dx::T, dy::T, x::T, y::T; dims=1) where {R,T<:DenseCuArr
     s === nothing && return _∇logsoftmax!(dx, dy, x, y; dims)
     xDesc = cudnnTensorDescriptor(reshape(x,s))
     alpha, beta = scalingParameter(R,1), scalingParameter(R,0)
-    cudnnSoftmaxBackward(handle(), CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL,
+    cudnnSoftmaxBackward(handle(), CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL,
        alpha, xDesc, y, xDesc, dy, beta, xDesc, dx)
     return dx
 end
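
The @norci reshape trick referenced in the comments above maps any contiguous dims range onto CUDNN_SOFTMAX_MODE_CHANNEL by folding the array into a 4-D shape whose third axis is the softmax axis. A small stand-alone sketch of the shape arithmetic (the helper name contiguous_softmax_shape is hypothetical, not part of NNlibCUDA; the package computes this inside softmaxdims):

    # Hypothetical helper mirroring the reshape (1, prod(size(x)[1:dmin-1]), prod(size(x)[dmin:dmax]), :)
    # described in the comments of the diff above.
    function contiguous_softmax_shape(x::AbstractArray, dims::UnitRange{Int})
        dmin, dmax = first(dims), last(dims)
        return (1, prod(size(x)[1:dmin-1]), prod(size(x)[dmin:dmax]), :)
    end

    x = rand(Float32, 3, 4, 5, 6)
    size(reshape(x, contiguous_softmax_shape(x, 2:3)...))   # (1, 3, 20, 6): dims 2:3 become the "channel" axis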

ext/NNlibCUDA/test/activations.jl

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ end

 # Broadcasting over complex CuArray works without NNlibCUDA, this test checks that
 # NNlibCUDA does not cause such operations to take a fast path which does not support
-# complex numbers (e.g. CUDNN)
+# complex numbers (e.g. cuDNN)
 @testset "complex" begin
     f(x) = tanh.(x)
     cs = rand(ComplexF64, 5)

ext/NNlibCUDA/test/softmax.jl

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
     gputest(NNlib.∇logsoftmax_data, dy, y2; dims=dims)

     # From NNlib 0.8.3, ∇softmax! is not used in the gradient.
-    # But NNlibCUDA still knows how to call CUDNN routines, let's test they agree:
+    # But NNlibCUDA still knows how to call cuDNN routines, let's test they agree:
     @test NNlib.∇softmax_data(dy, y; dims=dims) ≈ collect(∇softmax!(similar(cu(x)), cu(dy), cu(x), cu(y); dims=dims)) atol=1e-4
     @test NNlib.∇logsoftmax_data(dy, y2; dims=dims) ≈ collect(∇logsoftmax!(similar(cu(x)), cu(dy), cu(x), cu(y2); dims=dims)) atol=1e-4
     # (Note that ∇softmax! does not depend on x, it's just there to disambiguate from an even older signature.)
