Skip to content

Commit 83d44d3

Browse files
committed
Limit the time the CUDNN cache locks are held.
Holding these locks inhibits finalizers, which results in OOMs.
1 parent 554dcc4 commit 83d44d3

File tree

2 files changed

+63
-41
lines changed

2 files changed

+63
-41
lines changed

lib/cudnn/convolution.jl

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -172,52 +172,70 @@ end
172172
const cudnnConvolutionFwdAlgoPerfCache = Dict{Tuple,cudnnConvolutionFwdAlgoPerf_t}()
173173
const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
174174
function cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation)
175-
lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
176-
get!(cudnnConvolutionFwdAlgoPerfCache, (xDesc, wDesc, convDesc, biasDesc, activation)) do
177-
requestedAlgoCount = Int(CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
178-
returnedAlgoCount = Cint[0]
179-
perfResults = Array{cudnnConvolutionFwdAlgoPerf_t}(undef,requestedAlgoCount)
180-
workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(x)
181-
with_workspace(workspaceSize) do workspace
182-
cudnnFindConvolutionForwardAlgorithmEx(handle(),xDesc,x,wDesc,w,convDesc,yDesc,y,requestedAlgoCount,returnedAlgoCount,perfResults,workspace,sizeof(workspace))
183-
end
184-
cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
175+
key = (xDesc, wDesc, convDesc, biasDesc, activation)
176+
val = lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
177+
get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
178+
end
179+
if val === nothing
180+
requestedAlgoCount = Int(CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
181+
returnedAlgoCount = Cint[0]
182+
perfResults = Array{cudnnConvolutionFwdAlgoPerf_t}(undef,requestedAlgoCount)
183+
workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(x)
184+
with_workspace(workspaceSize) do workspace
185+
cudnnFindConvolutionForwardAlgorithmEx(handle(),xDesc,x,wDesc,w,convDesc,yDesc,y,requestedAlgoCount,returnedAlgoCount,perfResults,workspace,sizeof(workspace))
186+
end
187+
val = cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
188+
lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
189+
cudnnConvolutionFwdAlgoPerfCache[key] = val
185190
end
186191
end
192+
return val
187193
end
188194

189195
const cudnnConvolutionBwdDataAlgoPerfCache = Dict{Tuple,cudnnConvolutionBwdDataAlgoPerf_t}()
190196
const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock()
191197
function cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx)
192-
lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
193-
get!(cudnnConvolutionBwdDataAlgoPerfCache, (wDesc, dyDesc, convDesc)) do
194-
requestedAlgoCount = Int(CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT)
195-
returnedAlgoCount = Cint[0]
196-
perfResults = Array{cudnnConvolutionBwdDataAlgoPerf_t}(undef,requestedAlgoCount)
197-
workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(dx)
198-
with_workspace(workspaceSize) do workspace
199-
cudnnFindConvolutionBackwardDataAlgorithmEx(handle(),wDesc,w,dyDesc,dy,convDesc,dxDesc,dx,requestedAlgoCount,returnedAlgoCount,perfResults,workspace,sizeof(workspace))
200-
end
201-
cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
198+
key = (wDesc, dyDesc, convDesc)
199+
val = lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
200+
get(cudnnConvolutionBwdDataAlgoPerfCache, key, nothing)
201+
end
202+
if val === nothing
203+
requestedAlgoCount = Int(CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT)
204+
returnedAlgoCount = Cint[0]
205+
perfResults = Array{cudnnConvolutionBwdDataAlgoPerf_t}(undef,requestedAlgoCount)
206+
workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(dx)
207+
with_workspace(workspaceSize) do workspace
208+
cudnnFindConvolutionBackwardDataAlgorithmEx(handle(),wDesc,w,dyDesc,dy,convDesc,dxDesc,dx,requestedAlgoCount,returnedAlgoCount,perfResults,workspace,sizeof(workspace))
209+
end
210+
val = cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
211+
lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
212+
cudnnConvolutionBwdDataAlgoPerfCache[key] = val
202213
end
203214
end
215+
val
204216
end
205217

206218
const cudnnConvolutionBwdFilterAlgoPerfCache = Dict{Tuple,cudnnConvolutionBwdFilterAlgoPerf_t}()
207219
const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock()
208220
function cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw)
209-
lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
210-
get!(cudnnConvolutionBwdFilterAlgoPerfCache, (xDesc, dyDesc, convDesc)) do
211-
requestedAlgoCount = Int(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT)
212-
returnedAlgoCount = Cint[0]
213-
perfResults = Array{cudnnConvolutionBwdFilterAlgoPerf_t}(undef,requestedAlgoCount)
214-
workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(x)
215-
with_workspace(workspaceSize) do workspace
216-
cudnnFindConvolutionBackwardFilterAlgorithmEx(handle(),xDesc,x,dyDesc,dy,convDesc,dwDesc,dw,requestedAlgoCount,returnedAlgoCount,perfResults,workspace,sizeof(workspace))
217-
end
218-
cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
221+
key = (xDesc, dyDesc, convDesc)
222+
val = lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
223+
get(cudnnConvolutionBwdFilterAlgoPerfCache, (xDesc, dyDesc, convDesc), nothing)
224+
end
225+
if val === nothing
226+
requestedAlgoCount = Int(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT)
227+
returnedAlgoCount = Cint[0]
228+
perfResults = Array{cudnnConvolutionBwdFilterAlgoPerf_t}(undef,requestedAlgoCount)
229+
workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(x)
230+
with_workspace(workspaceSize) do workspace
231+
cudnnFindConvolutionBackwardFilterAlgorithmEx(handle(),xDesc,x,dyDesc,dy,convDesc,dwDesc,dw,requestedAlgoCount,returnedAlgoCount,perfResults,workspace,sizeof(workspace))
232+
end
233+
val = cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
234+
lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
235+
cudnnConvolutionBwdFilterAlgoPerfCache[key] = val
219236
end
220237
end
238+
val
221239
end
222240

223241

lib/cudnn/descriptors.jl

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,27 @@ macro cudnnDescriptor(x, set = Symbol("cudnnSet$(x)Descriptor"))
3030
const $cache = Dict{Tuple,$sname}() # Dict is 3x faster than IdDict!
3131
const $cache_lock = ReentrantLock()
3232
function $sname(args...)
33-
lock($cache_lock) do
34-
get!($cache, args) do
35-
ptr = $tname[C_NULL]
36-
$create(ptr)
37-
$set(ptr[1], args...)
38-
d = $sname(ptr[1])
39-
finalizer(x->$destroy(x.ptr), d)
40-
return d
33+
d = lock($cache_lock) do
34+
get($cache, args, nothing)
35+
end
36+
if d === nothing
37+
ptr = $tname[C_NULL]
38+
$create(ptr)
39+
$set(ptr[1], args...)
40+
d = $sname(ptr[1])
41+
finalizer(x->$destroy(x.ptr), d)
42+
lock($cache_lock) do
43+
$cache[args] = d
4144
end
4245
end
46+
return d
4347
end
4448
end |> esc
4549
end
4650

4751

4852
"""
49-
cudnnActivationDescriptor(mode::cudnnActivationMode_t,
53+
cudnnActivationDescriptor(mode::cudnnActivationMode_t,
5054
reluNanOpt::cudnnNanPropagation_t,
5155
coef::Cfloat)
5256
"""
@@ -116,8 +120,8 @@ cudnnConvolutionDescriptor(pad::Vector{Cint},
116120

117121
"""
118122
cudnnLRNDescriptor(lrnN::Cuint,
119-
lrnAlpha::Cdouble,
120-
lrnBeta::Cdouble,
123+
lrnAlpha::Cdouble,
124+
lrnBeta::Cdouble,
121125
lrnK::Cdouble)
122126
"""
123127
@cudnnDescriptor(LRN)

0 commit comments

Comments
 (0)