Skip to content

Commit 5b35148

Browse files
author
Avik Pal
committed
Some heuristics to choose threadpool for pooling
1 parent bc19012 commit 5b35148

File tree

4 files changed

+38
-3
lines changed

4 files changed

+38
-3
lines changed

src/NNlib.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ include("impl/depthwiseconv_im2col.jl")
2424
# Direct implementations of pooling
2525
include("impl/pooling_direct.jl")
2626

27-
to = TimerOutput()
28-
2927
if Sys.islinux()
3028
include("nnpack/NNPACK.jl")
3129
else

src/nnpack/NNPACK.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ end
5050
try
5151
global NNPACK_CPU_THREADS = parse(UInt64, ENV["NNPACK_CPU_THREADS"])
5252
catch
53-
global NNPACK_CPU_THREADS = Sys.CPU_THREADS
53+
# Sys.CPU_THREADS should be a better default if we are tuning the benchmark suite on
54+
# a particular machine. However, we fix the runtime threadpool here to have a max of
55+
# 4 threads so anything above will be ignored anyway
56+
global NNPACK_CPU_THREADS = UInt64(4)
5457
end
5558
allocate_threadpool()
5659
end

src/nnpack/interface.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,33 @@ for (front_name, backend) in (
1919
end
2020

2121

22+
function conv_nnpack(x::Array{T1, 4}, w::Array{T2, 4}, cdims::ConvDims; kwargs...) where {T1, T2}
    # Out-of-place convolution front-end: allocate the output in
    # (spatial..., channels_out, batch) layout and delegate to the in-place
    # NNPACK kernel `conv_nnpack!`.
    #
    # BUGFIX: `output_size(cdims)` returns a tuple of spatial dims, so it must
    # be splatted into `similar` — passing the tuple as a single dim argument
    # alongside integers is a dims error (compare `maxpool_nnpack`, which
    # splats `output_size(pdims)...`).
    y = similar(x, output_size(cdims)..., channels_out(cdims), size(x, 4))
    return conv_nnpack!(y, x, w, cdims; kwargs...)
end
26+
27+
28+
function ∇conv_data(dy::Array{T1, 4}, w::Array{T2, 4}, cdims::ConvDims; kwargs...) where {T1, T2}
    # Gradient of the convolution w.r.t. its input: allocate `dx` with the
    # input's (spatial..., channels_in, batch) shape and fill it in place via
    # `∇conv_data!`.
    #
    # BUGFIX: `input_size(cdims)` is a tuple of spatial dims and must be
    # splatted into `similar`; unsplatted it is a dims error (same convention
    # as `maxpool_nnpack`'s use of `output_size(pdims)...`).
    dx = similar(dy, input_size(cdims)..., channels_in(cdims), size(dy, 4))
    return ∇conv_data!(dx, dy, w, cdims; kwargs...)
end
32+
33+
34+
function ∇conv_filter(x::Array{T1, 4}, dy::Array{T2, 4}, cdims::ConvDims; kwargs...) where {T1, T2}
    # Gradient of the convolution w.r.t. the kernel: `dw` has shape
    # (kernel spatial..., channels_in, channels_out), filled in place via
    # `∇conv_filter!`.
    #
    # BUGFIX: `kernel_size(cdims)` is a tuple of spatial dims and must be
    # splatted into `similar`; unsplatted it is a dims error (same convention
    # as `maxpool_nnpack`'s use of `output_size(pdims)...`).
    dw = similar(x, kernel_size(cdims)..., channels_in(cdims), channels_out(cdims))
    return ∇conv_filter!(dw, x, dy, cdims; kwargs...)
end
38+
39+
2240
function maxpool_nnpack!(y::Array{T1, 4}, x::Array{T2, 4}, pdims::PoolDims;
                         kwargs...) where {T1, T2}
    # Generic fallback: NNPACK only operates on Float32, so promote both
    # buffers, run the Float32 method (dispatched to elsewhere in this file),
    # and convert the result back to the caller's requested eltype T1.
    @warn "Automatically converting $(size(x)) input tensor to Float32" maxlog=1
    y32 = Float32.(y)
    x32 = Float32.(x)
    pooled = maxpool_nnpack!(y32, x32, pdims; kwargs...)
    # We want the output to be of the same type as desired
    return T1.(pooled)
end
46+
47+
48+
function maxpool_nnpack(x::Array{T, 4}, pdims::PoolDims; kwargs...) where {T}
    # Out-of-place max-pooling front-end: build the output dims as
    # (spatial..., channels_out, batch), allocate, and delegate to the
    # in-place kernel `maxpool_nnpack!`.
    out_dims = (output_size(pdims)..., channels_out(pdims), size(x, 4))
    y = similar(x, out_dims)
    return maxpool_nnpack!(y, x, pdims; kwargs...)
end

src/nnpack/performance.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,15 @@ function select_threadpool(cdims::DenseConvDims, batch_size::Int)
33
end
44

55
function select_threadpool(pdims::PoolDims, batch_size::Int)
    # Heuristic threadpool selection for pooling: only workloads large enough
    # to amortize threading overhead get NNPACK's shared 4-thread pool;
    # everything else returns C_NULL, which tells NNPACK to run serially.
    # NOTE(review): only the first spatial dimension is consulted — presumably
    # inputs are (near-)square; confirm before relying on this for very
    # anisotropic inputs.
    inp_size = input_size(pdims)[1]
    # The original four branches all returned the same pool; collapse the
    # duplicated returns into a single guarded condition.
    if batch_size >= 32 ||
       (batch_size >= 16 && inp_size >= 64) ||
       inp_size >= 128 ||
       inp_size * batch_size >= 256
        return shared_threadpool_dict[4][]
    end
    return C_NULL
end

0 commit comments

Comments
 (0)