
Commit a30c7a7
Author: Avik Pal
Expose usage of NNPACK conv and maxpool operations
Parent: 5b35148
5 files changed: +60 additions, −11 deletions

src/NNlib.jl

Lines changed: 9 additions & 6 deletions

@@ -3,8 +3,17 @@ using Requires, TimerOutputs
 
 const to = TimerOutput()
 
+
 # Include APIs
 include("dim_helpers.jl")
+
+# NNPACK support
+if Sys.islinux()
+    include("nnpack/NNPACK.jl")
+else
+    is_nnpack_available() = false
+end
+
 include("activation.jl")
 include("softmax.jl")
 include("gemm.jl")

@@ -24,10 +33,4 @@ include("impl/depthwiseconv_im2col.jl")
 # Direct implementations of pooling
 include("impl/pooling_direct.jl")
 
-if Sys.islinux()
-    include("nnpack/NNPACK.jl")
-else
-    is_nnpack_available() = false
-end
-
 end # module NNlib
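This moves the NNPACK include ahead of the files that use it, so `is_nnpack_available()` is defined before `conv.jl` and `pooling.jl` are loaded (both of which branch on it at include time): on Linux it comes from `nnpack/NNPACK.jl`, elsewhere it is a stub returning `false`. A minimal sketch of branching on it from downstream code, assuming the function is reachable as `NNlib.is_nnpack_available` (nothing beyond what the diff defines):

using NNlib

if NNlib.is_nnpack_available()
    # nnpack/NNPACK.jl was loaded: conv/maxpool may dispatch to NNPACK
    @info "NNPACK backend active"
else
    # non-Linux platforms: only the pure-Julia backends are defined
    @info "Using conv_im2col / conv_direct fallbacks"
end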

src/conv.jl

Lines changed: 12 additions & 0 deletions

@@ -151,3 +151,15 @@ for backend in (Symbol(), :_direct, :_im2col)
         end
     end
 end
+
+
+# Use NNPACK if it is available and the operation is supported
+if is_nnpack_available()
+    function conv(x::Array{xT, 4}, w::Array{wT, 4},
+                  cdims::DenseConvDims{2, K, C_in, C_out, S, P, (1, 1), F};
+                  kwargs...) where {xT, wT, K, C_in, C_out, S, P, F}
+        func = check_supported_operation(x, cdims) ? conv_nnpack :
+               xT == wT ? conv_im2col : conv_direct
+        return func(x, w, cdims; kwargs...)
+    end
+end
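The new method only claims 2-d convolutions with dilation (1, 1); everything else keeps falling through to the generic paths. A hedged usage sketch — the array sizes and the `DenseConvDims` constructor call here are illustrative, not part of the commit:

using NNlib

x = rand(Float32, 32, 32, 3, 1)    # W × H × C_in × batch
w = rand(Float32, 3, 3, 3, 16)     # 3×3 kernel, 3 in-channels, 16 out-channels
cdims = DenseConvDims(x, w)        # stride 1, no padding, dilation (1, 1)

# check_supported_operation passes here (stride 1 divides any extent), so
# func == conv_nnpack; unsupported shapes fall back to conv_im2col, or to
# conv_direct when the input and kernel eltypes differ.
y = conv(x, w, cdims)
size(y)                            # (30, 30, 16, 1)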

src/nnpack/interface.jl

Lines changed: 28 additions & 5 deletions

@@ -10,7 +10,7 @@ for (front_name, backend) in (
     @timeit_debug to function $(Symbol("$(front_name)$(backend)!"))(
                     out::Array{T1,4}, in1::Array{T2,4}, in2::Array{T3,4},
                     cdims::ConvDims; kwargs...) where {T1, T2, T3}
-        @warn "Automatically converting $(size(in1)) input tensor to Float32" maxlog=1
+        @warn "Automatically converting input tensor to Float32. This will have performance implications" maxlog=1
         # Output must be of the same type as in the function signature
         T1.($(Symbol("$(front_name)$(backend)!"))(Float32.(out), Float32.(in1),
             Float32.(in2), cdims; kwargs...))

@@ -20,26 +20,26 @@ end
 
 
 function conv_nnpack(x::Array{T1, 4}, w::Array{T2, 4}, cdims::ConvDims; kwargs...) where {T1, T2}
-    y = similar(x, output_size(cdims), channels_out(cdims), size(x, 4))
+    y = similar(x, output_size(cdims)..., channels_out(cdims), size(x, 4))
     return conv_nnpack!(y, x, w, cdims; kwargs...)
 end
 
 
 function ∇conv_data(dy::Array{T1, 4}, w::Array{T2, 4}, cdims::ConvDims; kwargs...) where {T1, T2}
-    dx = similar(dy, input_size(cdims), channels_in(cdims), size(dy, 4))
+    dx = similar(dy, input_size(cdims)..., channels_in(cdims), size(dy, 4))
     return ∇conv_data!(dx, dy, w, cdims; kwargs...)
 end
 
 
 function ∇conv_filter(x::Array{T1, 4}, dy::Array{T2, 4}, cdims::ConvDims; kwargs...) where {T1, T2}
-    dw = similar(x, kernel_size(cdims), channels_in(cdims), channels_out(cdims))
+    dw = similar(x, kernel_size(cdims)..., channels_in(cdims), channels_out(cdims))
     return ∇conv_filter!(dw, x, dy, cdims; kwargs...)
 end
 
 
 function maxpool_nnpack!(y::Array{T1, 4}, x::Array{T2, 4}, pdims::PoolDims;
                          kwargs...) where {T1, T2}
-    @warn "Automatically converting $(size(x)) input tensor to Float32" maxlog=1
+    @warn "Automatically converting input tensor to Float32. This will have performance implications" maxlog=1
     # We want the output to be of the same type as desired
     T1.(maxpool_nnpack!(Float32.(y), Float32.(x), pdims; kwargs...))
 end

@@ -49,3 +49,26 @@ function maxpool_nnpack(x::Array{T, 4}, pdims::PoolDims; kwargs...) where {T}
     y = similar(x, output_size(pdims)..., channels_out(pdims), size(x, 4))
     return maxpool_nnpack!(y, x, pdims; kwargs...)
 end
+
+
+"""
+    check_supported_operation(x::Array, cdims::DenseConvDims)
+
+Returns `true` if NNPACK supports the convolution operation for the given input.
+"""
+function check_supported_operation(x::Array{T, 4}, cdims::DenseConvDims{2, K, C_in,
+                                   C_out, S, P, (1, 1), F}) where {T, K, C_in, C_out, S, P, F}
+    val = size(x)[1:2] .+ (P[1] + P[2], P[3] + P[4]) .- K
+    return val .% S == (0, 0) ? true : false
+end
+
+
+"""
+    check_supported_operation(x::Array, pdims::PoolDims)
+
+Returns `true` if NNPACK supports the pooling operation for the given input.
+"""
+function check_supported_operation(x::Array{T, 4}, pdims::PoolDims{2, K, S, P, (1, 1)}) where {T, K, S, P}
+    val = size(x)[1:2] .+ (P[1] + P[2], P[3] + P[4]) .- K
+    return val .% S == (0, 0) ? true : false
+end
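The support check is a divisibility condition: the padded input extent minus the kernel size must be an exact multiple of the stride in both spatial dimensions, i.e. the windows must tile the input with nothing left over. Worked through standalone with illustrative numbers (not from the commit):

# 32×32 input, 3×3 kernel, symmetric padding of 1, stride 2:
spatial = (32, 32)
P = (1, 1, 1, 1)                # (left, right, top, bottom) padding
K = (3, 3)
S = (2, 2)

val = spatial .+ (P[1] + P[2], P[3] + P[4]) .- K   # (31, 31)
val .% S == (0, 0)              # false: 31 % 2 != 0, so NNPACK is skipped

With stride (1, 1) the remainder is always (0, 0) and the NNPACK path is taken.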

src/nnpack/performance.jl

Lines changed: 2 additions & 0 deletions

@@ -8,6 +8,8 @@ function select_threadpool(pdims::PoolDims, batch_size::Int)
         return shared_threadpool_dict[4][]
     elseif batch_size >= 16 && inp_size >= 64
         return shared_threadpool_dict[4][]
+    elseif inp_size <= 32
+        return C_NULL
     elseif inp_size >= 128
         return shared_threadpool_dict[4][]
     elseif inp_size * batch_size >= 256
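Passing `C_NULL` as the pthreadpool handle makes NNPACK run on the calling thread only, which the new branch now does for small spatial sizes where thread-dispatch overhead would outweigh any parallel speedup. A self-contained sketch of the heuristic with symbols standing in for the actual pool handles (the final fallback branch is an assumption, since the rest of the function lies outside this hunk):

function threadpool_choice(inp_size::Int, batch_size::Int)
    if batch_size >= 16 && inp_size >= 64
        return :pool            # enough total work to amortize threading
    elseif inp_size <= 32
        return :single_thread   # the new branch: maps to C_NULL
    elseif inp_size >= 128
        return :pool
    elseif inp_size * batch_size >= 256
        return :pool
    else
        return :single_thread   # assumed fallback
    end
end

threadpool_choice(28, 8)    # => :single_thread
threadpool_choice(64, 32)   # => :pool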

src/pooling.jl

Lines changed: 9 additions & 0 deletions

@@ -127,3 +127,12 @@ for backend in (Symbol(), :_direct, :_im2col)
         end
     end
 end
+
+
+# Use NNPACK if it is available and the operation is supported
+if is_nnpack_available()
+    function maxpool(x::Array{T, 4}, pdims::PoolDims{2, K, S, P, (1, 1)}; kwargs...) where {T, K, S, P}
+        func = check_supported_operation(x, pdims) ? maxpool_nnpack : maxpool_im2col
+        return func(x, pdims; kwargs...)
+    end
+end
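Same pattern as conv: the override fires only for 2-d pooling with dilation (1, 1) and falls back to maxpool_im2col when the stride check fails. A usage sketch — the sizes and the `PoolDims` call are illustrative assumptions:

using NNlib

x = rand(Float32, 32, 32, 3, 1)   # W × H × C × batch
pdims = PoolDims(x, 2)            # 2×2 window; stride defaults to the window

# (32 + 0 - 2) % 2 == 0 in both dimensions, so check_supported_operation
# passes and maxpool_nnpack handles the call.
y = maxpool(x, pdims)
size(y)                           # (16, 16, 3, 1)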
