@@ -15,7 +15,7 @@ function nnp_relu_output(batch_size, channels, input, output, negative_slope, th
     @check ccall((:nnp_relu_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, input, output, negative_slope, threadpool)
 end
 
-function nnp_relu_output(x::AbstractArray{Float32,N}, y::AbstractArray{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
+function nnp_relu_output(x::Array{Float32,N}, y::Array{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
     # Investigate why the channel and batch dims need to be specified like this
     nnp_relu_output(prod(size(x)[N-1:N]), prod(size(x)[1:N-2]), x, y, negative_slope, threadpool)
     y
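Usage, as a minimal sketch (assumes libnnpack is available, the wrappers above are loaded, and `nnp_initialize` has already been called; array shapes and contents are illustrative):

```julia
# Leaky ReLU on a WHCN tensor via the high-level wrapper above.
x = rand(Float32, 28, 28, 3, 8)   # width, height, channels, batch
y = similar(x)                    # preallocated output buffer

# For an element-wise op the (batch, channels) split only has to satisfy
# batch * channels == length(x); the wrapper factors the size as
# prod(size(x)[N-1:N]) and prod(size(x)[1:N-2]).
nnp_relu_output(x, y, negative_slope = 0.01)
```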
@@ -25,7 +25,7 @@ function nnp_relu_input_gradient(batch_size, channels, grad_output, input, grad_
     @check ccall((:nnp_relu_input_gradient, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
 end
 
-function nnp_relu_input_gradient(x::AbstractArray{Float32,N}, dy::AbstractArray{Float32,N}, dx::AbstractArray{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
+function nnp_relu_input_gradient(x::Array{Float32,N}, dy::Array{Float32,N}, dx::Array{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
     # Investigate why the channel and batch dims need to be specified like this
     nnp_relu_input_gradient(Csize_t(prod(size(x)[N-1:N])), prod(size(x)[1:N-2]), dy, x, dx, negative_slope, threadpool)
     dx
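A hedged sanity-check sketch for the gradient wrapper (illustrative names only; assumes the standard leaky-ReLU derivative, which is what NNPACK implements):

```julia
# If the binding is wired correctly, the result should match the manual
# leaky-ReLU gradient: dy where x > 0, negative_slope * dy elsewhere.
x  = randn(Float32, 4, 4, 2, 2)
dy = ones(Float32, size(x)...)
dx = similar(x)
nnp_relu_input_gradient(x, dy, dx, negative_slope = 0.01)
manual = ifelse.(x .> 0, dy, 0.01f0 .* dy)
dx ≈ manual   # expected: true
```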
@@ -35,7 +35,7 @@ function nnp_softmax_output(batch_size, channels, input, output, threadpool)
     @check ccall((:nnp_softmax_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input, output, threadpool)
 end
 
-function nnp_softmax_output(x::AbstractVecOrMat{Float32}, y::AbstractVecOrMat{Float32}; threadpool = shared_threadpool[])
+function nnp_softmax_output(x::VecOrMat{Float32}, y::VecOrMat{Float32}; threadpool = shared_threadpool[])
     nnp_softmax_output(ndims(x) == 2 ? size(x, 2) : 1, size(x, 1), x, y, threadpool)
     y
 end
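A usage sketch, assuming the wrappers above are loaded. The wrapper treats `size(x, 2)` as the batch and `size(x, 1)` as the channels, so each column is normalized independently:

```julia
x = randn(Float32, 10, 32)   # 10 classes, batch of 32
y = similar(x)
nnp_softmax_output(x, y)
sum(y, dims = 1)             # expected: every entry ≈ 1.0f0
```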
@@ -47,7 +47,7 @@ function nnp_fully_connected_output(batch_size, input_channels, output_channels,
     @check ccall((:nnp_fully_connected_output, libnnpack), nnp_status, (Csize_t, Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t, Ptr{Cvoid}), batch_size, input_channels, output_channels, input, kernel, output, threadpool, C_NULL)
 end
 
-function nnp_fully_connected_output(x::AbstractArray{Float32,2}, w::AbstractArray{Float32,2}, y::AbstractArray{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
+function nnp_fully_connected_output(x::Array{Float32,2}, w::Array{Float32,2}, y::Array{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
     profile = profile === nothing ? nnp_profile() : profile
     nnp_fully_connected_output(size(x, 2), size(x, 1), size(w, 1), x, w, y, threadpool, profile)
     y
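A sketch of the batched dense forward pass. The wrapper passes `size(x, 2)` as the batch size, `size(x, 1)` as the input channels, and `size(w, 1)` as the output channels; the (output, input) weight layout below is an assumption read off those arguments, not a documented contract:

```julia
x = rand(Float32, 64, 16)    # 64 input channels, batch of 16
w = rand(Float32, 32, 64)    # assumed layout: 32 output x 64 input channels
y = zeros(Float32, 32, 16)   # 32 output channels, batch of 16
nnp_fully_connected_output(x, w, y)
```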
@@ -57,10 +57,10 @@ function nnp_fully_connected_inference_f16f32(input_channels, output_channels, i
     @check ccall((:nnp_fully_connected_inference_f16f32, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cvoid}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
 end
 
-nnp_fully_connected_inference_f16f32(x::AbstractVector{Float32}, w::AbstractArray{Float16,2}, y::AbstractVector{Float32}; threadpool = shared_threadpool[]) =
+nnp_fully_connected_inference_f16f32(x::Array{Float32,1}, w::Array{Float16,2}, y::Array{Float32,1}; threadpool = shared_threadpool[]) =
     nnp_fully_connected_inference_f16f32(reshape(x, size(x, 1), 1), w, reshape(y, size(y, 1), 1), threadpool = threadpool)
 
-function nnp_fully_connected_inference_f16f32(x::AbstractMatrix{Float32}, w::AbstractArray{Float16,2}, y::AbstractMatrix{Float32}; threadpool = shared_threadpool[])
+function nnp_fully_connected_inference_f16f32(x::Array{Float32,2}, w::Array{Float16,2}, y::Array{Float32,2}; threadpool = shared_threadpool[])
     nnp_fully_connected_inference_f16f32(size(x, 1), size(y, 1), x, w, y, threadpool)
     y
 end
@@ -69,10 +69,10 @@ function nnp_fully_connected_inference(input_channels, output_channels, input, k
     @check ccall((:nnp_fully_connected_inference, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
 end
 
-nnp_fully_connected_inference(x::AbstractVector{Float32}, w::AbstractArray{Float32,2}; threadpool = shared_threadpool[]) =
+nnp_fully_connected_inference(x::Array{Float32,1}, w::Array{Float32,2}; threadpool = shared_threadpool[]) =
     nnp_fully_connected_inference(reshape(x, size(x, 1), 1), w, threadpool = threadpool)
 
-function nnp_fully_connected_inference(x::AbstractMatrix{Float32}, w::AbstractMatrix{Float32}, y::AbstractMatrix{Float32}; threadpool = shared_threadpool[])
+function nnp_fully_connected_inference(x::Array{Float32,2}, w::Array{Float32,2}, y::Array{Float32,2}; threadpool = shared_threadpool[])
     nnp_fully_connected_inference(size(x, 1), size(y, 1), x, w, y, threadpool)
     y
 end
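A sketch of the inference-only path (no profiling struct, Float32 weights); the vector convenience method above forwards here after reshaping its input to a one-column matrix. The weight layout is the same assumption as in the `nnp_fully_connected_output` sketch:

```julia
x = rand(Float32, 64, 1)     # a single sample as a one-column matrix
w = rand(Float32, 32, 64)    # assumed (output, input) weight layout
y = zeros(Float32, 32, 1)
nnp_fully_connected_inference(x, w, y)
```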
@@ -81,7 +81,7 @@ function nnp_max_pooling_output(batch_size, channels, input_size, input_padding,
     @check ccall((:nnp_max_pooling_output, libnnpack), nnp_status, (Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
 end
 
-function nnp_max_pooling_output(x::AbstractArray{Float32,4}, y::AbstractArray{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
+function nnp_max_pooling_output(x::Array{Float32,4}, y::Array{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
     input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     pooling_size = nnp_size(Csize_t.(kernel)...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
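A usage sketch for 2x2 max pooling with stride 2. Note that the keyword defaults (`padding = 0`, `stride = 1`) are scalars, but the body indexes `padding[2]` (and presumably `stride[2]`), which throws a `BoundsError` for a plain `Int`, so tuples are passed explicitly here:

```julia
x = rand(Float32, 28, 28, 3, 8)
y = zeros(Float32, 14, 14, 3, 8)
nnp_max_pooling_output(x, y, (2, 2), padding = (0, 0), stride = (2, 2))
```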
@@ -96,7 +96,7 @@ function nnp_convolution_input_gradient(algorithm, batch_size, input_channels, o
     @check ccall((:nnp_convolution_input_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end
 
-function nnp_convolution_input_gradient(dx::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, dy::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
+function nnp_convolution_input_gradient(dx::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
     input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     kernel_size = nnp_size(Csize_t.((size(w, 1), size(w, 2)))...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
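A sketch of the input-gradient call for a padded 5x5 convolution; buffer shapes mirror the forward pass shown further down, and contents are illustrative:

```julia
x  = rand(Float32, 28, 28, 3, 8)    # WHCN input of the forward pass
w  = rand(Float32, 5, 5, 3, 16)     # 5x5 kernels, 3 in / 16 out channels
dy = rand(Float32, 28, 28, 16, 8)   # grad of the (padding = 2) forward output
dx = zeros(Float32, size(x)...)
nnp_convolution_input_gradient(dx, x, dy, w, padding = (2, 2))
```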
@@ -110,7 +110,7 @@ function nnp_convolution_kernel_gradient(algorithm, batch_size, input_channels,
     @check ccall((:nnp_convolution_kernel_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end
 
-function nnp_convolution_kernel_gradient(dw::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, dy::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
+function nnp_convolution_kernel_gradient(dw::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
     input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     kernel_size = nnp_size(Csize_t.((size(w, 1), size(w, 2)))...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
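The matching kernel-gradient sketch, same assumed shapes as the input-gradient sketch above:

```julia
x  = rand(Float32, 28, 28, 3, 8)
w  = rand(Float32, 5, 5, 3, 16)
dy = rand(Float32, 28, 28, 16, 8)
dw = zeros(Float32, size(w)...)
nnp_convolution_kernel_gradient(dw, x, dy, w, padding = (2, 2))
```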
@@ -124,7 +124,7 @@ function nnp_convolution_output(algorithm, batch_size, input_channels, output_ch
     @check ccall((:nnp_convolution_output, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end
 
-function nnp_convolution_output(y::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}, b::AbstractArray{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
+function nnp_convolution_output(y::Array{Float32,4}, x::Array{Float32,4}, w::Array{Float32,4}, b::Array{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
    input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     kernel_size = nnp_size(Csize_t.((size(w, 1), size(w, 2)))...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
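A sketch of the convolution forward pass; `algo = UInt32(0)` is NNPACK's automatic algorithm choice, and padding of 2 on each side preserves the 28x28 spatial size for a 5x5 kernel:

```julia
x = rand(Float32, 28, 28, 3, 8)     # WHCN input
w = rand(Float32, 5, 5, 3, 16)      # 5x5 kernels, 3 in / 16 out channels
b = zeros(Float32, 16)              # one bias per output channel
y = zeros(Float32, 28, 28, 16, 8)   # 28 + 2*2 - 5 + 1 = 28 per spatial dim
nnp_convolution_output(y, x, w, b, padding = (2, 2))
```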