Skip to content

Commit 08d514f

Browse files
author
Avik Pal
committed
Modify libnnpack signatures
1 parent 838e88a commit 08d514f

File tree

2 files changed

+13
-14
lines changed

2 files changed

+13
-14
lines changed

src/nnpack/interface.jl

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,7 @@ function check_support(x, k, pad, stride, dilation = 1)
88
return pad_, stride_, fallback
99
end
1010

11-
softmax!(y::A, x::A) where A<:AbstractVecOrMat{Float32} =
12-
nnp_softmax_output(x, y)
11+
softmax!(y::A, x::A) where A<:AbstractVecOrMat{Float32} = nnp_softmax_output(x, y)
1312

1413
function maxpool!(y::A, x::A, k; pad = map(_->0,k), stride = k) where A<:Array{Float32, 4}
1514
pad_, stride_, fallback = check_support(x, k, pad, stride)

src/nnpack/libnnpack.jl

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ function nnp_relu_output(batch_size, channels, input, output, negative_slope, th
1515
@check ccall((:nnp_relu_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, input, output, negative_slope, threadpool)
1616
end
1717

18-
function nnp_relu_output(x::AbstractArray{Float32,N}, y::AbstractArray{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
18+
function nnp_relu_output(x::Array{Float32,N}, y::Array{Float32,N}; negative_slope::Float = 0.0, threadpool = shared_threadpool[]) where {N}
1919
# Investigate why the channel and batch dims need to specified like this
2020
nnp_relu_output(prod(size(x)[N-1:N]), prod(size(x)[1:N-2]), x, y, negative_slope, threadpool)
2121
y
@@ -25,7 +25,7 @@ function nnp_relu_input_gradient(batch_size, channels, grad_output, input, grad_
2525
@check ccall((:nnp_relu_input_gradient, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
2626
end
2727

28-
function nnp_relu_input_gradient(x::AbstractArray{Float32,N}, dy::AbstractArray{Float32,N}, dx::AbstractArray{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
28+
function nnp_relu_input_gradient(x::Array{Float32,N}, dy::Array{Float32,N}, dx::Array{Float32,N}; negative_slope::Float = 0.0, threadpool = shared_threadpool[]) where {N}
2929
# Investigate why the channel and batch dims need to specified like this
3030
nnp_relu_input_gradient(Csize_t(prod(size(x)[N-1:N])), prod(size(x)[1:N-2]), dy, x, dx, negative_slope, threadpool)
3131
dx
@@ -35,7 +35,7 @@ function nnp_softmax_output(batch_size, channels, input, output, threadpool)
3535
@check ccall((:nnp_softmax_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input, output, threadpool)
3636
end
3737

38-
function nnp_softmax_output(x::AbstractVecOrMat{Float32}, y::AbstractVecOrMat{Float32}; threadpool = shared_threadpool[])
38+
function nnp_softmax_output(x::VecOrMat{Float32}, y::VecOrMat{Float32}; threadpool = shared_threadpool[])
3939
nnp_softmax_output(ndims(x) == 2 ? size(x, 2) : 1, size(x, 1), x, y, threadpool)
4040
y
4141
end
@@ -47,7 +47,7 @@ function nnp_fully_connected_output(batch_size, input_channels, output_channels,
4747
@check ccall((:nnp_fully_connected_output, libnnpack), nnp_status, (Csize_t, Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t, Ptr{Cvoid}), batch_size, input_channels, output_channels, input, kernel, output, threadpool, C_NULL)
4848
end
4949

50-
function nnp_fully_connected_output(x::AbstractArray{Float32,2}, w::AbstractArray{Float32,2}, y::AbstractArray{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
50+
function nnp_fully_connected_output(x::Array{Float32,2}, w::Array{Float32,2}, y::Array{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
5151
profile = profile == nothing ? nnp_profile() : profile
5252
nnp_fully_connected_output(size(x, 2), size(x, 1), size(w, 1), x, w, y, threadpool, profile)
5353
y
@@ -57,10 +57,10 @@ function nnp_fully_connected_inference_f16f32(input_channels, output_channels, i
5757
@check ccall((:nnp_fully_connected_inference_f16f32, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cvoid}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
5858
end
5959

60-
nnp_fully_connected_inference_f16f32(x::AbstractVector{Float32}, w::AbstractArray{Float16,2}, y::AbstractVector{Float32}; threadpool = shared_threadpool[]) =
60+
nnp_fully_connected_inference_f16f32(x::Array{Float32, 1}, w::Array{Float16,2}, y::Array{Float32, 1}; threadpool = shared_threadpool[]) =
6161
nnp_fully_connected_inference(reshape(x, size(x), 1), w, reshape(y, size(y), 1), threadpool = threadpool)
6262

63-
function nnp_fully_connected_inference_f16f32(x::AbstractMatrix{Float32}, w::AbstractArray{Float16,2}, y::AbstractMatrix{Float32}; threadpool = shared_threadpool[])
63+
function nnp_fully_connected_inference_f16f32(x::Array{Float32, 2}, w::Array{Float16,2}, y::Array{Float32, 2}; threadpool = shared_threadpool[])
6464
nnp_fully_connected_inference(size(x, 1), size(y, 1), x, w, y, threadpool)
6565
y
6666
end
@@ -69,10 +69,10 @@ function nnp_fully_connected_inference(input_channels, output_channels, input, k
6969
@check ccall((:nnp_fully_connected_inference, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
7070
end
7171

72-
nnp_fully_connected_inference(x::AbstractVector{Float32}, w::AbstractArray{Float32,2}; threadpool = shared_threadpool[]) =
72+
nnp_fully_connected_inference(x::Array{Float32, 1}, w::Array{Float32,2}; threadpool = shared_threadpool[]) =
7373
nnp_fully_connected_inference(reshape(x, size(x), 1), w, threadpool = threadpool)
7474

75-
function nnp_fully_connected_inference(x::AbstractMatrix{Float32}, w::AbstractMatrix{Float32}, y::AbstractMatrix{Float32}; threadpool = shared_threadpool[])
75+
function nnp_fully_connected_inference(x::Array{Float32, 2}, w::Array{Float32, 2}, y::Array{Float32, 2}; threadpool = shared_threadpool[])
7676
nnp_fully_connected_inference(size(x, 1), size(y, 1), x, w, y, threadpool)
7777
y
7878
end
@@ -81,7 +81,7 @@ function nnp_max_pooling_output(batch_size, channels, input_size, input_padding,
8181
@check ccall((:nnp_max_pooling_output, libnnpack), nnp_status, (Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
8282
end
8383

84-
function nnp_max_pooling_output(x::AbstractArray{Float32,4}, y::AbstractArray{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
84+
function nnp_max_pooling_output(x::Array{Float32,4}, y::Array{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
8585
input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
8686
pooling_size = nnp_size(Csize_t.(kernel)...)
8787
input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
@@ -96,7 +96,7 @@ function nnp_convolution_input_gradient(algorithm, batch_size, input_channels, o
9696
@check ccall((:nnp_convolution_input_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
9797
end
9898

99-
function nnp_convolution_input_gradient(dx::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, dy::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
99+
function nnp_convolution_input_gradient(dx::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
100100
input_size = nnp_size(Csize_t.((size(x,1), size(x,2)))...)
101101
kernel_size = nnp_size(Csize_t.((size(w,1),size(w,2)))...)
102102
input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
@@ -110,7 +110,7 @@ function nnp_convolution_kernel_gradient(algorithm, batch_size, input_channels,
110110
@check ccall((:nnp_convolution_kernel_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
111111
end
112112

113-
function nnp_convolution_kernel_gradient(dw::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, dy::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
113+
function nnp_convolution_kernel_gradient(dw::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
114114
input_size = nnp_size(Csize_t.((size(x,1), size(x,2)))...)
115115
kernel_size = nnp_size(Csize_t.((size(w,1),size(w,2)))...)
116116
input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
@@ -124,7 +124,7 @@ function nnp_convolution_output(algorithm, batch_size, input_channels, output_ch
124124
@check ccall((:nnp_convolution_output, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
125125
end
126126

127-
function nnp_convolution_output(y::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}, b::AbstractArray{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
127+
function nnp_convolution_output(y::Array{Float32,4}, x::Array{Float32,4}, w::Array{Float32,4}, b::Array{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
128128
input_size = nnp_size(Csize_t.((size(x,1), size(x,2)))...)
129129
kernel_size = nnp_size(Csize_t.((size(w,1),size(w,2)))...)
130130
input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))

0 commit comments

Comments (0)