Commit 2591d21

Author: Avik Pal
Message: Minor changes as per review
Parent: 43f759a

File tree: 5 files changed (+24 lines, -39 lines)

src/nnpack/NNPACK.jl

Lines changed: 3 additions & 3 deletions

@@ -25,14 +25,14 @@ end
     check_deps()
     status = nnp_initialize()
     if status == nnp_status_unsupported_hardware
-        @warn "HARDWARE is unsupported by NNPACK so falling back to default NNlib"
+        @warn "Hardware is unsupported by NNPACK so falling back to default NNlib"
     else
         include(nnlib_interface_path)
     end
     try
         global NNPACK_CPU_THREADS = parse(UInt64, ENV["NNPACK_CPU_THREADS"])
     catch
-        global NNPACK_CPU_THREADS = 4
+        global NNPACK_CPU_THREADS = Sys.CPU_THREADS
     end
-    shared_threadpool = pthreadpool_create(NNPACK_CPU_THREADS)
+    shared_threadpool[] = pthreadpool_create(NNPACK_CPU_THREADS)
 end
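Two behavioural changes land in this hunk: the fallback thread count now defaults to the host's logical core count (`Sys.CPU_THREADS`) instead of a hard-coded 4, and the threadpool handle is written through `shared_threadpool[]`, i.e. into a pre-declared `Ref`, instead of rebinding a global name at init time. A minimal sketch of that pattern, assuming `shared_threadpool` is declared as a `Ref{Ptr{Cvoid}}` elsewhere in the module (its declaration is outside this diff):

```julia
# Sketch only: the Ref declaration is assumed, not shown in this commit.
const shared_threadpool = Ref{Ptr{Cvoid}}(C_NULL)

function __init__()
    threads = try
        parse(UInt64, ENV["NNPACK_CPU_THREADS"])
    catch
        UInt64(Sys.CPU_THREADS)  # fall back to the host's logical core count
    end
    # Writing through the Ref mutates its contents in place; code that closed
    # over `shared_threadpool` at compile time then sees the new pointer,
    # which is what the `shared_threadpool[] = ...` change achieves.
    shared_threadpool[] = pthreadpool_create(threads)
end
```

This matches the `threadpool = shared_threadpool[]` default arguments used throughout `libnnpack.jl` below.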

src/nnpack/error.jl

Lines changed: 1 addition & 1 deletion

@@ -71,7 +71,7 @@ function NNPACKError(status::nnp_status)
     NNPACKError(status, msg)
 end

-macro check(nnp_func)
+macro nnpack_check(nnp_func)
    quote
        local err::nnp_status
        err = $(esc(nnp_func))
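Renaming `@check` to `@nnpack_check` gives the status guard a package-specific name, avoiding clashes with `@check` macros exported by other packages loaded in the same session. The diff truncates the macro body after the `err = ...` line; a plausible completion, assuming the macro throws on any non-success status (a hypothetical reconstruction, not the committed code):

```julia
macro nnpack_check(nnp_func)
    quote
        local err::nnp_status
        err = $(esc(nnp_func))
        # Assumed behaviour: surface a failing NNPACK status as a Julia error,
        # reusing the NNPACKError constructor defined earlier in this file.
        if err != nnp_status_success
            throw(NNPACKError(err))
        end
        err
    end
end
```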

src/nnpack/impl.jl

Lines changed: 7 additions & 24 deletions

@@ -2,12 +2,7 @@

 function maxpool_nnpack!(y::A, x::A, pdims::PoolDims) where {A<:Array{Float32, 4}}
     check_dims(size(x), size(y), pdims)
-
-    pad = padding(pdims)
-    stride_ = stride(pdims)
-    kernel = kernel_size(pdims)
-
-    nnp_max_pooling_output(y, x, kernel, padding = pad, stride = stride_)
+    nnp_max_pooling_output(y, x, kernel_size(pdims), padding = padding(pdims), stride = stride(pdims))
 end

 @timeit_debug to function conv_nnpack!(y::A1, x::A1, w::A1, cdims::ConvDims;
@@ -16,43 +11,31 @@ end
                                        A2<:Array{Float32, 1}}
     check_dims(size(x), size(w), size(y), cdims)

-    flipkernel_ = flipkernel(cdims)
-    if flipkernel_ == 0
+    if flipkernel(cdims) == 0
         w .= flipweight(w)
     end

-    pad = padding(cdims)
-    stride_ = stride(cdims)
-
-    nnp_convolution_output(y, x, w, b, algo = algo, padding = pad, stride = stride_)
+    nnp_convolution_output(y, x, w, b, algo = algo, padding = padding(cdims), stride = stride(cdims))
 end

 @timeit_debug to function ∇conv_data_nnpack!(dx::A, dy::A, w::A, cdims::ConvDims;
                                              algo = UInt32(0)) where{A<:Array{Float32, 4}}
     check_dims(size(dx), size(w), size(dy), cdims)

-    flipkernel_ = flipkernel(cdims)
-    if flipkernel_ == 0
+    if flipkernel(cdims) == 0
         w .= flipweight(w)
     end

-    pad = padding(cdims)
-    stride_ = stride(cdims)
-
-    nnp_convolution_input_gradient(dx, dy, w, algo = algo, padding = pad, stride = stride_)
+    nnp_convolution_input_gradient(dx, dy, w, algo = algo, padding = padding(cdims), stride = stride(cdims))
 end

 @timeit_debug to function ∇conv_filter_nnpack!(dw::A, x::A, dy::A, cdims::ConvDims;
                                                algo = UInt32(0)) where{A<:Array{Float32, 4}}
     check_dims(size(x), size(dw), size(dy), cdims)

-    flipkernel_ = flipkernel(cdims)
-    pad = padding(cdims)
-    stride_ = stride(cdims)
-
-    nnp_convolution_kernel_gradient(dw, x, dy, algo = algo, padding = pad, stride = stride_)
+    nnp_convolution_kernel_gradient(dw, x, dy, algo = algo, padding = padding(cdims), stride = stride(cdims))

-    if flipkernel_ == 0
+    if flipkernel(cdims) == 0
         dw .= flipweight(dw)
     end
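The refactor inlines the `padding`/`stride`/`kernel_size` accessors at their call sites and drops the single-use locals; behaviour is unchanged. The `flipweight` logic stays: NNPACK's convolution uses the opposite kernel orientation to the one NNlib requests when `flipkernel(cdims) == 0`, so the weights are spatially reversed before the call, and the filter gradient is reversed back afterwards. A sketch of what the flip amounts to, assuming `flipweight` (defined elsewhere in NNlib) reverses the two spatial axes of a 4-D kernel:

```julia
# Hypothetical equivalent of flipweight for a width × height × Cin × Cout kernel:
# reverse both spatial axes, leave the channel axes untouched.
flipweight(w::AbstractArray{T,4}) where {T} = w[end:-1:1, end:-1:1, :, :]
```

Note that `w .= flipweight(w)` mutates the caller's weight array in place, which is worth keeping in mind when reading these kernels.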

src/nnpack/interface.jl

Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,7 @@ for (front_name, backend) in (
     @timeit_debug to function $(Symbol("$(front_name)$(backend)!"))(
                     out::Array{T1,4}, in1::Array{T2,4}, in2::Array{T3,4},
                     cdims::ConvDims; kwargs...) where {T1, T2, T3}
+        @warn "Automatically converting $(size(in1)) input tensor to Float32" maxlog=1
         # Output must of the same type as in the function signature
         T1.($(Symbol("$(front_name)$(backend)!"))(Float32.(out), Float32.(in1),
                                                   Float32.(in2), cdims; kwargs...))
@@ -20,6 +21,7 @@ end

 function maxpool_nnpack!(y::Array{T1, 4}, x::Array{T2, 4}, pdims::PoolDims;
                          kwargs...) where {T1, T2}
+    @warn "Automatically converting $(size(x)) input tensor to Float32" maxlog=1
     # We want the output to be of the same type as desired
     T1.(maxpool_nnpack!(Float32.(y), Float32.(x), pdims; kwargs...))
 end
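Both new warnings pass `maxlog=1`, so the note about the implicit `Float32` conversion is logged once per session rather than on every call into the fallback path. A standalone illustration of the keyword, independent of NNlib:

```julia
for i in 1:3
    # Prints on the first iteration only: Julia's default logger honours
    # the maxlog hint and suppresses subsequent occurrences.
    @warn "Automatically converting input tensor to Float32" maxlog=1
end
```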

src/nnpack/libnnpack.jl

Lines changed: 11 additions & 11 deletions

@@ -4,15 +4,15 @@ function nnp_initialize()
 end

 function nnp_deinitialize()
-    @check ccall((:nnp_deinitialize, libnnpack), nnp_status, (),)
+    @nnpack_check ccall((:nnp_deinitialize, libnnpack), nnp_status, (),)
 end

 function pthreadpool_create(n = 0)
     ccall((:pthreadpool_create, libnnpack), Ptr{Cvoid}, (Csize_t,), n)
 end

 function nnp_relu_output(batch_size, channels, input, output, negative_slope, threadpool)
-    @check ccall((:nnp_relu_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, input, output, negative_slope, threadpool)
+    @nnpack_check ccall((:nnp_relu_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, input, output, negative_slope, threadpool)
 end

 function nnp_relu_output(x::Array{Float32,N}, y::Array{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
@@ -22,7 +22,7 @@ function nnp_relu_output(x::Array{Float32,N}, y::Array{Float32,N}; negative_slop
 end

 function nnp_relu_input_gradient(batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
-    @check ccall((:nnp_relu_input_gradient, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
+    @nnpack_check ccall((:nnp_relu_input_gradient, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
 end

 function nnp_relu_input_gradient(x::Array{Float32,N}, dy::Array{Float32,N}, dx::Array{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
@@ -32,7 +32,7 @@ function nnp_relu_input_gradient(x::Array{Float32,N}, dy::Array{Float32,N}, dx::
 end

 function nnp_softmax_output(batch_size, channels, input, output, threadpool)
-    @check ccall((:nnp_softmax_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input, output, threadpool)
+    @nnpack_check ccall((:nnp_softmax_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input, output, threadpool)
 end

 function nnp_softmax_output(x::VecOrMat{Float32}, y::VecOrMat{Float32}; threadpool = shared_threadpool[])
@@ -44,7 +44,7 @@ end
 #NOTE: This most likely due to nnpack being row major. Investigate this.

 function nnp_fully_connected_output(batch_size, input_channels, output_channels, input, kernel, output, threadpool, profile)
-    @check ccall((:nnp_fully_connected_output, libnnpack), nnp_status, (Csize_t, Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t, Ptr{Cvoid}), batch_size, input_channels, output_channels, input, kernel, output, threadpool, C_NULL)
+    @nnpack_check ccall((:nnp_fully_connected_output, libnnpack), nnp_status, (Csize_t, Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t, Ptr{Cvoid}), batch_size, input_channels, output_channels, input, kernel, output, threadpool, C_NULL)
 end

 function nnp_fully_connected_output(x::Array{Float32,2}, w::Array{Float32,2}, y::Array{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
@@ -54,7 +54,7 @@ function nnp_fully_connected_output(x::Array{Float32,2}, w::Array{Float32,2}, y:
 end

 function nnp_fully_connected_inference_f16f32(input_channels, output_channels, input, kernel, output, threadpool)
-    @check ccall((:nnp_fully_connected_inference_f16f32, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cvoid}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
+    @nnpack_check ccall((:nnp_fully_connected_inference_f16f32, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cvoid}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
 end

 nnp_fully_connected_inference_f16f32(x::Array{Float32, 1}, w::Array{Float16,2}, y::Array{Float32, 1}; threadpool = shared_threadpool[]) =
@@ -66,7 +66,7 @@ function nnp_fully_connected_inference_f16f32(x::Array{Float32, 2}, w::Array{Flo
 end

 function nnp_fully_connected_inference(input_channels, output_channels, input, kernel, output, threadpool)
-    @check ccall((:nnp_fully_connected_inference, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
+    @nnpack_check ccall((:nnp_fully_connected_inference, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
 end

 nnp_fully_connected_inference(x::Array{Float32, 1}, w::Array{Float32,2}; threadpool = shared_threadpool[]) =
@@ -78,7 +78,7 @@ function nnp_fully_connected_inference(x::Array{Float32, 2}, w::Array{Float32, 2
 end

 function nnp_max_pooling_output(batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
-    @check ccall((:nnp_max_pooling_output, libnnpack), nnp_status, (Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
+    @nnpack_check ccall((:nnp_max_pooling_output, libnnpack), nnp_status, (Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
 end

 function nnp_max_pooling_output(y::Array{Float32,4}, x::Array{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
@@ -93,7 +93,7 @@ end
 #TODO: Add wrapper for convolution inference

 function nnp_convolution_input_gradient(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)
-    @check ccall((:nnp_convolution_input_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
+    @nnpack_check ccall((:nnp_convolution_input_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end

 function nnp_convolution_input_gradient(dx::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
@@ -107,7 +107,7 @@ function nnp_convolution_input_gradient(dx::Array{Float32,4}, dy::Array{Float32,
 end

 function nnp_convolution_kernel_gradient(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)
-    @check ccall((:nnp_convolution_kernel_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
+    @nnpack_check ccall((:nnp_convolution_kernel_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end

 function nnp_convolution_kernel_gradient(dw::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
@@ -121,7 +121,7 @@ function nnp_convolution_kernel_gradient(dw::Array{Float32,4}, x::Array{Float32,
 end

 function nnp_convolution_output(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)
-    @check ccall((:nnp_convolution_output, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
+    @nnpack_check ccall((:nnp_convolution_output, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end

 function nnp_convolution_output(y::Array{Float32,4}, x::Array{Float32,4}, w::Array{Float32,4}, b::Array{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
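Mechanically, every raw `ccall` that returns an `nnp_status` is now guarded by `@nnpack_check` instead of the old `@check`; the wrapped calls themselves are unchanged. For reference, the guard can also be written without a macro; a minimal sketch assuming non-success statuses should raise (the helper name here is hypothetical):

```julia
# Macro-free equivalent of `@nnpack_check ccall((:nnp_deinitialize, libnnpack), ...)`.
function checked_nnp_deinitialize()
    status = ccall((:nnp_deinitialize, libnnpack), nnp_status, ())
    status == nnp_status_success || throw(NNPACKError(status))
    return status
end
```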
