@@ -4,15 +4,15 @@ function nnp_initialize()
4
4
end
5
5
6
6
"""
    nnp_deinitialize()

Release NNPACK's internal resources by calling the C function
`nnp_deinitialize` from `libnnpack`. The returned `nnp_status` code is
checked by `@nnpack_check`, which is expected to raise on a non-success
status.
"""
function nnp_deinitialize()
    @nnpack_check ccall((:nnp_deinitialize, libnnpack), nnp_status, (),)
end
9
9
10
10
"""
    pthreadpool_create(n = 0)

Create an NNPACK thread pool with `n` worker threads and return the opaque
`Ptr{Cvoid}` handle. `n = 0` lets the pthreadpool library choose the thread
count itself (presumably one per available core — confirm against the
pthreadpool docs). The caller owns the returned handle.
"""
function pthreadpool_create(n = 0)
    ccall((:pthreadpool_create, libnnpack), Ptr{Cvoid}, (Csize_t,), n)
end
13
13
14
14
"""
    nnp_relu_output(batch_size, channels, input, output, negative_slope, threadpool)

Low-level binding for NNPACK's `nnp_relu_output`: computes a (leaky) ReLU
over `input` and writes the result to `output`. `input`/`output` are raw
`Ptr{Cfloat}` buffers sized `batch_size * channels` floats (per the NNPACK
API — the sizing is not enforced here). The `nnp_status` return code is
checked by `@nnpack_check`.
"""
function nnp_relu_output(batch_size, channels, input, output, negative_slope, threadpool)
    # Argument order and types mirror the C prototype exactly.
    @nnpack_check ccall((:nnp_relu_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, input, output, negative_slope, threadpool)
end
17
17
18
18
function nnp_relu_output (x:: Array{Float32,N} , y:: Array{Float32,N} ; negative_slope:: AbstractFloat = 0.0 , threadpool = shared_threadpool[]) where {N}
@@ -22,7 +22,7 @@ function nnp_relu_output(x::Array{Float32,N}, y::Array{Float32,N}; negative_slop
22
22
end
23
23
24
24
"""
    nnp_relu_input_gradient(batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)

Low-level binding for NNPACK's `nnp_relu_input_gradient`: computes the
backward pass of the (leaky) ReLU, writing the input gradient to
`grad_input` given the forward `input` and upstream `grad_output`. All
three buffers are raw `Ptr{Cfloat}`. The `nnp_status` return code is
checked by `@nnpack_check`.
"""
function nnp_relu_input_gradient(batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
    @nnpack_check ccall((:nnp_relu_input_gradient, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
end
27
27
28
28
function nnp_relu_input_gradient (x:: Array{Float32,N} , dy:: Array{Float32,N} , dx:: Array{Float32,N} ; negative_slope:: AbstractFloat = 0.0 , threadpool = shared_threadpool[]) where {N}
@@ -32,7 +32,7 @@ function nnp_relu_input_gradient(x::Array{Float32,N}, dy::Array{Float32,N}, dx::
32
32
end
33
33
34
34
"""
    nnp_softmax_output(batch_size, channels, input, output, threadpool)

Low-level binding for NNPACK's `nnp_softmax_output`: computes a softmax
over each of the `batch_size` rows of `channels` floats in `input`,
writing results to `output`. Buffers are raw `Ptr{Cfloat}`. The
`nnp_status` return code is checked by `@nnpack_check`.
"""
function nnp_softmax_output(batch_size, channels, input, output, threadpool)
    @nnpack_check ccall((:nnp_softmax_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input, output, threadpool)
end
37
37
38
38
function nnp_softmax_output (x:: VecOrMat{Float32} , y:: VecOrMat{Float32} ; threadpool = shared_threadpool[])
44
44
# NOTE: This most likely due to nnpack being row major. Investigate this.
45
45
46
46
"""
    nnp_fully_connected_output(batch_size, input_channels, output_channels, input, kernel, output, threadpool, profile)

Low-level binding for NNPACK's `nnp_fully_connected_output` (batched
fully-connected / dense layer forward pass). The `nnp_status` return code
is checked by `@nnpack_check`.

NOTE(review): the `profile` argument is accepted for signature parity but
is NOT forwarded — `C_NULL` is always passed for the profiling struct, so
profiling output is discarded.
"""
function nnp_fully_connected_output(batch_size, input_channels, output_channels, input, kernel, output, threadpool, profile)
    @nnpack_check ccall((:nnp_fully_connected_output, libnnpack), nnp_status, (Csize_t, Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t, Ptr{Cvoid}), batch_size, input_channels, output_channels, input, kernel, output, threadpool, C_NULL)
end
49
49
50
50
function nnp_fully_connected_output (x:: Array{Float32,2} , w:: Array{Float32,2} , y:: Array{Float32,2} ; profile = nothing , threadpool = shared_threadpool[])
@@ -54,7 +54,7 @@ function nnp_fully_connected_output(x::Array{Float32,2}, w::Array{Float32,2}, y:
54
54
end
55
55
56
56
"""
    nnp_fully_connected_inference_f16f32(input_channels, output_channels, input, kernel, output, threadpool)

Low-level binding for NNPACK's `nnp_fully_connected_inference_f16f32`:
single-sample fully-connected inference with an FP16 kernel (passed as an
opaque `Ptr{Cvoid}`) and FP32 input/output buffers. The `nnp_status`
return code is checked by `@nnpack_check`.
"""
function nnp_fully_connected_inference_f16f32(input_channels, output_channels, input, kernel, output, threadpool)
    @nnpack_check ccall((:nnp_fully_connected_inference_f16f32, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cvoid}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
end
59
59
60
60
nnp_fully_connected_inference_f16f32 (x:: Array{Float32, 1} , w:: Array{Float16,2} , y:: Array{Float32, 1} ; threadpool = shared_threadpool[]) =
@@ -66,7 +66,7 @@ function nnp_fully_connected_inference_f16f32(x::Array{Float32, 2}, w::Array{Flo
66
66
end
67
67
68
68
"""
    nnp_fully_connected_inference(input_channels, output_channels, input, kernel, output, threadpool)

Low-level binding for NNPACK's `nnp_fully_connected_inference`:
single-sample fully-connected inference with FP32 input, kernel, and
output buffers (all raw `Ptr{Cfloat}`). The `nnp_status` return code is
checked by `@nnpack_check`.
"""
function nnp_fully_connected_inference(input_channels, output_channels, input, kernel, output, threadpool)
    @nnpack_check ccall((:nnp_fully_connected_inference, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
end
71
71
72
72
nnp_fully_connected_inference (x:: Array{Float32, 1} , w:: Array{Float32,2} ; threadpool = shared_threadpool[]) =
@@ -78,7 +78,7 @@ function nnp_fully_connected_inference(x::Array{Float32, 2}, w::Array{Float32, 2
78
78
end
79
79
80
80
"""
    nnp_max_pooling_output(batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)

Low-level binding for NNPACK's `nnp_max_pooling_output` (2-D max pooling
forward pass). `input_size`, `pooling_size`, and `pooling_stride` are
`nnp_size` structs and `input_padding` is an `nnp_padding` struct, passed
by value to match the C ABI. The `nnp_status` return code is checked by
`@nnpack_check`.
"""
function nnp_max_pooling_output(batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
    @nnpack_check ccall((:nnp_max_pooling_output, libnnpack), nnp_status, (Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
end
83
83
84
84
function nnp_max_pooling_output (y:: Array{Float32,4} , x:: Array{Float32,4} , kernel:: Tuple ; padding = 0 , stride = 1 , threadpool = shared_threadpool[])
93
93
# TODO : Add wrapper for convolution inference
94
94
95
95
"""
    nnp_convolution_input_gradient(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)

Low-level binding for NNPACK's `nnp_convolution_input_gradient`
(convolution backward pass w.r.t. the input). The `nnp_status` return
code is checked by `@nnpack_check`.

NOTE(review): the `profile` argument is accepted for signature parity but
is NOT forwarded — `C_NULL` is always passed for the profiling struct.
"""
function nnp_convolution_input_gradient(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)
    @nnpack_check ccall((:nnp_convolution_input_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
end
98
98
99
99
function nnp_convolution_input_gradient (dx:: Array{Float32,4} , dy:: Array{Float32,4} , w:: Array{Float32,4} ; algo:: nnp_convolution_algorithm = UInt32 (0 ), workspace_buffer = nothing , workspace_size = 0 , padding = 0 , stride = 1 , threadpool = shared_threadpool[], profile = nothing )
@@ -107,7 +107,7 @@ function nnp_convolution_input_gradient(dx::Array{Float32,4}, dy::Array{Float32,
107
107
end
108
108
109
109
"""
    nnp_convolution_kernel_gradient(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)

Low-level binding for NNPACK's `nnp_convolution_kernel_gradient`
(convolution backward pass w.r.t. the kernel/weights). The `nnp_status`
return code is checked by `@nnpack_check`.

NOTE(review): the `profile` argument is accepted for signature parity but
is NOT forwarded — `C_NULL` is always passed for the profiling struct.
"""
function nnp_convolution_kernel_gradient(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)
    @nnpack_check ccall((:nnp_convolution_kernel_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
end
112
112
113
113
function nnp_convolution_kernel_gradient (dw:: Array{Float32,4} , x:: Array{Float32,4} , dy:: Array{Float32,4} ; algo:: nnp_convolution_algorithm = UInt32 (0 ), workspace_buffer = nothing , workspace_size = 0 , padding = 0 , stride = 1 , threadpool = shared_threadpool[], profile = nothing )
@@ -121,7 +121,7 @@ function nnp_convolution_kernel_gradient(dw::Array{Float32,4}, x::Array{Float32,
121
121
end
122
122
123
123
"""
    nnp_convolution_output(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)

Low-level binding for NNPACK's `nnp_convolution_output` (batched
convolution forward pass with bias and optional fused activation). The
`nnp_status` return code is checked by `@nnpack_check`.

NOTE(review): the `profile` argument is accepted for signature parity but
is NOT forwarded — `C_NULL` is always passed for the profiling struct.
"""
function nnp_convolution_output(algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, profile)
    @nnpack_check ccall((:nnp_convolution_output, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
end
126
126
127
127
function nnp_convolution_output (y:: Array{Float32,4} , x:: Array{Float32,4} , w:: Array{Float32,4} , b:: Array{Float32,1} ; algo:: nnp_convolution_algorithm = UInt32 (0 ), workspace_buffer = nothing , workspace_size = 0 , padding = 0 , stride = 1 , threadpool = shared_threadpool[], profile = nothing )
0 commit comments