@@ -15,7 +15,7 @@ function nnp_relu_output(batch_size, channels, input, output, negative_slope, th
     @check ccall((:nnp_relu_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, input, output, negative_slope, threadpool)
 end
 
-function nnp_relu_output(x::AbstractArray{Float32,N}, y::AbstractArray{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
+function nnp_relu_output(x::Array{Float32,N}, y::Array{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
     # Investigate why the channel and batch dims need to be specified like this
     nnp_relu_output(prod(size(x)[N-1:N]), prod(size(x)[1:N-2]), x, y, negative_slope, threadpool)
     y
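Usage, as a minimal sketch (assumes libnnpack is available, the wrappers above are loaded, and `nnp_initialize` has already been called; array shapes and contents are illustrative):

```julia
# Leaky ReLU on a WHCN tensor via the high-level wrapper above.
x = rand(Float32, 28, 28, 3, 8)   # width, height, channels, batch
y = similar(x)                    # preallocated output buffer

# For an element-wise op the (batch, channels) split only has to satisfy
# batch * channels == length(x); the wrapper factors the size as
# prod(size(x)[N-1:N]) and prod(size(x)[1:N-2]).
nnp_relu_output(x, y, negative_slope = 0.01)
```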
@@ -25,7 +25,7 @@ function nnp_relu_input_gradient(batch_size, channels, grad_output, input, grad_
     @check ccall((:nnp_relu_input_gradient, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Cfloat, pthreadpool_t), batch_size, channels, grad_output, input, grad_input, negative_slope, threadpool)
 end
 
-function nnp_relu_input_gradient(x::AbstractArray{Float32,N}, dy::AbstractArray{Float32,N}, dx::AbstractArray{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
+function nnp_relu_input_gradient(x::Array{Float32,N}, dy::Array{Float32,N}, dx::Array{Float32,N}; negative_slope::AbstractFloat = 0.0, threadpool = shared_threadpool[]) where {N}
     # Investigate why the channel and batch dims need to be specified like this
     nnp_relu_input_gradient(Csize_t(prod(size(x)[N-1:N])), prod(size(x)[1:N-2]), dy, x, dx, negative_slope, threadpool)
     dx
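A hedged sanity-check sketch for the gradient wrapper (illustrative names only; assumes the standard leaky-ReLU derivative, which is what NNPACK implements):

```julia
# If the binding is wired correctly, the result should match the manual
# leaky-ReLU gradient: dy where x > 0, negative_slope * dy elsewhere.
x  = randn(Float32, 4, 4, 2, 2)
dy = ones(Float32, size(x)...)
dx = similar(x)
nnp_relu_input_gradient(x, dy, dx, negative_slope = 0.01)
manual = ifelse.(x .> 0, dy, 0.01f0 .* dy)
dx ≈ manual   # expected: true
```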
@@ -35,7 +35,7 @@ function nnp_softmax_output(batch_size, channels, input, output, threadpool)
     @check ccall((:nnp_softmax_output, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input, output, threadpool)
 end
 
-function nnp_softmax_output(x::AbstractVecOrMat{Float32}, y::AbstractVecOrMat{Float32}; threadpool = shared_threadpool[])
+function nnp_softmax_output(x::VecOrMat{Float32}, y::VecOrMat{Float32}; threadpool = shared_threadpool[])
     nnp_softmax_output(ndims(x) == 2 ? size(x, 2) : 1, size(x, 1), x, y, threadpool)
     y
 end
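A usage sketch, assuming the wrappers above are loaded. The wrapper treats `size(x, 2)` as the batch and `size(x, 1)` as the channels, so each column is normalized independently:

```julia
x = randn(Float32, 10, 32)   # 10 classes, batch of 32
y = similar(x)
nnp_softmax_output(x, y)
sum(y, dims = 1)             # expected: every entry ≈ 1.0f0
```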
@@ -47,7 +47,7 @@ function nnp_fully_connected_output(batch_size, input_channels, output_channels,
     @check ccall((:nnp_fully_connected_output, libnnpack), nnp_status, (Csize_t, Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t, Ptr{Cvoid}), batch_size, input_channels, output_channels, input, kernel, output, threadpool, C_NULL)
 end
 
-function nnp_fully_connected_output(x::AbstractArray{Float32,2}, w::AbstractArray{Float32,2}, y::AbstractArray{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
+function nnp_fully_connected_output(x::Array{Float32,2}, w::Array{Float32,2}, y::Array{Float32,2}; profile = nothing, threadpool = shared_threadpool[])
     profile = profile === nothing ? nnp_profile() : profile
     nnp_fully_connected_output(size(x, 2), size(x, 1), size(w, 1), x, w, y, threadpool, profile)
     y
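A sketch of the batched dense forward pass. The wrapper passes `size(x, 2)` as the batch size, `size(x, 1)` as the input channels, and `size(w, 1)` as the output channels; the (output, input) weight layout below is an assumption read off those arguments, not a documented contract:

```julia
x = rand(Float32, 64, 16)    # 64 input channels, batch of 16
w = rand(Float32, 32, 64)    # assumed layout: 32 output x 64 input channels
y = zeros(Float32, 32, 16)   # 32 output channels, batch of 16
nnp_fully_connected_output(x, w, y)
```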
@@ -57,10 +57,10 @@ function nnp_fully_connected_inference_f16f32(input_channels, output_channels, i
     @check ccall((:nnp_fully_connected_inference_f16f32, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cvoid}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
 end
 
-nnp_fully_connected_inference_f16f32(x::AbstractVector{Float32}, w::AbstractArray{Float16,2}, y::AbstractVector{Float32}; threadpool = shared_threadpool[]) =
+nnp_fully_connected_inference_f16f32(x::Array{Float32,1}, w::Array{Float16,2}, y::Array{Float32,1}; threadpool = shared_threadpool[]) =
     nnp_fully_connected_inference_f16f32(reshape(x, size(x, 1), 1), w, reshape(y, size(y, 1), 1), threadpool = threadpool)
 
-function nnp_fully_connected_inference_f16f32(x::AbstractMatrix{Float32}, w::AbstractArray{Float16,2}, y::AbstractMatrix{Float32}; threadpool = shared_threadpool[])
+function nnp_fully_connected_inference_f16f32(x::Array{Float32,2}, w::Array{Float16,2}, y::Array{Float32,2}; threadpool = shared_threadpool[])
     nnp_fully_connected_inference_f16f32(size(x, 1), size(y, 1), x, w, y, threadpool)
     y
 end
@@ -69,10 +69,10 @@ function nnp_fully_connected_inference(input_channels, output_channels, input, k
     @check ccall((:nnp_fully_connected_inference, libnnpack), nnp_status, (Csize_t, Csize_t, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), input_channels, output_channels, input, kernel, output, threadpool)
 end
 
-nnp_fully_connected_inference(x::AbstractVector{Float32}, w::AbstractArray{Float32,2}; threadpool = shared_threadpool[]) =
+nnp_fully_connected_inference(x::Array{Float32,1}, w::Array{Float32,2}; threadpool = shared_threadpool[]) =
     nnp_fully_connected_inference(reshape(x, size(x, 1), 1), w, threadpool = threadpool)
 
-function nnp_fully_connected_inference(x::AbstractMatrix{Float32}, w::AbstractMatrix{Float32}, y::AbstractMatrix{Float32}; threadpool = shared_threadpool[])
+function nnp_fully_connected_inference(x::Array{Float32,2}, w::Array{Float32,2}, y::Array{Float32,2}; threadpool = shared_threadpool[])
     nnp_fully_connected_inference(size(x, 1), size(y, 1), x, w, y, threadpool)
     y
 end
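A sketch of the inference-only path (no profiling struct, Float32 weights); the vector convenience method above forwards here after reshaping its input to a one-column matrix. The weight layout is the same assumption as in the `nnp_fully_connected_output` sketch:

```julia
x = rand(Float32, 64, 1)     # a single sample as a one-column matrix
w = rand(Float32, 32, 64)    # assumed (output, input) weight layout
y = zeros(Float32, 32, 1)
nnp_fully_connected_inference(x, w, y)
```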
@@ -81,7 +81,7 @@ function nnp_max_pooling_output(batch_size, channels, input_size, input_padding,
     @check ccall((:nnp_max_pooling_output, libnnpack), nnp_status, (Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, pthreadpool_t), batch_size, channels, input_size, input_padding, pooling_size, pooling_stride, input, output, threadpool)
 end
 
-function nnp_max_pooling_output(x::AbstractArray{Float32,4}, y::AbstractArray{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
+function nnp_max_pooling_output(x::Array{Float32,4}, y::Array{Float32,4}, kernel::Tuple; padding = 0, stride = 1, threadpool = shared_threadpool[])
     input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     pooling_size = nnp_size(Csize_t.(kernel)...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
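A usage sketch for 2x2 max pooling with stride 2. Note that the keyword defaults (`padding = 0`, `stride = 1`) are scalars, but the body indexes `padding[2]` (and presumably `stride[2]`), which throws a `BoundsError` for a plain `Int`, so tuples are passed explicitly here:

```julia
x = rand(Float32, 28, 28, 3, 8)
y = zeros(Float32, 14, 14, 3, 8)
nnp_max_pooling_output(x, y, (2, 2), padding = (0, 0), stride = (2, 2))
```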
@@ -96,7 +96,7 @@ function nnp_convolution_input_gradient(algorithm, batch_size, input_channels, o
     @check ccall((:nnp_convolution_input_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, grad_output, kernel, grad_input, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end
 
-function nnp_convolution_input_gradient(dx::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, dy::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
+function nnp_convolution_input_gradient(dx::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
     input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     kernel_size = nnp_size(Csize_t.((size(w, 1), size(w, 2)))...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
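A sketch of the input-gradient call for a padded 5x5 convolution; buffer shapes mirror the forward pass shown further down, and contents are illustrative:

```julia
x  = rand(Float32, 28, 28, 3, 8)    # WHCN input of the forward pass
w  = rand(Float32, 5, 5, 3, 16)     # 5x5 kernels, 3 in / 16 out channels
dy = rand(Float32, 28, 28, 16, 8)   # grad of the (padding = 2) forward output
dx = zeros(Float32, size(x)...)
nnp_convolution_input_gradient(dx, x, dy, w, padding = (2, 2))
```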
@@ -110,7 +110,7 @@ function nnp_convolution_kernel_gradient(algorithm, batch_size, input_channels,
     @check ccall((:nnp_convolution_kernel_gradient, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, grad_output, grad_kernel, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end
 
-function nnp_convolution_kernel_gradient(dw::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, dy::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
+function nnp_convolution_kernel_gradient(dw::Array{Float32,4}, x::Array{Float32,4}, dy::Array{Float32,4}, w::Array{Float32,4}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
     input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     kernel_size = nnp_size(Csize_t.((size(w, 1), size(w, 2)))...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
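The matching kernel-gradient sketch, same assumed shapes as the input-gradient sketch above:

```julia
x  = rand(Float32, 28, 28, 3, 8)
w  = rand(Float32, 5, 5, 3, 16)
dy = rand(Float32, 28, 28, 16, 8)
dw = zeros(Float32, size(w)...)
nnp_convolution_kernel_gradient(dw, x, dy, w, padding = (2, 2))
```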
@@ -124,7 +124,7 @@ function nnp_convolution_output(algorithm, batch_size, input_channels, output_ch
     @check ccall((:nnp_convolution_output, libnnpack), nnp_status, (nnp_convolution_algorithm, Csize_t, Csize_t, Csize_t, nnp_size, nnp_padding, nnp_size, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cvoid}, Csize_t, nnp_activation, Ptr{Cvoid}, pthreadpool_t, Ptr{Cvoid}), algorithm, batch_size, input_channels, output_channels, input_size, input_padding, kernel_size, input, kernel, bias, output, workspace_buffer, workspace_size, activation, activation_parameters, threadpool, C_NULL)
 end
 
-function nnp_convolution_output(y::AbstractArray{Float32,4}, x::AbstractArray{Float32,4}, w::AbstractArray{Float32,4}, b::AbstractArray{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
+function nnp_convolution_output(y::Array{Float32,4}, x::Array{Float32,4}, w::Array{Float32,4}, b::Array{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = shared_threadpool[], profile = nothing)
    input_size = nnp_size(Csize_t.((size(x, 1), size(x, 2)))...)
     kernel_size = nnp_size(Csize_t.((size(w, 1), size(w, 2)))...)
     input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
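A sketch of the convolution forward pass; `algo = UInt32(0)` is NNPACK's automatic algorithm choice, and padding of 2 on each side preserves the 28x28 spatial size for a 5x5 kernel:

```julia
x = rand(Float32, 28, 28, 3, 8)     # WHCN input
w = rand(Float32, 5, 5, 3, 16)      # 5x5 kernels, 3 in / 16 out channels
b = zeros(Float32, 16)              # one bias per output channel
y = zeros(Float32, 28, 28, 16, 8)   # 28 + 2*2 - 5 + 1 = 28 per spatial dim
nnp_convolution_output(y, x, w, b, padding = (2, 2))
```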