@@ -8,40 +8,40 @@ const AA1 = Union{AA{2}, AA{3}, AA{4}, AA{5}}
# leakyrelu(x::AA1, a = oftype(x/1, 0.01)) =
#   nnp_relu_output(x, inplace ? x : similar(x), negative_slope = a, threadpool = shared_threadpool)

-softmax!(x::AbstractVecOrMat{Float64}) = Float64.(softmax!(Float32.(x)))
+softmax!(x::A) where A<:AbstractVecOrMat{Float64} = softmax!(Float32.(x))

-softmax!(x::AbstractVecOrMat{Float32}) =
+softmax!(x::A) where A<:AbstractVecOrMat{Float32} =
    nnp_softmax_output(x, x, threadpool = shared_threadpool)

-softmax!(y::AbstractVecOrMat{Float64}, x::AbstractVecOrMat{Float64}) = Float64.(softmax!(Float32.(y), Float32.(x)))
+softmax!(y::A, x::A) where A<:AbstractVecOrMat{Float64} = softmax!(Float32.(y), Float32.(x))

-softmax!(y::AbstractVecOrMat{Float32}, x::AbstractVecOrMat{Float32}) =
+softmax!(y::A, x::A) where A<:AbstractVecOrMat{Float32} =
    nnp_softmax_output(x, y, threadpool = shared_threadpool)

-softmax(x::AbstractVecOrMat{Float64}) = Float64.(softmax(Float32.(x)))
+softmax(x::A) where A<:AbstractVecOrMat{Float64} = softmax(Float32.(x))

-softmax(x::AbstractVecOrMat{Float32}) =
+softmax(x::A) where A<:AbstractVecOrMat{Float32} =
    nnp_softmax_output(x, similar(x), threadpool = shared_threadpool)

-maxpool(x::AbstractArray{Float64, 4}, k; pad = map(_->0, k), stride = k) =
-    Float64.(maxpool(Float32.(x), k, pad = pad, stride = stride))
+maxpool(x::A, k; pad = map(_->0, k), stride = k) where A<:AbstractArray{Float64, 4} =
+    maxpool(Float32.(x), k, pad = pad, stride = stride)

-function maxpool(x::AA{4}, k; pad = map(_->0, k), stride = k)
+function maxpool(x::A, k; pad = map(_->0, k), stride = k) where A<:AA{4}
    pad_, stride_ = expand(Val{length(k)}, pad), expand(Val{length(k)}, stride)
    ((size(x, 1) - k[1] + 2 * pad_[1]) % stride_[1] == 0 && (size(x, 2) - k[2] + 2 * pad_[2]) % stride_[2] == 0) || error("Choose the stride, pad and kernel size properly")
    maxpool!(similar(x, pdims(size(x), k, pad_, stride_)), x, k, pad = pad_, stride = stride_)
end

-maxpool!(y::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, k; pad = map(_->0, k), stride = k) =
-    Float64.(maxpool!(Float32.(y), Float32.(x), k, pad = pad, stride = stride))
+maxpool!(y::A, x::A, k; pad = map(_->0, k), stride = k) where A<:AbstractArray{Float64, 4} =
+    maxpool!(Float32.(y), Float32.(x), k, pad = pad, stride = stride)

-maxpool!(y::AA{4}, x::AA{4}, k; pad = map(_->0, k), stride = k) =
+maxpool!(y::A, x::A, k; pad = map(_->0, k), stride = k) where A<:AA{4} =
    nnp_max_pooling_output(x, y, k, padding = expand(Val{length(k)}, pad), stride = expand(Val{length(k)}, stride), threadpool = shared_threadpool)

-conv(x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
-    Float64.(conv(Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo))
+conv(x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where A<:AbstractArray{Float64, 4} =
+    conv(Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo)

-function conv(x::AA{4}, w::AA{4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0))
+function conv(x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where A<:AA{4}
    dilation == 1 || dilation == (1, 1) || error("NNPACK does not support dilation > 1")
    pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
    ((size(x, 1) - size(w, 1) + 2 * pad_[1]) % stride_[1] == 0 && (size(x, 2) - size(w, 2) + 2 * pad_[2]) % stride_[2] == 0) || error("Choose the stride, pad and kernel size properly")
@@ -50,72 +50,72 @@ function conv(x::AA{4}, w::AA{4}; pad = 0, stride = 1, dilation = 1, algo = UInt
    conv!(y, x, w, b, pad = pad_, stride = stride_, dilation = dilation, algo = UInt32(algo))
end

-conv(x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}, b::AbstractArray{Float64, 1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
-    Float64.(conv(Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo))
+conv(x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where {A1<:AbstractArray{Float64, 4}, A2<:AbstractArray{Float64, 1}} =
+    conv(Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo)

-function conv(x::AA{4}, w::AA{4}, b::AA{1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0))
+function conv(x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where {A1<:AA{4}, A2<:AA{1}}
    dilation == 1 || dilation == (1, 1) || error("NNPACK does not support dilation > 1")
    pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
    ((size(x, 1) - size(w, 1) + 2 * pad_[1]) % stride_[1] == 0 && (size(x, 2) - size(w, 2) + 2 * pad_[2]) % stride_[2] == 0) || error("Choose the stride, pad and kernel size properly")
    conv!(similar(x, cdims(size(x), dilation_dims(w, dilation), pad_, stride_)), x, w, b, pad = pad_, stride = stride_, dilation = dilation, algo = UInt32(algo))
end

-crosscor(x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}, b::AbstractArray{Float64, 1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
-    Float64.(crosscor(Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo))
+crosscor(x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where {A1<:AbstractArray{Float64, 4}, A2<:AbstractArray{Float64, 1}} =
+    crosscor(Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo)

-function crosscor(x::AA{4}, w::AA{4}, b::AA{1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0))
+function crosscor(x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where {A1<:AA{4}, A2<:AA{1}}
    dilation == 1 || dilation == (1, 1) || error("NNPACK does not support dilation > 1")
    pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
    ((size(x, 1) - size(w, 1) + 2 * pad_[1]) % stride_[1] == 0 && (size(x, 2) - size(w, 2) + 2 * pad_[2]) % stride_[2] == 0) || error("Choose the stride, pad and kernel size properly")
    conv!(similar(x, cdims(size(x), dilation_dims(w, dilation), pad_, stride_)), x, w, b, pad = pad_, stride = stride_, dilation = dilation, algo = UInt32(algo), flipkernel = 1)
end

-conv!(y::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}, b::AbstractArray{Float64, 1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) =
-    Float64.(conv(Float32.(y), Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = flipkernel))
+conv!(y::A1, x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) where {A1<:AbstractArray{Float64, 4}, A2<:AbstractArray{Float64, 1}} =
+    conv(Float32.(y), Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = flipkernel)

-function conv!(y::AA{4}, x::AA{4}, w::AA{4}, b::AA{1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0)
+function conv!(y::A1, x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) where {A1<:AA{4}, A2<:AA{1}}
    flipkernel == 0 && (w = reverse(reverse(w, dims=1), dims=2))
    nnp_convolution_output(y, x, w, b, algo = algo, padding = pad, stride = stride, threadpool = shared_threadpool)
end

-crosscor!(y::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}, b::AbstractArray{Float64, 1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
-    Float64.(conv!(Float32.(y), Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = 1))
+crosscor!(y::A1, x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where {A1<:AbstractArray{Float64, 4}, A2<:AbstractArray{Float64, 1}} =
+    conv!(Float32.(y), Float32.(x), Float32.(w), Float32.(b), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = 1)

-crosscor!(y::AA{4}, x::AA{4}, w::AA{4}, b::AA{1}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
+crosscor!(y::A1, x::A1, w::A1, b::A2; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where {A1<:AA{4}, A2<:AA{1}} =
    conv!(y, x, w, b, pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = 1)

-∇conv_data(dy::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
-    Float64.(∇conv_data(Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo))
+∇conv_data(dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where A<:AbstractArray{Float64, 4} =
+    ∇conv_data(Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo)

-function ∇conv_data(dy::AA{4}, x::AA{4}, w::AA{4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0))
+function ∇conv_data(dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where A<:AA{4}
    dilation == 1 || dilation == (1, 1) || error("NNPACK does not support dilation > 1")
    pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
    ((size(x, 1) - size(w, 1) + 2 * pad_[1]) % stride_[1] == 0 && (size(x, 2) - size(w, 2) + 2 * pad_[2]) % stride_[2] == 0) || error("Choose the stride, pad and kernel size properly")
    ∇conv_data!(zeros(Float32, size(x)), dy, x, w; pad = pad_, stride = stride_, dilation = dilation, algo = UInt32(algo))
end

-∇conv_data!(dx::AbstractArray{Float64, 4}, dy::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) =
-    Float64.(∇conv_data!(Float32.(dx), Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = flipkernel))
+∇conv_data!(dx::A, dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) where A<:AbstractArray{Float64, 4} =
+    ∇conv_data!(Float32.(dx), Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = flipkernel)

-function ∇conv_data!(dx::AA{4}, dy::AA{4}, x::AA{4}, w::AA{4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0)
+function ∇conv_data!(dx::A, dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) where A<:AA{4}
    flipkernel == 0 && (w = reverse(reverse(w, dims=1), dims=2))
    nnp_convolution_input_gradient(dx, x, dy, w, padding = pad, stride = stride, algo = algo, threadpool = shared_threadpool)
end

-∇conv_filter(dy::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) =
-    Float64.(∇conv_filter(Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo))
+∇conv_filter(dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where A<:AbstractArray{Float64, 4} =
+    ∇conv_filter(Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo)

-function ∇conv_filter(dy::AA{4}, x::AA{4}, w::AA{4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0))
+function ∇conv_filter(dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0)) where A<:AA{4}
    dilation == 1 || dilation == (1, 1) || error("NNPACK does not support dilation > 1")
    pad_, stride_ = padtuple(x, pad), padtuple(x, stride)
    ((size(x, 1) - size(w, 1) + 2 * pad_[1]) % stride_[1] == 0 && (size(x, 2) - size(w, 2) + 2 * pad_[2]) % stride_[2] == 0) || error("Choose the stride, pad and kernel size properly")
    ∇conv_filter!(zeros(Float32, size(w)), dy, x, w; pad = pad_, stride = stride_, dilation = dilation, algo = UInt32(algo))
end

-∇conv_filter!(dw::AbstractArray{Float64, 4}, dy::AbstractArray{Float64, 4}, x::AbstractArray{Float64, 4}, w::AbstractArray{Float64, 4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) =
-    Float64.(∇conv_filter!(Float32.(dw), Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = flipkernel))
+∇conv_filter!(dw::A, dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) where A<:AbstractArray{Float64, 4} =
+    ∇conv_filter!(Float32.(dw), Float32.(dy), Float32.(x), Float32.(w), pad = pad, stride = stride, dilation = dilation, algo = algo, flipkernel = flipkernel)

-function ∇conv_filter!(dw::AA{4}, dy::AA{4}, x::AA{4}, w::AA{4}; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0)
+function ∇conv_filter!(dw::A, dy::A, x::A, w::A; pad = 0, stride = 1, dilation = 1, algo = UInt32(0), flipkernel = 0) where A<:AA{4}
    flipkernel == 0 && (w = reverse(reverse(w, dims=1), dims=2))
    dw .= nnp_convolution_kernel_gradient(dw, x, dy, w, padding = pad, stride = stride, algo = algo, threadpool = shared_threadpool)
    flipkernel == 0 ? reverse(reverse(dw, dims=1), dims=2) : dw
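For context on the pattern: every Float64 method above is a thin shim that demotes its arguments to Float32 and forwards to the Float32 method, since NNPACK only operates on single precision. Note that the rewritten methods also drop the old trailing `Float64.(...)` conversion, so Float64 inputs now return Float32 results. Below is a minimal, self-contained sketch of that `where`-constrained dispatch style; `fast_double` is a hypothetical stand-in for an NNPACK-backed kernel and is not part of this diff or of NNlib.

# Sketch only: the where-constrained shim pattern used in the diff above.
# fast_double is a hypothetical example function, not an NNlib API.
fast_double(x::A) where A<:AbstractVecOrMat{Float32} = 2f0 .* x                   # pretend Float32 fast path
fast_double(x::A) where A<:AbstractVecOrMat{Float64} = fast_double(Float32.(x))   # demote Float64 input and forward

fast_double(rand(Float64, 2, 2))  # dispatches to the Float64 shim, returns a Matrix{Float32}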