-export σ, sigmoid, relu, leakyrelu, elu, gelu, swish, selu, softplus, softsign, logσ,
-       logsigmoid, logcosh, mish
+export σ, sigmoid, relu, leakyrelu, relu6, rrelu, elu, gelu, swish, selu, celu, softplus, softsign, logσ,
+       logsigmoid, logcosh, mish, tanhshrink, softshrink
 
 """
     σ(x) = 1 / (1 + exp(-x))
@@ -13,7 +13,7 @@ const sigmoid = σ
 # ForwardDiff numerical stability hack
 σ_stable(x::Real) = ifelse(x < -80, zero(x), one(x) / (one(x) + exp(-x)))
 σ(x::Float32) = σ_stable(x)
-@init @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" begin
+@init @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
   σ(x::ForwardDiff.Dual{T,Float32}) where T = σ_stable(x)
 end
 
@@ -51,8 +51,29 @@ Leaky [Rectified Linear Unit](https://en.wikipedia.org/wiki/Rectifier_(neural_networks))
 activation function.
 You can also specify the coefficient explicitly, e.g. `leakyrelu(x, 0.01)`.
 """
-leakyrelu(x::Real, a = oftype(x/1, 0.01)) = max(a*x, x/one(x))
+leakyrelu(x::Real, a = oftype(x / 1, 0.01)) = max(a * x, x / one(x))
 
+"""
+    relu6(x) = min(max(0, x), 6)
+
+[Rectified Linear Unit](https://en.wikipedia.org/wiki/Rectifier_(neural_networks))
+activation function capped at 6.
+"""
+relu6(x::Real) = min(relu(x), one(x) * oftype(x, 6))
+
+"""
+    rrelu(x) = max(a*x, x)
+
+    a = randomly sampled from uniform distribution U(l, u)
+
+Randomized Leaky [Rectified Linear Unit](https://arxiv.org/pdf/1505.00853.pdf)
+activation function.
+You can also specify the bounds explicitly, e.g. `rrelu(x, 0.0, 1.0)`.
+"""
+function rrelu(x::Real, l::Real = 1 / 8.0, u::Real = 1 / 3.0)
+    a = oftype(x / 1, (u - l) * rand() + l)
+    return leakyrelu(x, a)
+end
 
 """
     elu(x, α = 1) =
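A quick usage sketch of the new relu6 and rrelu (illustrative only; it assumes this is NNlib's activations.jl and that the updated exports above are loaded, e.g. via `using NNlib` once this commit is applied):

relu6(8.0)              # 6.0, output is capped at 6
relu6(-3.0)             # 0.0, negative inputs clamp to zero just like relu
rrelu(-2.0)             # random: the slope a is drawn from U(1/8, 1/3), so the result lies in (-2/3, -1/4)
rrelu(-2.0, 0.0, 1.0)   # same, with explicit bounds l = 0.0 and u = 1.0; result lies in (-2, 0)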
@@ -62,7 +83,7 @@ Exponential Linear Unit activation function.
 See [Fast and Accurate Deep Network Learning by Exponential Linear Units](https://arxiv.org/abs/1511.07289).
 You can also specify the coefficient explicitly, e.g. `elu(x, 1)`.
 """
-elu(x, α = one(x)) = ifelse(x ≥ 0, x/one(x), α * (exp(x) - one(x)))
+elu(x, α = one(x)) = ifelse(x ≥ 0, x / one(x), α * (exp(x) - one(x)))
 
 
 """
@@ -72,10 +93,10 @@ elu(x, α = one(x)) = ifelse(x ≥ 0, x/one(x), α * (exp(x) - one(x)))
 activation function.
 """
 function gelu(x::Real)
-    p = oftype(x/1, π)
-    λ = oftype(x/1, √(2/p))
-    α = oftype(x/1, 0.044715)
-    h = oftype(x/1, 0.5)
+    p = oftype(x / 1, π)
+    λ = oftype(x / 1, √(2 / p))
+    α = oftype(x / 1, 0.044715)
+    h = oftype(x / 1, 0.5)
     h * x * (one(x) + tanh(λ * (x + α * x^3)))
 end
 
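An aside on the whitespace-only hunks above: they all touch the `oftype(x / 1, c)` idiom. Dividing `x` by 1 promotes integer inputs to floating point, so the constant `c` is converted to the float type the result will have, keeping these functions type-stable for `Float32` inputs. A tiny illustration (hypothetical snippet, not from this file):

oftype(3 / 1, 0.044715)     # 0.044715, a Float64, since 3 / 1 isa Float64
oftype(3f0 / 1, 0.044715)   # 0.044715f0, a Float32, matching the Float32 input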
@@ -98,11 +119,20 @@ Scaled exponential linear units.
 See [Self-Normalizing Neural Networks](https://arxiv.org/pdf/1706.02515.pdf).
 """
 function selu(x::Real)
-    λ = oftype(x/1, 1.0507009873554804934193349852946)
-    α = oftype(x/1, 1.6732632423543772848170429916717)
-    λ * ifelse(x > 0, x/one(x), α * (exp(x) - one(x)))
+    λ = oftype(x / 1, 1.0507009873554804934193349852946)
+    α = oftype(x / 1, 1.6732632423543772848170429916717)
+    λ * ifelse(x > 0, x / one(x), α * (exp(x) - one(x)))
 end
 
+"""
+    celu(x, α = 1) = (x ≥ 0 ? x : α * (exp(x/α) - 1))
+
+Continuously Differentiable Exponential Linear Units.
+See [Continuously Differentiable Exponential Linear Units](https://arxiv.org/pdf/1704.07483.pdf).
+"""
+function celu(x::Real, α::Real = one(x))
+    return ifelse(x ≥ 0, x / one(x), α * (exp(x / α) - one(x)))
+end
 
 """
     softsign(x) = x / (1 + |x|)
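A small sanity check for the new celu (illustrative values, under the same assumption that the file is loaded):

celu(2.0)          # 2.0, positive inputs pass through unchanged
celu(-1.0)         # exp(-1) - 1 ≈ -0.632 with the default α = 1
celu(-1.0, 2.0)    # 2 * (exp(-0.5) - 1) ≈ -0.787; the output saturates toward -α for very negative x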
@@ -136,9 +166,22 @@ See [Mish: A Self Regularized Non-Monotonic Neural Activation Function](https://
 """
 mish(x::Real) = x * tanh(softplus(x))
 
+"""
+    tanhshrink(x) = x - tanh(x)
+
+See [Tanhshrink Activation Function](https://www.gabormelli.com/RKB/Tanhshrink_Activation_Function).
+"""
+tanhshrink(x::Real) = x - tanh(x)
+
+"""
+    softshrink(x, λ = 0.5) = (x ≥ λ ? x - λ : (-λ ≥ x ? x + λ : 0))
+
+See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_Activation_Function).
+"""
+softshrink(x::Real, λ = oftype(x / 1, 0.5)) = min(max(zero(x), x - λ), x + λ)
 
 # Provide an informative error message if activation functions are called with an array
-for f in (:σ, :σ_stable, :logσ, :relu, :leakyrelu, :elu, :gelu, :swish, :selu, :softsign, :softplus, :logcosh, :mish)
+for f in (:σ, :σ_stable, :logσ, :relu, :leakyrelu, :relu6, :rrelu, :elu, :gelu, :swish, :selu, :celu, :softsign, :softplus, :logcosh, :mish, :tanhshrink, :softshrink)
   @eval $(f)(x::AbstractArray, args...) =
     error("Use broadcasting (`", $(string(f)), ".(x)`) to apply activation functions to arrays.")
 end
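Finally, a rough sketch of the remaining additions and of the array guard in the last hunk (illustrative, same assumptions as before):

tanhshrink(2.0)           # 2 - tanh(2) ≈ 1.036
softshrink(0.3)           # 0.0, inputs with |x| ≤ λ = 0.5 are shrunk to zero
softshrink(2.0)           # 1.5, larger inputs are shifted toward zero by λ
softshrink(-2.0)          # -1.5
tanhshrink([1.0, 2.0])    # throws: "Use broadcasting (`tanhshrink.(x)`) to apply activation functions to arrays."
tanhshrink.([1.0, 2.0])   # ≈ [0.238, 1.036], broadcasting applies it elementwise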