@@ -23,10 +23,10 @@ const sigmoid = σ
end
"""
- hardσ(x, a=0.2) = max(0, min(1.0, a * x + 0.5))
+ hardσ(x, a=0.2) = max(0, min(1.0, a * x + 0.5))
- Segment-wise linear approximation of sigmoid
- See: [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/pdf/1511.00363.pdf)
+ Segment-wise linear approximation of sigmoid.
+ See [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/pdf/1511.00363.pdf).
"""
hardσ(x::Real, a=0.2) = oftype(x/1, max(zero(x/1), min(one(x/1), oftype(x/1, a) * x + oftype(x/1, 0.5))))
const hardsigmoid = hardσ
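For reference, the formula in the docstring above can be checked with a few lines of plain Julia; this is only an illustrative sketch, and the `hardsig_demo` name is not part of the PR or of NNlib:

```julia
# Piecewise-linear approximation of sigmoid: clamp a*x + 0.5 to [0, 1].
hardsig_demo(x, a = 0.2) = max(0, min(1, a * x + 0.5))

hardsig_demo(0.0)   # 0.5  (agrees with σ(0))
hardsig_demo(3.0)   # 1.0  (saturates once a*x + 0.5 ≥ 1, i.e. x ≥ 2.5)
hardsig_demo(-3.0)  # 0.0  (saturates once a*x + 0.5 ≤ 0, i.e. x ≤ -2.5)
```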
@@ -53,7 +53,7 @@ const logsigmoid = logσ
hardtanh(x) = max(-1, min(1, x))
Segment-wise linear approximation of tanh. Cheaper and more computationally efficient version of tanh.
- See: (http://ronan.collobert.org/pub/matos/2004_phdthesis_lip6.pdf)
+ See [Large Scale Machine Learning](http://ronan.collobert.org/pub/matos/2004_phdthesis_lip6.pdf).
"""
hardtanh(x::Real) = max(-one(x), min(one(x), x))
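As an illustration of the hardtanh docstring above, a minimal plain-Julia sketch (the `hardtanh_demo` name is ours, not NNlib's):

```julia
# Hard tanh: identity on [-1, 1], clipped to ±1 outside that interval.
hardtanh_demo(x) = max(-one(x), min(one(x), x))

hardtanh_demo(0.3)   # 0.3   (unchanged inside [-1, 1])
hardtanh_demo(5.0)   # 1.0   (clipped from above)
hardtanh_demo(-5.0)  # -1.0  (clipped from below)
```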
@@ -137,7 +137,7 @@ swish(x::Real) = x * σ(x)
"""
lisht(x) = x * tanh(x)
- Non-Parametric Linearly Scaled Hyperbolic Tangent Activation Function
+ Non-Parametric Linearly Scaled Hyperbolic Tangent Activation Function.
See [LiSHT](https://arxiv.org/abs/1901.05894)
"""
lisht(x::Real) = x * tanh(x)
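A quick sketch of the LiSHT formula documented above, in plain Julia (illustrative only; `lisht_demo` is a hypothetical name, not an NNlib export):

```julia
# LiSHT: x * tanh(x). Non-negative everywhere and even in x.
lisht_demo(x) = x * tanh(x)

lisht_demo(2.0)   # ≈ 1.928
lisht_demo(-2.0)  # ≈ 1.928  (same value: (-x) * tanh(-x) == x * tanh(x))
lisht_demo(0.0)   # 0.0
```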
@@ -171,7 +171,7 @@ celu(x::Real, α::Real = one(x)) = ifelse(x ≥ 0, x / one(x), α * (exp(x/α) -
"""
trelu(x, theta = 1.0) = x > theta ? x : 0
- Threshold Gated Rectified Linear
+ Threshold Gated Rectified Linear.
See [ThresholdRelu](https://arxiv.org/pdf/1402.3337.pdf)
"""
trelu(x::Real, theta = one(x)) = ifelse(x > theta, x, zero(x))
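A minimal sketch of the thresholded ReLU described above, assuming plain Julia and a hypothetical `trelu_demo` name:

```julia
# Thresholded ReLU: pass x through only when it exceeds the threshold, else 0.
trelu_demo(x, theta = 1.0) = x > theta ? x : zero(x)

trelu_demo(2.0)        # 2.0  (above the default threshold of 1.0)
trelu_demo(0.5)        # 0.0  (below the threshold)
trelu_demo(0.5, 0.25)  # 0.5  (custom threshold)
```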
@@ -205,23 +205,23 @@ logcosh(x::Real) = x + softplus(-2x) - log(oftype(x, 2))
"""
mish(x) = x * tanh(softplus(x))
- Self Regularized Non-Monotonic Neural Activation Function
+ Self Regularized Non-Monotonic Neural Activation Function.
See [Mish: A Self Regularized Non-Monotonic Neural Activation Function](https://arxiv.org/abs/1908.08681).
"""
mish(x::Real) = x * tanh(softplus(x))
"""
tanhshrink(x) = x - tanh(x)
- See [Tanhshrink Activation Function](https://www.gabormelli.com/RKB/Tanhshrink_Activation_Function)
+ See [Tanhshrink Activation Function](https://www.gabormelli.com/RKB/Tanhshrink_Activation_Function).
"""
tanhshrink(x::Real) = x - tanh(x)
"""
softshrink(x, λ=0.5) =
(x ≥ λ ? x - λ : (-λ ≥ x ? x + λ : 0))
- See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_Activation_Function)
+ See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_Activation_Function).
"""
softshrink(x::Real, λ = oftype(x/1, 0.5)) = min(max(zero(x), x - λ), x + λ)
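To make the three formulas in this last hunk (mish, tanhshrink, softshrink) easy to check, here is one self-contained plain-Julia sketch; the `_demo` names and the simple softplus are illustrative only, not NNlib's implementations:

```julia
# Simple softplus for this sketch only; NNlib's softplus is more numerically careful.
softplus_demo(x) = log1p(exp(x))

# Mish: x * tanh(softplus(x)).
mish_demo(x) = x * tanh(softplus_demo(x))

# Tanhshrink: the residual left after subtracting tanh(x) from x.
tanhshrink_demo(x) = x - tanh(x)

# Softshrink: shrink x toward zero by λ; zero inside the dead zone [-λ, λ].
softshrink_demo(x, λ = 0.5) = min(max(zero(x), x - λ), x + λ)

mish_demo(1.0)         # ≈ 0.865
tanhshrink_demo(1.0)   # ≈ 0.238
softshrink_demo(2.0)   # 1.5
softshrink_demo(0.2)   # 0.0  (inside [-0.5, 0.5])
softshrink_demo(-2.0)  # -1.5
```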