Commit c335aa5

Activation functions added (#168)
* some activation functions added
* tests added
* celu corrected
* softshrink corrected
* relu6 test corrected
* 6 in relu6 typecasted
1 parent 3dc371d commit c335aa5

2 files changed: +97 −16 lines

src/activation.jl

Lines changed: 56 additions & 13 deletions
@@ -1,5 +1,5 @@
-export σ, sigmoid, relu, leakyrelu, elu, gelu, swish, selu, softplus, softsign, logσ,
-       logsigmoid, logcosh, mish
+export σ, sigmoid, relu, leakyrelu, relu6, rrelu, elu, gelu, swish, selu, celu, softplus, softsign, logσ,
+       logsigmoid, logcosh, mish, tanhshrink, softshrink
 
 """
     σ(x) = 1 / (1 + exp(-x))
@@ -13,7 +13,7 @@ const sigmoid = σ
 # ForwardDiff numerical stability hack
 σ_stable(x::Real) = ifelse(x < -80, zero(x), one(x) / (one(x) + exp(-x)))
 σ(x::Float32) = σ_stable(x)
-@init @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" begin
+@init @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
   σ(x::ForwardDiff.Dual{T,Float32}) where T = σ_stable(x)
 end
 
@@ -51,8 +51,29 @@ Leaky [Rectified Linear Unit](https://en.wikipedia.org/wiki/Rectifier_(neural_ne
 activation function.
 You can also specify the coefficient explicitly, e.g. `leakyrelu(x, 0.01)`.
 """
-leakyrelu(x::Real, a = oftype(x/1, 0.01)) = max(a*x, x/one(x))
+leakyrelu(x::Real, a = oftype(x / 1, 0.01)) = max(a * x, x / one(x))
 
+"""
+    relu6(x) = min(max(0, x), 6)
+
+[Rectified Linear Unit](https://en.wikipedia.org/wiki/Rectifier_(neural_networks))
+activation function.
+"""
+relu6(x::Real) = min(relu(x), one(x)*oftype(x, 6))
+
+"""
+    rrelu(x) = max(ax, x)
+
+    a = randomly sampled from uniform distribution U(l, u)
+
+Randomized Leaky [Rectified Linear Unit](https://arxiv.org/pdf/1505.00853.pdf)
+activation function.
+You can also specify the bound explicitly, e.g. `rrelu(x, 0.0, 1.0)`.
+"""
+function rrelu(x::Real, l::Real = 1 / 8.0, u::Real = 1 / 3.0)
+    a = oftype(x / 1, (u - l) * rand() + l)
+    return leakyrelu(x, a)
+end
 
 """
     elu(x, α = 1) =
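
For orientation, a quick usage sketch of the two rectifier variants added above (illustrative only, not part of the diff; the values follow from the definitions and from the tests further down):

using NNlib

relu6(10.0)              # 6.0: clamped to the interval [0, 6]
relu6(Int32(3))          # 3, still an Int32; like relu, relu6 does not force floats
rrelu(-1.0)              # random: the slope is drawn from U(1/8, 1/3), so the result
                         # lands somewhere in [-1/3, -1/8] and changes on every call
rrelu(-1.0, 0.25, 0.5)   # custom bounds l = 0.25, u = 0.5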
@@ -62,7 +83,7 @@ Exponential Linear Unit activation function.
 See [Fast and Accurate Deep Network Learning by Exponential Linear Units](https://arxiv.org/abs/1511.07289).
 You can also specify the coefficient explicitly, e.g. `elu(x, 1)`.
 """
-elu(x, α = one(x)) = ifelse(x ≥ 0, x/one(x), α * (exp(x) - one(x)))
+elu(x, α = one(x)) = ifelse(x ≥ 0, x / one(x), α * (exp(x) - one(x)))
 
 
 """
@@ -72,10 +93,10 @@ elu(x, α = one(x)) = ifelse(x ≥ 0, x/one(x), α * (exp(x) - one(x)))
 activation function.
 """
 function gelu(x::Real)
-    p = oftype(x/1, π)
-    λ = oftype(x/1, √(2/p))
-    α = oftype(x/1, 0.044715)
-    h = oftype(x/1, 0.5)
+    p = oftype(x / 1, π)
+    λ = oftype(x / 1, √(2 / p))
+    α = oftype(x / 1, 0.044715)
+    h = oftype(x / 1, 0.5)
     h * x * (one(x) + tanh(λ * (x + α * x^3)))
 end
 
@@ -98,11 +119,20 @@ Scaled exponential linear units.
 See [Self-Normalizing Neural Networks](https://arxiv.org/pdf/1706.02515.pdf).
 """
 function selu(x::Real)
-    λ = oftype(x/1, 1.0507009873554804934193349852946)
-    α = oftype(x/1, 1.6732632423543772848170429916717)
-    λ * ifelse(x > 0, x/one(x), α * (exp(x) - one(x)))
+    λ = oftype(x / 1, 1.0507009873554804934193349852946)
+    α = oftype(x / 1, 1.6732632423543772848170429916717)
+    λ * ifelse(x > 0, x / one(x), α * (exp(x) - one(x)))
 end
 
+"""
+    celu(x) = (x ≥ 0 ? x : α * (exp(x/α) - 1))
+
+Continuously Differentiable Exponential Linear Units
+See [Continuously Differentiable Exponential Linear Units](https://arxiv.org/pdf/1704.07483.pdf).
+"""
+function celu(x::Real, α::Real = one(x))
+    return ifelse(x ≥ 0, x / one(x), α * (exp(x/α) - one(x)))
+end
 
 """
     softsign(x) = x / (1 + |x|)
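
A brief illustration of how `celu` relates to `elu` (illustrative only, not part of the commit): at α = 1 the two agree, while for other α the exponent in `celu` is scaled by 1/α.

using NNlib

celu(-1.0)        # exp(-1.0) - 1, identical to elu(-1.0) since α defaults to 1
celu(-4.0, 0.5)   # 0.5 * (exp(-4.0 / 0.5) - 1); elu would give 0.5 * (exp(-4.0) - 1)
celu(42)          # == 42 (returned as 42.0; integer input is promoted by x / one(x))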
@@ -136,9 +166,22 @@ See [Mish: A Self Regularized Non-Monotonic Neural Activation Function](https://
 """
 mish(x::Real) = x * tanh(softplus(x))
 
+"""
+    tanhshrink(x) = x - tanh(x)
+
+See [Tanhshrink Activation Function](https://www.gabormelli.com/RKB/Tanhshrink_Activation_Function)
+"""
+tanhshrink(x::Real) = x - tanh(x)
+
+"""
+    softshrink = (x ≥ λ ? x-λ : (-λ ≥ x ? x+λ : 0))
+
+See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_Activation_Function)
+"""
+softshrink(x::Real, λ = oftype(x/1, 0.5)) = min(max(zero(x), x - λ), x + λ)
 
 # Provide an informative error message if activation functions are called with an array
-for f in (:σ, :σ_stable, :logσ, :relu, :leakyrelu, :elu, :gelu, :swish, :selu, :softsign, :softplus, :logcosh, :mish)
+for f in (:σ, :σ_stable, :logσ, :relu, :leakyrelu, :relu6, :rrelu, :elu, :gelu, :swish, :selu, :celu, :softsign, :softplus, :logcosh, :mish, :tanhshrink, :softshrink)
   @eval $(f)(x::AbstractArray, args...) =
     error("Use broadcasting (`", $(string(f)), ".(x)`) to apply activation functions to arrays.")
 end
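
A short sketch of the two shrink functions and of the array error path extended above (illustrative, not from the commit):

using NNlib

tanhshrink(1.0)          # ≈ 0.23840584404423515, i.e. 1 - tanh(1)
softshrink(0.7)          # 0.2: inputs above the default λ = 0.5 are shifted toward zero by λ
softshrink(0.3)          # 0.0: anything inside [-λ, λ] collapses to zero
softshrink.([1.0, 2.0])  # [0.5, 1.5] via broadcasting
softshrink([1.0, 2.0])   # throws: "Use broadcasting (`softshrink.(x)`) to apply activation functions to arrays."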

test/activation.jl

Lines changed: 41 additions & 3 deletions
@@ -1,6 +1,6 @@
 using NNlib, Test, Zygote
 
-ACTIVATION_FUNCTIONS = [σ, relu, leakyrelu, elu, gelu, swish, selu, softplus, softsign, logcosh, mish];
+ACTIVATION_FUNCTIONS = [σ, relu, leakyrelu, relu6, rrelu, elu, gelu, celu, swish, selu, softplus, softsign, logcosh, mish, tanhshrink, softshrink];
 
 function test_value_float_precision_preserving(a)
     @testset "$(a): " begin
@@ -39,6 +39,8 @@ end
 @test σ(0.0) == 0.5
 @test relu(0.0) == 0.0
 @test leakyrelu(0.0) == 0.0
+@test relu6(0.0) == 0.0
+@test rrelu(0.0) == 0.0
 @test elu(0.0) == 0.0
 @test gelu(0.0) == 0.0
 @test swish(0.0) == 0.0
@@ -47,32 +49,45 @@ end
 @test softplus(-1e8) ≈ 0.0
 @test softsign(0.0) == 0.0
 @test selu(0.0) == 0.0
+@test celu(0.0) == 0.0
 @test logcosh(0.0) == log(cosh(0.0))
 @test mish(0.0) == 0.0
+@test tanhshrink(0.0) == 0.0
+@test softshrink(0.0) == 0.0
 
 @test σ(1.0) == 1.0 / (1.0 + exp(-1.0))
 @test relu(1.0) == 1.0
 @test leakyrelu(1.0) == 1.0
+@test relu6(1.0) == 1.0
+@test rrelu(1.0) == 1.0
 @test elu(1.0) == 1.0
 @test gelu(1.0) == 0.8411919906082768
 @test swish(1.0) == 1.0 / (1.0 + exp(-1.0))
 @test softplus(1.0) ≈ log(exp(1.0) + 1.0)
 @test softsign(1.0) == 0.5
 @test selu(1.0) == 1.0507009873554804934193349852946
+@test celu(1.0) == 1.0
 @test logcosh(1.0) ≈ log(cosh(1.0))
 @test mish(1.0) ≈ tanh(log(1.0 + exp(1.0)))
+@test tanhshrink(1.0) ≈ 0.23840584404423515
+@test softshrink(1.0) == 0.5
 
 @test σ(-1.0) == 1.0 / (1.0 + exp(1.0))
 @test relu(-1.0) == 0.0
 @test leakyrelu(-1.0) == -0.01
+@test relu6(-1.0) == 0.0
+@test -1/3.0 <= rrelu(-1.0) <= -1/8.0
 @test elu(-1.0) == exp(-1.0) - 1.0
 @test gelu(-1.0) == -0.15880800939172324
 @test swish(-1.0) == -1.0 / (1.0 + exp(1.0))
 @test softplus(-1.0) ≈ log(exp(-1.0) + 1.0)
 @test softsign(-1.0) == -0.5
 @test selu(-1.0) == 1.0507009873554804934193349852946 * 1.6732632423543772848170429916717 * (exp(-1.0) - 1.0)
+@test celu(-1.0) == exp(-1.0) - 1
 @test log(cosh(-1.0)) ≈ log(cosh(-1.0))
 @test mish(-1.0) ≈ -tanh(log(1.0 + exp(-1.0)))
+@test tanhshrink(-1.0) ≈ -0.23840584404423515
+@test softshrink(-1.0) == -0.5
 
 @testset "Float inference" begin
     test_value_float_precision_preserving.(ACTIVATION_FUNCTIONS)
@@ -86,13 +101,20 @@ end
 end
 
 @testset "Test Integer64 and Integer32 inputs will force Float64 outputs" begin
-    test_value_int_input_forces_float64.(filter(x -> x != relu, ACTIVATION_FUNCTIONS))
+    test_value_int_input_forces_float64.(filter(x -> (x != relu && x != relu6), ACTIVATION_FUNCTIONS))
 
     @testset "relu: " begin
        # relu doesn't have to force floating point outputs
        @test typeof(relu(Int64(1))) == Int64
        @test typeof(relu(Int32(1))) == Int32
     end
+
+    @testset "relu6: " begin
+       # relu6 doesn't have to force floating point outputs
+       @test typeof(relu6(Int64(1))) == Int64
+       @test typeof(relu6(Int32(1))) == Int32
+    end
+
 end
 
 @testset "Float gradient inference" begin
@@ -155,6 +177,22 @@ end
 @test leakyrelu( 0.4,0.3) ≈ 0.4
 @test leakyrelu(-0.4,0.3) ≈ -0.12
 
+@test relu6(10.0) == 6.0
+@test -0.2 <= rrelu(-0.4,0.25,0.5) <= -0.1
+
+@testset "celu" begin
+    @test celu(42) == 42
+    @test celu(42.) == 42.
+
+    @test celu(-4, 0.5) ≈ 0.5*(exp(-4.0/0.5) - 1)
+end
+
+@testset "softshrink" begin
+    @test softshrink(15., 5.) == 10.
+    @test softshrink(4., 5.) == 0.
+    @test softshrink(-15., 5.) == -10.
+end
+
 @testset "logsigmoid" begin
     xs = randn(10,10)
     @test logsigmoid.(xs) ≈ log.(sigmoid.(xs))
@@ -164,4 +202,4 @@ end
 end
 
 @test logcosh(1_000.0) + log(2) == 1_000.0
-end
+end
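
Note that the `rrelu` assertions above are range checks rather than exact equalities, since `rrelu` draws its slope from `rand()`. A sketch of how one might pin it down locally (an assumption: seeding the global RNG that `rand()` uses; this is not part of the test suite):

using NNlib, Random

Random.seed!(0)
y = rrelu(-1.0)
@assert -1/3 <= y <= -1/8   # the only property the tests actually rely on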
