
Commit 60ac742

Added some Activation functions (#175)
* Added Activation Functions
* Update activation.jl
* Remove Typecast errors
* Added Activation function tests
* Removed extra space
* Removed unwanted typecast
* Update Test Functions
* Apply suggestions from code review (Co-Authored-By: matsueushi <[email protected]>)
* Update activation.jl
* Updated celu and trelu
* Update activation.jl

Co-authored-by: matsueushi <[email protected]>
1 parent fbea0c5 commit 60ac742

2 files changed (+90, -11 lines)


src/activation.jl

Lines changed: 44 additions & 7 deletions
@@ -1,5 +1,5 @@
-export σ, sigmoid, relu, leakyrelu, relu6, rrelu, elu, gelu, swish, selu, celu, softplus, softsign, logσ,
-       logsigmoid, logcosh, mish, tanhshrink, softshrink
+export σ, sigmoid, hardσ, hardsigmoid, hardtanh, relu, leakyrelu, relu6, rrelu, elu, gelu, swish, selu, celu, softplus, softsign, logσ,
+       logsigmoid, logcosh, mish, tanhshrink, softshrink, thresholdrelu, trelu, lisht
 
 """
     σ(x) = 1 / (1 + exp(-x))
@@ -17,6 +17,15 @@ const sigmoid = σ
     σ(x::ForwardDiff.Dual{T,Float32}) where T = σ_stable(x)
 end
 
+"""
+    hardσ(x, a=0.2) = max(0, min(1.0, a * x + 0.5))
+
+Segment-wise linear approximation of sigmoid.
+See: [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/pdf/1511.00363.pdf)
+"""
+hardσ(x::Real, a=0.2) = oftype(x/1, max(zero(x/1), min(one(x/1), oftype(x/1, a) * x + oftype(x/1, 0.5))))
+const hardsigmoid = hardσ
+
 
 """
     logσ(x)
@@ -35,6 +44,15 @@ logσ(x::Real) = -softplus(-x)
 const logsigmoid = logσ
 
 
+"""
+    hardtanh(x) = max(-1, min(1, x))
+
+Segment-wise linear approximation of tanh; cheaper and more computationally efficient than tanh.
+See: (http://ronan.collobert.org/pub/matos/2004_phdthesis_lip6.pdf)
+"""
+hardtanh(x::Real) = max(-one(x), min(one(x), x))
+
+
 """
     relu(x) = max(0, x)
 
@@ -110,6 +128,16 @@ See [Swish: a Self-Gated Activation Function](https://arxiv.org/pdf/1710.05941.p
 """
 swish(x::Real) = x * σ(x)
 
+
+"""
+    lisht(x) = x * tanh(x)
+
+Non-Parametric Linearly Scaled Hyperbolic Tangent activation function.
+See [LiSHT](https://arxiv.org/abs/1901.05894)
+"""
+lisht(x::Real) = x * tanh(x)
+
+
 """
     selu(x) = λ * (x ≥ 0 ? x : α * (exp(x) - 1))
 
@@ -132,9 +160,18 @@ end
 Continuously Differentiable Exponential Linear Units
 See [Continuously Differentiable Exponential Linear Units](https://arxiv.org/pdf/1704.07483.pdf).
 """
-function celu(x::Real, α::Real = one(x))
-    return ifelse(x ≥ 0, x / one(x), α * (exp(x/α) - one(x)))
-end
+celu(x::Real, α::Real = one(x)) = ifelse(x ≥ 0, x / one(x), α * (exp(x/α) - one(x)))
+
+
+"""
+    trelu(x, theta = 1.0) = x > theta ? x : 0
+
+Threshold Gated Rectified Linear Unit.
+See [ThresholdRelu](https://arxiv.org/pdf/1402.3337.pdf)
+"""
+trelu(x::Real, theta = one(x)) = ifelse(x > theta, x, zero(x))
+const thresholdrelu = trelu
+
 
 """
     softsign(x) = x / (1 + |x|)
@@ -184,7 +221,7 @@ See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_A
 softshrink(x::Real, λ = oftype(x/1, 0.5)) = min(max(zero(x), x - λ), x + λ)
 
 # Provide an informative error message if activation functions are called with an array
-for f in (:σ, :σ_stable, :logσ, :relu, :leakyrelu, :relu6, :rrelu, :elu, :gelu, :swish, :selu, :celu, :softsign, :softplus, :logcosh, :mish, :tanhshrink, :softshrink)
+for f in (:σ, :σ_stable, :hardσ, :logσ, :hardtanh, :relu, :leakyrelu, :relu6, :rrelu, :elu, :gelu, :swish, :lisht, :selu, :celu, :trelu, :softsign, :softplus, :logcosh, :mish, :tanhshrink, :softshrink)
   @eval $(f)(x::AbstractArray, args...) =
   error("Use broadcasting (`", $(string(f)), ".(x)`) to apply activation functions to arrays.")
-end
+end
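
For a quick feel for the new exports, the sketch below exercises them from the REPL. It assumes a Julia session with a build of NNlib that contains this commit; the values in the comments follow from the scalar definitions in the diff above and are approximate where floating-point rounding applies.

using NNlib   # assumes a version of NNlib that includes this commit

xs = [-2.0, -0.5, 0.0, 0.5, 2.0]

hardσ.(xs)      # ≈ [0.1, 0.4, 0.5, 0.6, 0.9]: the line 0.2x + 0.5, clipped to [0, 1]
hardtanh.(xs)   # [-1.0, -0.5, 0.0, 0.5, 1.0]: x clipped to [-1, 1]
lisht.(xs)      # xs .* tanh.(xs), always non-negative
trelu.(xs)      # [0.0, 0.0, 0.0, 0.0, 2.0]: passes x through only when x > theta (default 1)

# hardsigmoid and thresholdrelu are aliases of hardσ and trelu:
hardsigmoid === hardσ        # true
thresholdrelu === trelu      # true

# Like the existing activations, the new ones refuse whole arrays and point to broadcasting:
# lisht(xs)  # ERROR: Use broadcasting (`lisht.(x)`) to apply activation functions to arrays.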

test/activation.jl

Lines changed: 46 additions & 4 deletions
@@ -1,6 +1,6 @@
 using NNlib, Test, Zygote
 
-ACTIVATION_FUNCTIONS = [σ, relu, leakyrelu, relu6, rrelu, elu, gelu, celu, swish, selu, softplus, softsign, logcosh, mish, tanhshrink, softshrink];
+ACTIVATION_FUNCTIONS = [σ, hardσ, hardtanh, relu, leakyrelu, relu6, rrelu, elu, gelu, celu, swish, lisht, selu, trelu, softplus, softsign, logcosh, mish, tanhshrink, softshrink];
 
 function test_value_float_precision_preserving(a)
     @testset "$(a): " begin
@@ -37,53 +37,65 @@ end
 
 @testset "Activation Functions" begin
     @test σ(0.0) == 0.5
+    @test hardσ(0.0) == 0.5
+    @test hardtanh(0.0) == 0.0
     @test relu(0.0) == 0.0
     @test leakyrelu(0.0) == 0.0
     @test relu6(0.0) == 0.0
     @test rrelu(0.0) == 0.0
     @test elu(0.0) == 0.0
     @test gelu(0.0) == 0.0
     @test swish(0.0) == 0.0
+    @test lisht(0.0) == 0.0
     @test softplus(0.0) ≈ log(2.0)
     @test softplus(1e8) ≈ 1e8
     @test softplus(-1e8) ≈ 0.0
     @test softsign(0.0) == 0.0
     @test selu(0.0) == 0.0
     @test celu(0.0) == 0.0
+    @test trelu(0.0) == 0.0
     @test logcosh(0.0) == log(cosh(0.0))
     @test mish(0.0) == 0.0
     @test tanhshrink(0.0) == 0.0
     @test softshrink(0.0) == 0.0
 
     @test σ(1.0) == 1.0 / (1.0 + exp(-1.0))
+    @test hardσ(1.0) == max(0, min(1, 0.2*1.0 + 0.5))
+    @test hardtanh(1.0) == 1.0
     @test relu(1.0) == 1.0
     @test leakyrelu(1.0) == 1.0
     @test relu6(1.0) == 1.0
     @test rrelu(1.0) == 1.0
     @test elu(1.0) == 1.0
     @test gelu(1.0) == 0.8411919906082768
     @test swish(1.0) == 1.0 / (1.0 + exp(-1.0))
+    @test lisht(1.0) ≈ 1.0 * tanh(1.0)
     @test softplus(1.0) ≈ log(exp(1.0) + 1.0)
     @test softsign(1.0) == 0.5
     @test selu(1.0) == 1.0507009873554804934193349852946
     @test celu(1.0) == 1.0
+    @test trelu(1.0) == 0.0
     @test logcosh(1.0) ≈ log(cosh(1.0))
     @test mish(1.0) ≈ tanh(log(1.0 + exp(1.0)))
     @test tanhshrink(1.0) ≈ 0.23840584404423515
     @test softshrink(1.0) == 0.5
 
     @test σ(-1.0) == 1.0 / (1.0 + exp(1.0))
+    @test hardσ(-1.0) == max(0, min(1, 0.2*-1.0 + 0.5))
+    @test hardtanh(-1.0) == -1.0
     @test relu(-1.0) == 0.0
     @test leakyrelu(-1.0) == -0.01
     @test relu6(-1.0) == 0.0
     @test -1/3.0 <= rrelu(-1.0) <= -1/8.0
     @test elu(-1.0) == exp(-1.0) - 1.0
     @test gelu(-1.0) == -0.15880800939172324
     @test swish(-1.0) == -1.0 / (1.0 + exp(1.0))
+    @test lisht(-1.0) ≈ -1.0 * tanh(-1.0)
     @test softplus(-1.0) ≈ log(exp(-1.0) + 1.0)
     @test softsign(-1.0) == -0.5
     @test selu(-1.0) == 1.0507009873554804934193349852946 * 1.6732632423543772848170429916717 * (exp(-1.0) - 1.0)
     @test celu(-1.0) == exp(-1.0) - 1
+    @test trelu(-1.0) == 0.0
     @test log(cosh(-1.0)) ≈ log(cosh(-1.0))
     @test mish(-1.0) ≈ -tanh(log(1.0 + exp(-1.0)))
     @test tanhshrink(-1.0) ≈ -0.23840584404423515
@@ -101,7 +113,7 @@ end
 end
 
 @testset "Test Integer64 and Integer32 inputs will force Float64 outputs" begin
-    test_value_int_input_forces_float64.(filter(x -> (x != relu && x != relu6), ACTIVATION_FUNCTIONS))
+    test_value_int_input_forces_float64.(filter(x -> (x != relu && x != relu6 && x != hardtanh && x != trelu), ACTIVATION_FUNCTIONS))
 
     @testset "relu: " begin
         # relu doesn't have to force floating point outputs
@@ -114,7 +126,18 @@ end
         @test typeof(relu6(Int64(1))) == Int64
         @test typeof(relu6(Int32(1))) == Int32
     end
-
+
+    @testset "hardtanh: " begin
+        # hardtanh doesn't have to force floating point outputs
+        @test typeof(hardtanh(Int64(1))) == Int64
+        @test typeof(hardtanh(Int32(1))) == Int32
+    end
+
+    @testset "trelu: " begin
+        # trelu doesn't have to force floating point outputs
+        @test typeof(trelu(Int64(1))) == Int64
+        @test typeof(trelu(Int32(1))) == Int32
+    end
 end
 
 @testset "Float gradient inference" begin
@@ -202,4 +225,23 @@ end
     end
 
     @test logcosh(1_000.0) + log(2) == 1_000.0
-end
+
+    @testset "hardsigmoid" begin
+        @test hardsigmoid(0.3) == 0.56
+        @test hardsigmoid(-0.3) == 0.44
+        @test hardsigmoid(0.1, 0.5) == 0.55
+        for T in [:Float32, :Float64]
+            @eval @test hardsigmoid.($T[-100_000, 100_000.]) ≈ $T[0., 1.]
+        end
+    end
+
+    @test hardtanh(10.0) == 1.0
+    @test lisht(2.5) == 2.5*tanh(2.5)
+
+    @testset "trelu" begin
+        @test trelu(0.5) == 0.0
+        @test trelu(1.0) == 0.0
+        @test trelu(1.1) == 1.1
+        @test trelu(0.9, 0.5) == 0.9
+    end
+end
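
The new x != hardtanh && x != trelu clauses in the integer-input filter mirror the existing exclusions for relu and relu6: those functions are built only from comparisons, min/max, ifelse, zero and one, so they return integers for integer inputs instead of promoting to Float64, which is exactly what the two added typeof testsets assert. A minimal sketch of the distinction, again assuming an NNlib build that includes this commit:

using NNlib

typeof(hardtanh(Int32(1)))   # Int32:   max/min/one preserve the input's integer type
typeof(trelu(Int64(2)))      # Int64:   ifelse/zero preserve the input's integer type
typeof(hardσ(1))             # Float64: oftype(x/1, ...) promotes integers via x/1
typeof(lisht(1))             # Float64: tanh of an Int returns a Float64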
