
Commit 58f4620

Jonathan DEKHTIAR authored and zsdonghao committed
ReLU6 rework (#687)
* leaky_relu deprecated and leaky_relu6 and PReLU6Layer added
* cleaning
* doc corrections
* YAPF fix
* leaky_twice_relu6 function added
* recenter initializer
* YAPF correction
* Update test_activations.py
1 parent c6cf743 commit 58f4620

File tree

8 files changed: +418 -27 lines changed


CHANGELOG.md

Lines changed: 9 additions & 0 deletions
@@ -72,6 +72,8 @@ To release a new version, please update the changelog as followed:
 - API:
   - `tl.alphas` and `tl.alphas_like` added following the tf.ones/zeros and tf.zeros_like/ones_like (by @DEKHTIARJonathan in #580)
   - `tl.lazy_imports.LazyImport` to import heavy libraries only when necessary (by @DEKHTIARJonathan in #667)
+  - `tl.act.leaky_relu6` and `tl.layers.PRelu6Layer` have been added (by @DEKHTIARJonathan in #686)
+  - `tl.act.leaky_twice_relu6` and `tl.layers.PTRelu6Layer` have been added (by @DEKHTIARJonathan in #686)
 - CI Tool:
   - [Stale Probot](https://github.com/probot/stale) added to clean stale issues (by @DEKHTIARJonathan in #573)
   - [Changelog Probot](https://github.com/mikz/probot-changelog) Configuration added (by @DEKHTIARJonathan in #637)
@@ -107,6 +109,7 @@ To release a new version, please update the changelog as followed:
   - `test_optimizer_amsgrad.py` added to test `AMSGrad` optimizer (by @DEKHTIARJonathan in #636)
   - `test_logging.py` added to ensure robustness of the logging API (by @DEKHTIARJonathan in #645)
   - `test_decorators.py` added (by @DEKHTIARJonathan in #660)
+  - `test_activations.py` added (by @DEKHTIARJonathan in #686)
 - Tutorials:
   - `tutorial_tfslim` has been introduced to show how to use `SlimNetsLayer` (by @2wins in #560).
@@ -135,6 +138,7 @@ To release a new version, please update the changelog as followed:
 
 ### Deprecated
 - `tl.layers.TimeDistributedLayer` argument `args` is deprecated in favor of `layer_args` (by @DEKHTIARJonathan in #667)
+- `tl.act.leaky_relu` has been deprecated in favor of `tf.nn.leaky_relu` (by @DEKHTIARJonathan in #686)
 
 ### Removed
 - `assert()` calls removed and replaced by `raise AssertionError()` (by @DEKHTIARJonathan in #667)
@@ -156,6 +160,7 @@ To release a new version, please update the changelog as followed:
 - Tutorial:
   - `tutorial_word2vec_basic.py` saving issue #476 fixed (by @DEKHTIARJonathan in #635)
   - All tutorials tested and errors have been fixed (by @DEKHTIARJonathan in #635)
+
 ### Security
 
 ### Dependencies Update
@@ -173,6 +178,7 @@ To release a new version, please update the changelog as followed:
 - API:
   - `tl.alphas` and `tl.alphas_like` added following the tf.ones/zeros and tf.zeros_like/ones_like (by @DEKHTIARJonathan in #580)
   - `tl.lazy_imports.LazyImport` to import heavy libraries only when necessary (by @DEKHTIARJonathan in #667)
+  - `tl.act.leaky_relu6` and `tl.layers.PRelu6Layer` have been added (by @DEKHTIARJonathan in #686)
 - CI Tool:
   - [Stale Probot](https://github.com/probot/stale) added to clean stale issues (by @DEKHTIARJonathan in #573)
   - [Changelog Probot](https://github.com/mikz/probot-changelog) Configuration added (by @DEKHTIARJonathan in #637)
@@ -196,6 +202,7 @@ To release a new version, please update the changelog as followed:
 - Layer:
   - ElementwiseLambdaLayer added to use custom function to connect multiple layer inputs (by @One-sixth in #579)
   - AtrousDeConv2dLayer added (by @2wins in #662)
+  - Fix bugs of using `tf.layers` in CNN (by @zsdonghao in #686)
 - Optimizer:
   - AMSGrad Optimizer added based on `On the Convergence of Adam and Beyond (ICLR 2018)` (by @DEKHTIARJonathan in #636)
 - Setup:
@@ -235,6 +242,7 @@ To release a new version, please update the changelog as followed:
 
 ### Deprecated
 - `tl.layers.TimeDistributedLayer` argument `args` is deprecated in favor of `layer_args` (by @DEKHTIARJonathan in #667)
+- `tl.act.leaky_relu` has been deprecated in favor of `tf.nn.leaky_relu` (by @DEKHTIARJonathan in #686)
 
 ### Removed
 - `assert()` calls removed and replaced by `raise AssertionError()` (by @DEKHTIARJonathan in #667)
@@ -256,6 +264,7 @@ To release a new version, please update the changelog as followed:
 - Tutorial:
   - `tutorial_word2vec_basic.py` saving issue #476 fixed (by @DEKHTIARJonathan in #635)
   - All tutorials tested and errors have been fixed (by @DEKHTIARJonathan in #635)
+
 ### Security
 
 ### Dependencies Update
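
Since `tl.act.leaky_relu` is deprecated in favor of `tf.nn.leaky_relu`, migrating is a one-line change. A minimal sketch (illustrative only, not part of this commit; assumes a TensorFlow version that ships `tf.nn.leaky_relu`, i.e. >= 1.4):

import tensorflow as tf
import tensorlayer as tl

x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
net = tl.layers.InputLayer(x, name='input')

# Before (deprecated): act=lambda t: tl.act.leaky_relu(t, 0.2)
# After: use the native TensorFlow op with the same slope
net = tl.layers.DenseLayer(net, 100, act=lambda t: tf.nn.leaky_relu(t, alpha=0.2), name='dense')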

docs/modules/activation.rst

Lines changed: 12 additions & 2 deletions
@@ -26,8 +26,10 @@ For more complex activation, TensorFlow API will be required.
 
 .. autosummary::
 
-   ramp
    leaky_relu
+   leaky_relu6
+   leaky_twice_relu6
+   ramp
    swish
    sign
    hard_tanh
@@ -37,10 +39,18 @@ Ramp
 ------
 .. autofunction:: ramp
 
-Leaky Relu
+Leaky ReLU
 ------------
 .. autofunction:: leaky_relu
 
+Leaky ReLU6
+------------
+.. autofunction:: leaky_relu6
+
+Twice Leaky ReLU6
+-----------------
+.. autofunction:: leaky_twice_relu6
+
 Swish
 ------------
 .. autofunction:: swish

docs/modules/layers.rst

Lines changed: 15 additions & 0 deletions
@@ -345,6 +345,8 @@ Layer list
    ScaleLayer
 
    PReluLayer
+   PRelu6Layer
+   PTRelu6Layer
 
    MultiplexerLayer
 
@@ -886,8 +888,21 @@ Scale
 Parametric activation layer
 ---------------------------
 
+PReLU Layer
+^^^^^^^^^^^
 .. autoclass:: PReluLayer
 
+
+PReLU6 Layer
+^^^^^^^^^^^^
+.. autoclass:: PRelu6Layer
+
+
+PTReLU6 Layer
+^^^^^^^^^^^^^
+.. autoclass:: PTRelu6Layer
+
+
 Flow control layer
 ----------------------
 
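
For orientation, a usage sketch of the two new parametric layers documented above (hypothetical: it assumes `PRelu6Layer` and `PTRelu6Layer` accept a previous layer and a `name`, mirroring the existing `PReluLayer`; their exact constructor signatures are not shown in this diff):

import tensorflow as tf
import tensorlayer as tl

x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
net = tl.layers.InputLayer(x, name='input')
net = tl.layers.DenseLayer(net, 100, act=tf.identity, name='dense1')
net = tl.layers.PRelu6Layer(net, name='prelu6')    # learnable slope below 0, output capped at 6
net = tl.layers.DenseLayer(net, 100, act=tf.identity, name='dense2')
net = tl.layers.PTRelu6Layer(net, name='ptrelu6')  # learnable slopes both below 0 and above 6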

tensorlayer/activation.py

Lines changed: 134 additions & 14 deletions
@@ -6,13 +6,18 @@
 from tensorflow.python.util.deprecation import deprecated
 
 __all__ = [
-    'ramp',
     'leaky_relu',
+    'leaky_relu6',
+    'leaky_twice_relu6',
+    'lrelu',
+    'lrelu6',
+    'ltrelu6',
+    'ramp',
     'swish',
     'sign',
+    'htanh',
+    'hard_tanh',
     'pixel_wise_softmax',
-    'linear',
-    'lrelu',
 ]
 
 
@@ -39,10 +44,16 @@ def ramp(x, v_min=0, v_max=1, name=None):
     return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name)
 
 
-def leaky_relu(x, alpha=0.1, name="lrelu"):
-    """LeakyReLU, Shortcut is ``lrelu``.
+@deprecated("2018-09-30", "This API is deprecated. Please use `tf.nn.leaky_relu` instead.")
+def leaky_relu(x, alpha=0.2, name="leaky_relu"):
+    """leaky_relu can be used through its shortcut: :func:`tl.act.lrelu`.
+
+    This function is a modified version of ReLU, introducing a nonzero gradient for negative input. Introduced by the paper:
+    `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
 
-    Modified version of ReLU, introducing a nonzero gradient for negative input.
+    The function returns the following results:
+      - When x < 0: ``f(x) = alpha * x``.
+      - When x >= 0: ``f(x) = x``.
 
     Parameters
     ----------
@@ -55,6 +66,7 @@ def leaky_relu(x, alpha=0.1, name="lrelu"):
 
     Examples
     --------
+    >>> import tensorlayer as tl
    >>> net = tl.layers.DenseLayer(net, 100, act=lambda x: tl.act.lrelu(x, 0.2), name='dense')
 
     Returns
@@ -64,16 +76,122 @@ def leaky_relu(x, alpha=0.1, name="lrelu"):
 
     References
     ----------
-    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013)`
-      http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf
+    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
 
     """
-    # with tf.name_scope(name) as scope:
-    #     x = tf.nn.relu(x)
-    #     m_x = tf.nn.relu(-x)
-    #     x -= alpha * m_x
-    x = tf.maximum(x, alpha * x, name=name)
-    return x
+
+    if not (0 < alpha <= 1):
+        raise ValueError("`alpha` value must be in (0, 1]")
+
+    with tf.name_scope(name, "leaky_relu") as name_scope:
+        x = tf.convert_to_tensor(x, name="features")
+        return tf.maximum(x, alpha * x, name=name_scope)
+
+
+def leaky_relu6(x, alpha=0.2, name="leaky_relu6"):
+    """:func:`leaky_relu6` can be used through its shortcut: :func:`tl.act.lrelu6`.
+
+    This activation function is a modified version of :func:`leaky_relu` introduced by the following paper:
+    `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
+
+    This activation function also follows the behaviour of the activation function :func:`tf.nn.relu6` introduced by the following paper:
+    `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__
+
+    The function returns the following results:
+      - When x < 0: ``f(x) = alpha * x``.
+      - When x in [0, 6]: ``f(x) = x``.
+      - When x > 6: ``f(x) = 6``.
+
+    Parameters
+    ----------
+    x : Tensor
+        Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``.
+    alpha : float
+        Slope.
+    name : str
+        The function name (optional).
+
+    Examples
+    --------
+    >>> import tensorlayer as tl
+    >>> net = tl.layers.DenseLayer(net, 100, act=lambda x: tl.act.leaky_relu6(x, 0.2), name='dense')
+
+    Returns
+    -------
+    Tensor
+        A ``Tensor`` in the same type as ``x``.
+
+    References
+    ----------
+    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
+    - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__
+    """
+
+    if not (0 < alpha <= 1):
+        raise ValueError("`alpha` value must be in (0, 1]")
+
+    with tf.name_scope(name, "leaky_relu6") as name_scope:
+        x = tf.convert_to_tensor(x, name="features")
+        return tf.minimum(tf.maximum(x, alpha * x), 6, name=name_scope)
+
+
+def leaky_twice_relu6(x, alpha_low=0.2, alpha_high=0.2, name="leaky_twice_relu6"):
+    """:func:`leaky_twice_relu6` can be used through its shortcut: :func:`tl.act.ltrelu6`.
+
+    This activation function is a modified version of :func:`leaky_relu` introduced by the following paper:
+    `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
+
+    This activation function also follows the behaviour of the activation function :func:`tf.nn.relu6` introduced by the following paper:
+    `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__
+
+    This function pushes the logic further by adding a `leaky` behaviour both below zero and above six.
+
+    The function returns the following results:
+      - When x < 0: ``f(x) = alpha_low * x``.
+      - When x in [0, 6]: ``f(x) = x``.
+      - When x > 6: ``f(x) = 6 + (alpha_high * (x-6))``.
+
+    Parameters
+    ----------
+    x : Tensor
+        Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``.
+    alpha_low : float
+        Slope for x < 0: ``f(x) = alpha_low * x``.
+    alpha_high : float
+        Slope for x > 6: ``f(x) = 6 + (alpha_high * (x-6))``.
+    name : str
+        The function name (optional).
+
+    Examples
+    --------
+    >>> import tensorlayer as tl
+    >>> net = tl.layers.DenseLayer(net, 100, act=lambda x: tl.act.leaky_twice_relu6(x, 0.2, 0.2), name='dense')
+
+    Returns
+    -------
+    Tensor
+        A ``Tensor`` in the same type as ``x``.
+
+    References
+    ----------
+    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
+    - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__
+    """
+
+    if not (0 < alpha_high <= 1):
+        raise ValueError("`alpha_high` value must be in (0, 1]")
+
+    if not (0 < alpha_low <= 1):
+        raise ValueError("`alpha_low` value must be in (0, 1]")
+
+    with tf.name_scope(name, "leaky_twice_relu6") as name_scope:
+        x = tf.convert_to_tensor(x, name="features")
+
+        x_is_above_0 = tf.minimum(x, 6 * (1 - alpha_high) + alpha_high * x)
+        x_is_below_0 = tf.minimum(alpha_low * x, 0)
+
+        return tf.maximum(x_is_above_0, x_is_below_0, name=name_scope)
 
 
 def swish(x, name='swish'):
@@ -219,4 +337,6 @@ def pixel_wise_softmax(x, name='pixel_wise_softmax'):
 
 # Alias
 lrelu = leaky_relu
+lrelu6 = leaky_relu6
+ltrelu6 = leaky_twice_relu6
 htanh = hard_tanh
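
To make the three piecewise definitions above concrete, here is a small NumPy sketch (not part of the commit; the `np_*` helpers are illustrative re-implementations of the docstring formulas):

import numpy as np

def np_leaky_relu(x, alpha=0.2):
    # f(x) = x for x >= 0, alpha * x otherwise
    return np.maximum(x, alpha * x)

def np_leaky_relu6(x, alpha=0.2):
    # Same as leaky_relu, but the positive side is capped at 6
    return np.minimum(np.maximum(x, alpha * x), 6.0)

def np_leaky_twice_relu6(x, alpha_low=0.2, alpha_high=0.2):
    # Leaky below 0 and above 6, identity in between:
    #   x < 0       -> alpha_low * x
    #   0 <= x <= 6 -> x
    #   x > 6       -> 6 + alpha_high * (x - 6)
    x_is_above_0 = np.minimum(x, 6.0 * (1 - alpha_high) + alpha_high * x)
    x_is_below_0 = np.minimum(alpha_low * x, 0.0)
    return np.maximum(x_is_above_0, x_is_below_0)

x = np.array([-3.0, 0.0, 3.0, 6.0, 9.0])
print(np_leaky_relu6(x))        # -> [-0.6, 0., 3., 6., 6.]
print(np_leaky_twice_relu6(x))  # -> [-0.6, 0., 3., 6., 6.6]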

tensorlayer/cost.py

Lines changed: 2 additions & 2 deletions
@@ -31,8 +31,8 @@
 
 
 def cross_entropy(output, target, name=None):
-    """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions, it implements
-    softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
+    """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions,
+    it implements softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
 
     Parameters
     ----------
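
For reference, such a wrapper typically reduces to a single call to the TensorFlow primitive named in the docstring; a minimal sketch (an assumption based on that docstring, not the verbatim function body, which this diff does not show), where `target` holds integer class indices:

import tensorflow as tf

def cross_entropy_sketch(output, target, name=None):
    # output: unscaled logits of shape [batch_size, n_classes]
    # target: integer class indices of shape [batch_size]
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output),
        name=name,
    )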

tensorlayer/layers/padding.py

Lines changed: 2 additions & 1 deletion
@@ -125,7 +125,8 @@ def __init__(
         if not isinstance(padding, (int, tuple)):
             raise AssertionError("Padding should be of type `int` or `tuple`")
 
-        self.outputs = tf.keras.layers.ZeroPadding2D(padding=padding, name=name)(self.inputs)
+        self.outputs = tf.keras.layers.ZeroPadding2D(padding=padding, name=name)(self.inputs)  # TODO: Stop using Keras
+
         self._add_layers(self.outputs)
 
 
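
The new `# TODO: Stop using Keras` comment hints at replacing the call with a native op. A possible replacement sketch using `tf.pad` (hypothetical, not part of this commit), covering the single-`int` padding case for NHWC inputs; the `tuple` forms accepted by `ZeroPadding2D` would need extra handling:

import tensorflow as tf

def zero_pad_2d(inputs, padding, name=None):
    # Equivalent of tf.keras.layers.ZeroPadding2D for NHWC tensors when
    # `padding` is a single int applied symmetrically to height and width.
    paddings = [[0, 0], [padding, padding], [padding, padding], [0, 0]]
    return tf.pad(inputs, paddings=paddings, mode='CONSTANT', name=name)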
