
Commit dea2a0e
[Image Segmentation Function] dice and pixel-wise softmax
1 parent 1897d37

4 files changed (+81, -35 lines)

docs/modules/activation.rst
15 additions & 0 deletions

@@ -29,11 +29,26 @@ For more complex activation, TensorFlow API will be required.
   identity
   ramp
   leaky_relu
+  pixel_wise_softmax


 Activation functions
 ---------------------

+Identity
+^^^^^^^^^^
 .. autofunction:: identity
+
+Ramp
+^^^^^
 .. autofunction:: ramp
+
+Leaky Relu
+^^^^^^^^^^^
 .. autofunction:: leaky_relu
+
+
+Pixel-wise Softmax
+^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pixel_wise_softmax
docs/modules/cost.rst
2 additions & 0 deletions

@@ -110,6 +110,7 @@ to the cost function.
   cross_entropy
   binary_cross_entropy
   mean_squared_error
+  dice_coe
   cross_entropy_seq
   li_regularizer
   lo_regularizer
@@ -123,6 +124,7 @@ Cost functions
 .. autofunction:: cross_entropy
 .. autofunction:: binary_cross_entropy
 .. autofunction:: mean_squared_error
+.. autofunction:: dice_coe
 .. autofunction:: cross_entropy_seq

tensorlayer/activation.py
28 additions & 35 deletions

@@ -72,38 +72,31 @@ def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"):
 #Shortcut
 lrelu = leaky_relu

-#
-# ## Alternatively we can use tl.layers.PReluLayer()
-# def prelu(x, channel_shared=False, W_init=tf.constant_initializer(value=0.0), W_init_args={}, restore=True, name="PReLU"):
-#     """ Parametric Rectified Linear Unit.
-#
-#     Parameters
-#     ----------
-#     x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
-#         `int16`, or `int8`.
-#     channel_shared : `bool`. Single weight is shared by all channels
-#     W_init: weights initializer, default zero constant.
-#         The initializer for initializing the alphas.
-#     restore : `bool`. Restore or not alphas
-#     name : A name for this activation op (optional).
-#
-#     Returns
-#     -------
-#     A `Tensor` with the same type as `x`.
-#
-#     References
-#     -----------
-#     - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification <http://arxiv.org/pdf/1502.01852v1.pdf>`_
-#     """
-#     print(' prelu: untested !!!')
-#     if channel_shared:
-#         w_shape = (1,)
-#     else:
-#         w_shape = int(x._shape[-1:])
-#
-#     with tf.name_scope(name) as scope:
-#         W_init = initializations.get(weights_init)()
-#         alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=W_init, **W_init_args )
-#         x = tf.nn.relu(x) + tf.mul(alphas, (x - tf.abs(x))) * 0.5
-#
-#     return x
+def pixel_wise_softmax(output, name='pixel_wise_softmax'):
+    """Return the softmax outputs of images; the label probabilities of every pixel sum to 1.
+    Usually used for image segmentation.
+
+    Parameters
+    ------------
+    output : tensor
+        - For a 2d image, 4D tensor [batch_size, height, width, channel], channel = 2.
+        - For a 3d image, 5D tensor [batch_size, depth, height, width, channel], channel = 2.
+
+    Examples
+    ---------
+    >>> outputs = pixel_wise_softmax(network.outputs)
+    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)
+
+    References
+    -----------
+    - `tf.reverse <https://www.tensorflow.org/versions/master/api_docs/python/array_ops.html#reverse>`_
+    """
+    with tf.name_scope(name) as scope:
+        exp_map = tf.exp(output)
+        if output.get_shape().ndims == 4:    # 2d image
+            evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, True]))  # per-pixel sum of the two channel exponentials
+        elif output.get_shape().ndims == 5:  # 3d image
+            evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, False, True]))
+        else:
+            raise Exception("output should be a 2d or 3d image, not shape %s" % str(output.get_shape()))
+        return tf.div(exp_map, evidence)
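
A minimal sanity-check sketch (not part of the commit), assuming the TensorFlow 0.x API used above (tf.placeholder, tf.Session, and the boolean-list form of tf.reverse): for a two-channel map, the channel-reversal trick yields a proper per-pixel softmax, so the probabilities at every pixel sum to 1.

    import numpy as np
    import tensorflow as tf
    from tensorlayer.activation import pixel_wise_softmax

    # two-channel segmentation logits: [batch, height, width, channel]
    logits = tf.placeholder(tf.float32, [None, 32, 32, 2])
    probs = pixel_wise_softmax(logits)

    with tf.Session() as sess:
        x = np.random.randn(4, 32, 32, 2).astype(np.float32)
        p = sess.run(probs, feed_dict={logits: x})
        print(np.allclose(p.sum(axis=-1), 1.0))   # True: each pixel is a distribution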

tensorlayer/cost.py
36 additions & 0 deletions

@@ -74,6 +74,42 @@ def mean_squared_error(output, target):
     mse = tf.reduce_sum(tf.squared_difference(output, target), reduction_indices = 1)
     return tf.reduce_mean(mse)

+
+
+def dice_coe(output, target, epsilon=1e-10):
+    """Sørensen–Dice coefficient for comparing the similarity of two distributions,
+    usually used for binary image segmentation, i.e. the labels are binary.
+    The coefficient lies in [0, 1] and is 1 for a perfect match.
+
+    Parameters
+    -----------
+    output : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    target : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+
+    Examples
+    ---------
+    >>> outputs = pixel_wise_softmax(network.outputs)
+    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)
+
+    References
+    -----------
+    - `wiki-dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
+    """
+    # soft Dice: 2 * sum(o * t) / (sum(o^2) + sum(t^2));
+    # for binary masks this reduces to 2|A ∩ B| / (|A| + |B|)
+    inse = tf.reduce_sum(output * target)    # intersection term
+    l = tf.reduce_sum(output * output)
+    r = tf.reduce_sum(target * target)
+    dice = 2 * inse / (l + r)                # undefined if output and target are both all-zero
+    if epsilon == 0:
+        return dice
+    else:
+        # clip to [0, 1 - epsilon] so the loss 1 - dice stays strictly positive
+        return tf.clip_by_value(dice, 0, 1.0 - epsilon)
+
+
 def cross_entropy_seq(logits, target_seqs, batch_size=1, num_steps=None):
     """Returns the expression of cross-entropy of two sequences, implement
     softmax internally. Normally be used for Fixed Length RNN outputs.
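
A hypothetical end-to-end sketch (not part of the commit): wiring dice_coe into a training step. The names network, y_ and train_params are assumed to come from the surrounding TensorLayer script. As a numeric check of the formula, output = [1, 0, 1] against target = [1, 1, 1] gives inse = 2, l = 2, r = 3, so dice = 2*2 / (2+3) = 0.8.

    # assumed: network is a TensorLayer segmentation model, y_ holds the
    # ground-truth masks, train_params its trainable variables
    outputs = pixel_wise_softmax(network.outputs)
    dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)   # loss in [epsilon, 1] thanks to the clipping
    train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        dice_loss, var_list=train_params)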
