
Commit acf6247

Committed by: Mesh TensorFlow Team

Internal

PiperOrigin-RevId: 395547787

1 parent 69bd9c7 · commit acf6247

File tree: 4 files changed (+99, -3 lines)

.github/workflows/build.yml (1 addition, 1 deletion)

@@ -14,7 +14,7 @@ jobs:
     - name: Install dependencies
       run: |
         pip install tf-nightly mock pytest
-        pip install -e .[auto_mtf,transformer]
+        pip install -e .[test,auto_mtf,transformer]
     - name: Test with pytest
       run: pytest
     # The below step just reports the success or failure of tests as a "commit status".

mesh_tensorflow/layers.py (33 additions, 0 deletions)

@@ -947,6 +947,39 @@ def conv3d_transpose_with_blocks(
       variable_dtype, name)


+def corr(x, dim, epsilon=1e-20, name="pearson_correlation"):
+  """Compute correlation along dimension dim, equiv to tfp.stats.correlation.
+
+  It treats the dim Dimension as the random event axis, and all the other dims
+  as the sample axis. Pearson correlation is computed between random events in
+  dim Dimension, and marginalized over the other dims.
+
+  Example usage:
+    inputs = tf.random_normal([batch, channels])
+    mtf_inputs = mtf.import_tf_tensor(
+        mesh, inputs, shape=mtf.Shape([batch_dim, channels_dim]))
+    correlation = corr(mtf_inputs, dim=channels_dim)
+
+  Args:
+    x: a mtf.Tensor whose shape contains dim.
+    dim: a mtf.Dimension.
+    epsilon: a small floating point number for numerical stability.
+    name: a string used for tf.variable_scope.
+
+  Returns:
+    a mtf.Tensor with the shape of [dim, dim].
+  """
+  with tf.variable_scope(name):
+    mean = mtf.reduce_mean(x, output_shape=[dim])
+    dim_name = dim.name
+    x1 = mtf.rename_dimension(x - mean, dim_name, f"{dim_name}_1")
+    x2 = mtf.rename_dimension(x - mean, dim_name, f"{dim_name}_2")
+    variance = lambda z: mtf.sqrt(  # pylint: disable=g-long-lambda
+        mtf.reduce_sum(mtf.square(z), output_shape=z.shape.dims[-1:])) + epsilon
+    v1, v2 = variance(x1), variance(x2)
+    return mtf.matmul(x1, x2) / mtf.matmul(v1, v2)
+
+
 def layer_norm(x, dim, epsilon=1e-6, name="layer_prepostprocess"):
   """Layer normalization over dimension dim.
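For intuition, the new mtf.layers.corr should reproduce an ordinary Pearson correlation matrix computed over the sample axis. A minimal NumPy cross-check (not part of the commit; shapes and values chosen only for illustration):

    import numpy as np

    # [batch, channels] input, matching the 2-D example in the docstring above.
    x = np.random.randn(4, 3).astype(np.float32)
    centered = x - x.mean(axis=0, keepdims=True)      # subtract the per-channel mean
    norms = np.sqrt((centered ** 2).sum(axis=0))      # L2 norm of each centered channel
    reference = (centered.T @ centered) / np.outer(norms, norms)  # [channels, channels]
    # np.corrcoef(x, rowvar=False) gives the same matrix up to floating-point error,
    # and is what corr(mtf_inputs, dim=channels_dim) should produce after lowering.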

mesh_tensorflow/layers_test.py (64 additions, 2 deletions)

@@ -20,13 +20,13 @@
 from __future__ import print_function

 from absl.testing import parameterized
-
 import mesh_tensorflow as mtf
 from mesh_tensorflow import test_utils
 import mock
 import numpy as np
-
 import tensorflow.compat.v1 as tf
+import tensorflow_probability as tfp
+
 from tensorflow.python.framework import test_util  # pylint:disable=g-direct-tensorflow-import


@@ -85,6 +85,68 @@ def testDense(self, units, use_bias, new_dim_name):

     self.assertEqual(actual.shape, expected.shape)

+  @test_util.run_in_graph_and_eager_modes()
+  def testCorr2DInput(self):
+    batch = 4
+    channels = 3
+    inputs = tf.random_normal([batch, channels])
+
+    graph = mtf.Graph()
+    mesh = mtf.Mesh(graph, "my_mesh")
+    batch_dim = mtf.Dimension("batch", batch)
+    channels_dim = mtf.Dimension("channels", channels)
+
+    mtf_inputs = mtf.import_tf_tensor(
+        mesh, inputs, shape=mtf.Shape([batch_dim, channels_dim]))
+    mtf_outputs = mtf.layers.corr(mtf_inputs, dim=channels_dim)
+    mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
+        shape=[], layout={}, devices=[""])
+    lowering = mtf.Lowering(graph, {mesh: mesh_impl})
+    actual_outputs = lowering.export_to_tf_tensor(mtf_outputs)
+
+    expected_outputs = tfp.stats.correlation(
+        inputs, sample_axis=0, event_axis=1)
+    tf_group = lowering.copy_masters_to_slices()
+    init = tf.global_variables_initializer()
+    self.evaluate(init)
+    self.evaluate(tf_group)
+    actual, expected = self.evaluate([actual_outputs, expected_outputs])
+
+    self.assertEqual(actual.shape, expected.shape)
+    self.assertAllClose(actual, expected)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testCorr3DInput(self):
+    batch = 4
+    sequence = 5
+    channels = 3
+    inputs = tf.random_normal([batch, sequence, channels])
+
+    graph = mtf.Graph()
+    mesh = mtf.Mesh(graph, "my_mesh")
+    batch_dim = mtf.Dimension("batch", batch)
+    seq_dim = mtf.Dimension("seq", sequence)
+    channels_dim = mtf.Dimension("channels", channels)
+
+    mtf_inputs = mtf.import_tf_tensor(
+        mesh, inputs, shape=mtf.Shape([batch_dim, seq_dim, channels_dim]))
+    mtf_outputs = mtf.layers.corr(mtf_inputs, dim=channels_dim)
+    mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
+        shape=[], layout={}, devices=[""])
+    lowering = mtf.Lowering(graph, {mesh: mesh_impl})
+    actual_outputs = lowering.export_to_tf_tensor(mtf_outputs)
+
+    expected_outputs = tfp.stats.correlation(
+        inputs, sample_axis=[0, 1], event_axis=2)
+    tf_group = lowering.copy_masters_to_slices()
+    init = tf.global_variables_initializer()
+    self.evaluate(init)
+    self.evaluate(tf_group)
+    actual, expected = self.evaluate([actual_outputs, expected_outputs])
+
+    self.assertEqual(actual.shape, expected.shape)
+    self.assertAllClose(actual, expected)
+
   @test_util.run_in_graph_and_eager_modes()
   def testLayerNorm(self):
     batch = 2
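One detail worth noting from testCorr3DInput: treating both batch and seq as sample axes is equivalent to flattening them into a single sample axis before correlating over channels. A small NumPy sketch of that equivalence (not part of the commit; values are illustrative):

    import numpy as np

    batch, sequence, channels = 4, 5, 3
    x = np.random.randn(batch, sequence, channels).astype(np.float32)
    flat = x.reshape(batch * sequence, channels)       # merge the two sample axes
    reference = np.corrcoef(flat, rowvar=False)        # [channels, channels]
    # tfp.stats.correlation(x, sample_axis=[0, 1], event_axis=2) should match
    # `reference` up to floating-point error.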

setup.py (1 addition, 0 deletions)

@@ -27,6 +27,7 @@
         'auto_mtf': ['ortools'],
         'tensorflow': ['tensorflow>=1.15.0'],
         'transformer': ['tensorflow-datasets', 'scipy'],
+        'test': ['tensorflow_probability']
     },
     tests_require=[
         'ortools',
