Extended tf_utils.map_per_key_reductions to map reductions per-key and element-wise, for n-dim Dense Tensors.

tf-transform-team · tfx-copybara · commit 04064f3acff6 · 2022-06-17T13:17:29.000-07:00
PiperOrigin-RevId: 455681807
diff --git a/tensorflow_transform/mappers.py b/tensorflow_transform/mappers.py
@@ -379,7 +379,7 @@ def _scale_by_min_max_internal(
       # Missing keys will translate to 0 for both min and max which will be
       # ignored below in the tf.where.
       min_x_value, max_x_value = tf_utils.map_per_key_reductions(
-          (min_x_value, max_x_value), key, key_vocab, x)
+          (min_x_value, max_x_value), key, key_vocab, x, not elementwise)
     else:
       minus_min_max_for_key = tf_utils.apply_per_key_vocabulary(
           key_values, key, target_ndims=x.get_shape().ndims)
@@ -626,8 +626,8 @@ def _scale_to_z_score_internal(
       # Missing keys will translate to 0 for both mean and var which will be
       # ignored below in the tf.where.
       key_vocab, key_means, key_vars = mean_and_var_per_key_result
-      x_mean, x_var = tf_utils.map_per_key_reductions((key_means, key_vars),
-                                                      key, key_vocab, x)
+      x_mean, x_var = tf_utils.map_per_key_reductions(
+          (key_means, key_vars), key, key_vocab, x, not elementwise)
     else:
       mean_var_for_key = tf_utils.apply_per_key_vocabulary(
           mean_and_var_per_key_result, key, target_ndims=x.get_shape().ndims)
diff --git a/tensorflow_transform/tf_utils.py b/tensorflow_transform/tf_utils.py
@@ -1234,18 +1234,20 @@ def _align_dims(tensor: tf.Tensor, target_ndims: int) -> tf.Tensor:
   return tensor
 
 
-def map_per_key_reductions(
-    tensors_to_map: Tuple[tf.Tensor, ...], key: common_types.TensorType,
-    key_vocab: tf.Tensor,
-    original_input: common_types.TensorType) -> Tuple[tf.Tensor, ...]:
+def map_per_key_reductions(tensors_to_map: Tuple[tf.Tensor, ...],
+                           key: common_types.TensorType, key_vocab: tf.Tensor,
+                           original_input: common_types.TensorType,
+                           reduce_instance_dims: bool) -> Tuple[tf.Tensor, ...]:
   """Rearrange the reduced per-key result to correspond to the original keys.
 
   Args:
     tensors_to_map: A tuple of 1-D `Tensor`s that are same shape as key_vocab,
-        to be mapped to respective key.
+      to be mapped to respective key.
     key: A `Tensor` or `CompositeTensor`.
     key_vocab: A 1-D `Tensor`.
     original_input: A `Tensor` or `CompositeTensor`.
+    reduce_instance_dims: A `bool`. True if tensors_to_map hold one scalar per
+      key; False if they hold one element-wise reduced tensor per key.
 
   Returns:
     A tuple same length as tensors_to_map, of `Tensor`s the same dimension as
@@ -1262,17 +1264,22 @@ def map_per_key_reductions(
                               (tf.SparseTensor, tf.RaggedTensor)) else
            original_input.get_shape().ndims)
 
-  # Append a 0 to allow mapping OOVs to it.
-  tensors_to_map = [tf.concat([t, [0]], axis=0) for t in tensors_to_map]
+  # Append a zero entry to each tensor so that OOV keys can be mapped to it.
+  tensors_to_map = [
+      tf.concat([t, tf.expand_dims(tf.zeros_like(t[0]), 0)], axis=0)
+      for t in tensors_to_map
+  ]
 
   # Replace `-1`s due to OOV with size of key_vocab.
   adjusted_indices = tf.where(
       key_indices >= 0, key_indices,
       tf.cast(
           tf.fill(tf.shape(key_indices), tf.size(key_vocab)), dtype=tf.int64))
-
-  mapped_result = [_align_dims(tf.gather(t, adjusted_indices, axis=-1), ndims)
-                   for t in tensors_to_map]
+  axis = -1 if reduce_instance_dims else 0
+  mapped_result = [
+      _align_dims(tf.gather(t, adjusted_indices, axis=axis), ndims)
+      for t in tensors_to_map
+  ]
 
   return tuple(mapped_result)
 
diff --git a/tensorflow_transform/tf_utils_test.py b/tensorflow_transform/tf_utils_test.py
@@ -1942,6 +1942,7 @@ def test_convert_ragged_indices(self):
           key_vocab=['a', 'b'],
           reductions=([1, 2], [3, 4]),
           x=[5, 6, 7],
+          reduce_instance_dims=True,
           expected_results=([2, 1, 2], [4, 3, 4])),
       dict(
           testcase_name='sparse_tensor_dense_key',
@@ -1952,6 +1953,7 @@ def test_convert_ragged_indices(self):
               indices=[[0, 0], [1, 2], [2, 2], [2, 3]],
               values=[3, 2, -1, 3],
               dense_shape=[3, 5]),
+          reduce_instance_dims=True,
           expected_results=([2, 1, 2, 2], [4, 3, 4, 4])),
       dict(
           testcase_name='sparse_tensor_sparse_key',
@@ -1965,6 +1967,7 @@ def test_convert_ragged_indices(self):
               indices=[[0, 0], [1, 2], [2, 2], [2, 3]],
               values=[3, 2, -1, 3],
               dense_shape=[3, 5]),
+          reduce_instance_dims=True,
           expected_results=([2, 1, 2, 2], [4, 3, 4, 4])),
       dict(
           testcase_name='ragged_tensor_dense_key',
@@ -1976,6 +1979,7 @@ def test_convert_ragged_indices(self):
                   values=np.array([1.2, 1., 1.2, 1.]),
                   row_splits=np.array([0, 2, 4])),
               row_splits=np.array([0, 1, 2, 2])),
+          reduce_instance_dims=True,
           expected_results=([1, 1, 2, 2], [3, 3, 4, 4])),
       dict(
           testcase_name='ragged_tensor_ragged_key',
@@ -1991,24 +1995,45 @@ def test_convert_ragged_indices(self):
                   values=np.array([1.2, 1., 1.2, 1.]),
                   row_splits=np.array([0, 2, 4])),
               row_splits=np.array([0, 2])),
+          reduce_instance_dims=True,
           expected_results=([1, 2, 2, 1], [3, 4, 4, 3])),
       dict(
           testcase_name='missing_key',
           key=['b', 'a', 'c'],
           key_vocab=['z', 'a', 'b'],
           reductions=([-77, 1, 2], [-99, 3, 4]),
           x=[5, 6, 7],
+          reduce_instance_dims=True,
           expected_results=([2, 1, 0], [4, 3, 0])),
+      dict(
+          testcase_name='_dense_tensor_2d_elementwise',
+          key=['a'],
+          key_vocab=['a', 'b'],
+          reductions=([[1, 5], [-2, 0]], [[5, 9], [2, 4]]),
+          x=[[4, 8]],
+          reduce_instance_dims=False,
+          expected_results=([[1, 5]], [[5, 9]])),
+      dict(
+          testcase_name='_dense_tensor_3d_elementwise',
+          key=['a'],
+          key_vocab=['a', 'b'],
+          reductions=([[[1, 1], [1, 1]], [[3, -3], [3, 3]]], [[[5, 5], [5, 5]],
+                                                              [[3, -3], [3,
+                                                                         3]]]),
+          x=[[[1, 5], [1, 1]]],
+          reduce_instance_dims=False,
+          expected_results=([[[1, 1], [1, 1]]], [[[5, 5], [5, 5]]])),
   )
-  def test_map_per_key_reductions(
-      self, key, key_vocab, reductions, x, expected_results):
+  def test_map_per_key_reductions(self, key, key_vocab, reductions, x,
+                                  reduce_instance_dims, expected_results):
     with tf.compat.v1.Graph().as_default():
       key = _value_to_tensor(key)
       key_vocab = tf.constant(key_vocab)
       reductions = tuple([tf.constant(t) for t in reductions])
       x = _value_to_tensor(x)
       expected_results = tuple(tf.constant(t) for t in expected_results)
-      results = tf_utils.map_per_key_reductions(reductions, key, key_vocab, x)
+      results = tf_utils.map_per_key_reductions(reductions, key, key_vocab, x,
+                                                reduce_instance_dims)
       with tf.compat.v1.Session() as sess:
         sess.run(tf.compat.v1.tables_initializer())
         output = sess.run(results)