tensorflow
diff --git a/official/nlp/modeling/ops/decoding_module.py b/official/nlp/modeling/ops/decoding_module.py
Lines changed: 289 additions & 0 deletions
diff --git a/official/nlp/modeling/ops/decoding_module_test.py b/official/nlp/modeling/ops/decoding_module_test.py
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,289 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Base class for Decoding Strategies (beam_search, top_k, top_p and greedy)."""
+
+import abc
+from typing import Any, Callable, Dict, Tuple
+
+import tensorflow as tf
+
+from tensorflow.python.framework import dtypes
+
+# Return type of `DecodingModule.generate` and `_process_finished_state`.
+Output = Tuple[tf.Tensor, tf.Tensor]
+# Return type of `_grow_alive_seq`: (sequences, scores, new ids, new cache).
+InternalState = Tuple[tf.Tensor, tf.Tensor, tf.Tensor, Dict]
+# Return type of `_create_initial_state`: (state, state shape invariants).
+InitialState = Tuple[Dict[str, Any], Dict[str, Any]]
+
+
+class StateKeys:
+  """String keys of the dictionary that holds the decoding loop state."""
+
+  # Index of the current loop iteration.
+  CUR_INDEX = "CUR_INDEX"
+
+  # Best sequences per batch item that are still alive, i.e. that have not yet
+  # produced an EOS token. Once a sequence reaches EOS it is marked as
+  # finished and moved to the FINISHED_SEQ tensor.
+  # Shape is [batch_size, beam_size, CUR_INDEX + 1] for SequenceBeamSearch and
+  # [batch_size, CUR_INDEX + 1] otherwise.
+  ALIVE_SEQ = "ALIVE_SEQ"
+  # Log probability of every alive sequence. Shape [batch_size, beam_size].
+  ALIVE_LOG_PROBS = "ALIVE_LOG_PROBS"
+  # Dictionary of values cached for each alive sequence: the encoder output,
+  # the attention bias, and the decoder attention output of the previous
+  # iteration.
+  ALIVE_CACHE = "ALIVE_CACHE"
+
+  # Best finished sequences per batch item.
+  # Shape is [batch_size, beam_size, CUR_INDEX + 1]; sequences shorter than
+  # CUR_INDEX + 1 are padded with 0s.
+  FINISHED_SEQ = "FINISHED_SEQ"
+  # Score of every finished sequence, where
+  # score = log probability / length norm. Shape [batch_size, beam_size].
+  FINISHED_SCORES = "FINISHED_SCORES"
+  # Marks which entries of FINISHED_SEQ are real finished sequences. Initially
+  # every entry of FINISHED_SEQ is a filler value.
+  # True -> finished sequence, False -> filler. Shape [batch_size, beam_size].
+  FINISHED_FLAGS = "FINISHED_FLAGS"
+
+
+class DecodingModule(tf.Module, metaclass=abc.ABCMeta):
+  """A base class for the API required for decoding (go/decoding-tf-nlp).
+
+  `generate` drives a `tf.while_loop` over a state dictionary keyed by
+  `StateKeys`; subclasses supply the strategy-specific steps by implementing
+  the abstract methods below.
+
+  Subclasses may set a `padded_decode` attribute. When it is truthy, `generate`
+  uses the static batch size of `initial_ids`; otherwise (including when the
+  attribute is absent) the batch size is read dynamically with `tf.shape`.
+  """
+
+  def __init__(self,
+               length_normalization_fn: Callable[[int, tf.DType], float],
+               dtype: tf.DType = tf.float32):
+    """Initialize the Decoding Module.
+
+    Args:
+      length_normalization_fn: Closure for returning length normalization
+        parameter. Function accepts input as length, dtype and returns float.
+      dtype: A tensorflow data type used for score computation. The default is
+        tf.float32.
+    """
+    self.length_normalization_fn = length_normalization_fn
+    self.dtype = tf.as_dtype(dtype)
+
+  def generate(self,
+               initial_ids: tf.Tensor,
+               initial_cache: Dict[str, tf.Tensor]) -> Output:
+    """Implements the decoding strategy (beam_search or sampling).
+
+    Args:
+      initial_ids: initial ids to pass into the symbols_to_logits_fn.
+        int tensor with shape [batch_size, 1]
+      initial_cache: dictionary for caching model outputs from previous step.
+
+    Returns:
+      Tuple of tensors representing
+        finished_sequence: shape [batch, max_seq_length]
+        finished_scores: [batch]
+    """
+    # `padded_decode` is not set by this base class; subclasses are expected to
+    # define it. Fall back to the dynamic batch size when it is missing instead
+    # of failing with an AttributeError.
+    padded_decode = getattr(self, "padded_decode", False)
+    batch_size = (
+        initial_ids.shape.as_list()[0]
+        if padded_decode else tf.shape(initial_ids)[0])
+
+    state, state_shapes = self._create_initial_state(initial_ids,
+                                                     initial_cache,
+                                                     batch_size)
+
+    def _generate_step(state):
+      """Runs one decoding iteration and returns the updated loop state."""
+      topk_seq, topk_log_probs, topk_ids, new_cache = self._grow_alive_seq(
+          state, batch_size)
+      new_finished_flags = self._finished_flags(topk_ids, state)
+      alive_state = self._get_new_alive_state(topk_seq,
+                                              topk_log_probs,
+                                              new_finished_flags,
+                                              new_cache)
+      finished_state = self._get_new_finished_state(state,
+                                                    topk_seq,
+                                                    topk_log_probs,
+                                                    new_finished_flags,
+                                                    batch_size)
+      new_state = {
+          StateKeys.CUR_INDEX: state[StateKeys.CUR_INDEX] + 1
+      }
+      new_state.update(alive_state)
+      new_state.update(finished_state)
+      # The loop variables are a one-element list, so the body returns one too.
+      return [new_state]
+
+    # Gradients are stopped on every tensor of the final loop state.
+    finished_state = tf.nest.map_structure(
+        tf.stop_gradient,
+        tf.while_loop(
+            self._continue_search,
+            _generate_step,
+            loop_vars=[state],
+            shape_invariants=[state_shapes],
+            parallel_iterations=1))
+    final_state = self._process_finished_state(finished_state[0])
+    return final_state
+
+  @abc.abstractmethod
+  def _create_initial_state(self,
+                            initial_ids: tf.Tensor,
+                            initial_cache: Dict[str, tf.Tensor],
+                            batch_size: int) -> InitialState:
+    """Return initial state dictionary and its shape invariants."""
+    pass
+
+  @abc.abstractmethod
+  def _grow_alive_seq(self,
+                      state: Dict[str, Any],
+                      batch_size: int) -> InternalState:
+    """Grow alive sequences by one token.
+
+    Args:
+      state: A dictionary with the current loop state.
+      batch_size: The given batch size
+
+    Returns:
+      Tuple of
+      (Top sequences,
+       Scores of returned sequences,
+       New ids,
+       New alive cache)
+    """
+    pass
+
+  @abc.abstractmethod
+  def _get_new_alive_state(
+      self,
+      new_seq: tf.Tensor,
+      new_log_probs: tf.Tensor,
+      new_finished_flags: tf.Tensor,
+      new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]:
+    """Gather the sequences that are still alive.
+
+    Args:
+      new_seq: New sequences generated by growing the current alive sequences;
+        an int32 tensor.
+      new_log_probs: Log probabilities of the new sequences; a float32 tensor.
+      new_finished_flags: A boolean Tensor of per-sequence finished flags, as
+        returned by `_finished_flags`.
+      new_cache: Dict of cached values for each sequence.
+
+    Returns:
+      Dictionary with alive keys from StateKeys.
+    """
+    pass
+
+  @abc.abstractmethod
+  def _get_new_finished_state(self,
+                              state: Dict[str, Any],
+                              new_seq: tf.Tensor,
+                              new_log_probs: tf.Tensor,
+                              new_finished_flags: tf.Tensor,
+                              batch_size: int) -> Dict[str, tf.Tensor]:
+    """Combine new and old finished sequences.
+
+    Args:
+      state: A dictionary with the current loop state.
+      new_seq: New sequences generated by growing the current alive sequences;
+        an int32 tensor.
+      new_log_probs: Log probabilities of the new sequences; a float32 tensor.
+      new_finished_flags: A boolean Tensor of per-sequence finished flags, as
+        returned by `_finished_flags`.
+      batch_size: The given batch size.
+
+    Returns:
+      Dictionary with finished keys from StateKeys.
+    """
+    pass
+
+  @abc.abstractmethod
+  def _process_finished_state(self, finished_state: Dict[str, Any]) -> Output:
+    """Process the alive/finished state to return final sequences and scores."""
+    pass
+
+  @abc.abstractmethod
+  def _continue_search(self, state: Dict[str, Any]) -> tf.Tensor:
+    """Returns a bool tensor telling whether the decoding loop should continue."""
+    pass
+
+  @abc.abstractmethod
+  def _finished_flags(self,
+                      topk_ids: tf.Tensor,
+                      state: Dict[str, Any]) -> tf.Tensor:
+    """Calculate the finished flags."""
+    pass
+
+  def inf(self):
+    """Returns a value close to infinity, but is still finite in `dtype`.
+
+    This is useful to get a very large value that is still zero when multiplied
+    by zero. The floating-point "Inf" value is NaN when multiplied by zero.
+
+    Returns:
+      A very large value: 1e7 for float32 and bfloat16, the largest finite
+      float16 value for float16.
+
+    Raises:
+      AssertionError: if `self.dtype` is none of float32, bfloat16 or float16.
+    """
+    if self.dtype in (dtypes.float32, dtypes.bfloat16):
+      return 1e7
+    if self.dtype == dtypes.float16:
+      return dtypes.float16.max
+    raise AssertionError("Invalid dtype: %s" % self.dtype)
+
+  @staticmethod
+  def _log_prob_from_logits(logits):
+    """Normalizes logits to log probabilities along the last axis."""
+    return logits - tf.reduce_logsumexp(logits, axis=-1, keepdims=True)
+
+  @staticmethod
+  def _shape_list(tensor):
+    """Return a list of the tensor's shape, and ensure no None values in list."""
+    # Get statically known shape (may contain None's for unknown dimensions)
+    shape = tensor.get_shape().as_list()
+
+    # Replace every statically unknown dimension with its dynamic counterpart.
+    dynamic_shape = tf.shape(tensor)
+    return [
+        dynamic_shape[i] if dim is None else dim
+        for i, dim in enumerate(shape)
+    ]
+
+  @staticmethod
+  def _get_shape_keep_last_dim(tensor):
+    """Returns a TensorShape that keeps only the tensor's last dimension.
+
+    All leading dimensions are set to None. The last dimension is kept when it
+    is statically known and set to None when it is only known dynamically.
+
+    Args:
+      tensor: The tensor whose shape is relaxed.
+
+    Returns:
+      A `tf.TensorShape` with the same rank as `tensor`.
+    """
+    shape_list_obj = DecodingModule._shape_list(tensor)
+    if not shape_list_obj:
+      # A scalar has no last dimension to keep.
+      return tf.TensorShape([])
+
+    last_dim = shape_list_obj[-1]
+    if isinstance(last_dim, tf.Tensor):
+      # Only dynamically known, so it cannot be part of a static shape.
+      last_dim = None
+    return tf.TensorShape([None] * (len(shape_list_obj) - 1) + [last_dim])
+
+  @staticmethod
+  def _expand_to_same_rank(tensor, target):
+    """Expands a given tensor to target's rank to be broadcastable.
+
+    Args:
+      tensor: input tensor to expand. Shape: [b, d1, ..., da]
+      target: target tensor. Shape: [b, d1, ..., da, ..., dn]
+
+    Returns:
+      Expanded tensor of shape [b, d1, ..., da, 1, ..., 1] with the same rank
+      as target. The input is returned unchanged when its rank is not smaller
+      than the target's.
+
+    Raises:
+      ValueError: if the rank of the tensor's or the target's shape is None.
+    """
+    if tensor.shape.rank is None:
+      raise ValueError("Expect rank for tensor shape, but got None.")
+    if target.shape.rank is None:
+      raise ValueError("Expect rank for target shape, but got None.")
+
+    with tf.name_scope("expand_rank"):
+      diff_rank = target.shape.rank - tensor.shape.rank
+      # Append one trailing unit dimension per missing rank.
+      for _ in range(diff_rank):
+        tensor = tf.expand_dims(tensor, -1)
+      return tensor
+
+
+
@@ -0,0 +1,84 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test decoding utility methods."""
+
+import abc
+import tensorflow as tf
+
+from official.nlp.modeling.ops import decoding_module
+
+
+def length_normalization(length, dtype):
+  """Return the length normalization factor ((5 + length) / 6) ** 0.0."""
+  length_as_float = tf.cast(length, dtype)
+  base = (5. + length_as_float) / 6.
+  return tf.pow(base, 0.0)
+
+
+class TestSubclass(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
+  """Minimal concrete `DecodingModule` with no-op abstract methods.
+
+  It exists only so the non-abstract helpers of the base class can be
+  instantiated and exercised in tests.
+  """
+
+  def __init__(self,
+               length_normalization_fn=length_normalization,
+               dtype=tf.float32):
+    """Forwards the given normalization function and dtype to the base class."""
+    # Pass the argument through; the module-level default is only a default.
+    super().__init__(
+        length_normalization_fn=length_normalization_fn, dtype=dtype)
+
+  def _create_initial_state(self, initial_ids, initial_cache, batch_size):
+    pass
+
+  def _grow_alive_seq(self, state, batch_size):
+    pass
+
+  def _process_finished_state(self, finished_state):
+    pass
+
+  def _get_new_finished_state(self, state, new_seq, new_log_probs,
+                              new_finished_flags, batch_size):
+    pass
+
+  def _finished_flags(self, topk_ids, state):
+    pass
+
+  def _continue_search(self, state):
+    pass
+
+  def _get_new_alive_state(self, new_seq, new_log_probs, new_finished_flags,
+                           new_cache):
+    pass
+
+
+class DecodingModuleTest(tf.test.TestCase):
+  """Unit tests for the helper methods of `DecodingModule`."""
+
+  def test_get_shape_keep_last_dim(self):
+    """Leading dimensions become None; the static last dimension is kept."""
+    four = tf.constant(4.0)
+    second_dim = tf.cast(tf.sqrt(four), tf.int32)
+    tensor = tf.ones([7, second_dim, 2, 5])
+    module_cls = decoding_module.DecodingModule
+    kept_shape = module_cls._get_shape_keep_last_dim(tensor)
+    self.assertAllEqual([None, None, None, 5], kept_shape.as_list())
+
+  def test_shape_list(self):
+    """A fully static shape is returned as a plain list of ints."""
+    tensor = tf.ones([7, 1])
+    dims = decoding_module.DecodingModule._shape_list(tensor)
+    self.assertAllEqual([7, 1], dims)
+
+  def test_inf(self):
+    """The default float32 module reports 1e7 as its finite 'infinity'."""
+    module = TestSubclass()
+    expected = tf.constant(10000000., tf.float32)
+    self.assertAllEqual(module.inf(), expected)
+
+  def test_length_normalization(self):
+    """The stored normalization function yields 1.0 for length 32."""
+    module = TestSubclass()
+    expected = tf.constant(1.0, tf.float32)
+    actual = module.length_normalization_fn(32, tf.float32)
+    self.assertAllEqual(actual, expected)
+
+# Run the test cases of this module when the file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()