Skip to content

Commit 326930f

Browse files
Jiayu Ye and tensorflower-gardener
authored and committed
Internal change
PiperOrigin-RevId: 531529187
1 parent 471b5c1 commit 326930f

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

official/nlp/modeling/networks/funnel_transformer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ def __init__(
452452

453453
def call(self, inputs, output_range: Optional[tf.Tensor] = None):
454454
# inputs are [word_ids, mask, type_ids]
455+
word_embeddings = None
455456
if isinstance(inputs, (list, tuple)):
456457
logging.warning('List inputs to %s are discouraged.', self.__class__)
457458
if len(inputs) == 3:
@@ -472,14 +473,16 @@ def call(self, inputs, output_range: Optional[tf.Tensor] = None):
472473
word_ids = inputs.get('input_word_ids')
473474
mask = inputs.get('input_mask')
474475
type_ids = inputs.get('input_type_ids')
476+
word_embeddings = inputs.get('input_word_embeddings', None)
475477

476478
dense_inputs = inputs.get('dense_inputs', None)
477479
dense_mask = inputs.get('dense_mask', None)
478480
dense_type_ids = inputs.get('dense_type_ids', None)
479481
else:
480482
raise ValueError('Unexpected inputs type to %s.' % self.__class__)
481483

482-
word_embeddings = self._embedding_layer(word_ids)
484+
if word_embeddings is None:
485+
word_embeddings = self._embedding_layer(word_ids)
483486

484487
if dense_inputs is not None:
485488
# Concat the dense embeddings at sequence begin so unpool_len can control

official/nlp/modeling/networks/funnel_transformer_test.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,43 @@ def test_network_invocation(self, output_range, out_seq_len, unpool_length):
320320
self.assertEqual(outputs[0].shape[-1], hidden_size)
321321
self.assertTrue(hasattr(test_network, "_embedding_projection"))
322322

323+
def test_embeddings_as_inputs(self):
  """Verifies the encoder accepts precomputed `input_word_embeddings`.

  Per the paired `call` change in this commit, the encoder only runs its
  own embedding layer when no embeddings are supplied, so invoking the
  network with `input_word_embeddings` (and no `input_word_ids`) must
  still produce outputs of the expected shape and dtype.
  """
  hidden_size = 32
  sequence_length = 21
  # Create a small FunnelTransformerEncoder to keep the test fast.
  test_network = funnel_transformer.FunnelTransformerEncoder(
      vocab_size=100,
      hidden_size=hidden_size,
      num_attention_heads=2,
      num_layers=3,
      pool_stride=2,
  )
  # Create the inputs (note that the first dimension is implicit).
  # Fix: the original wrote `shape=(sequence_length)`, which is a
  # parenthesized int, not a 1-tuple; use `(sequence_length,)` for
  # correctness and consistency with the mask/type_ids lines below.
  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  # Build with word ids so the embedding layer's variables exist before
  # we call it directly below.
  test_network.build(
      dict(input_word_ids=word_ids, input_mask=mask, input_type_ids=type_ids)
  )
  embeddings = test_network.get_embedding_layer()(word_ids)
  # Call the network with the precomputed embeddings instead of word ids.
  dict_outputs = test_network(
      dict(
          input_word_embeddings=embeddings,
          input_mask=mask,
          input_type_ids=type_ids,
      )
  )
  all_encoder_outputs = dict_outputs["encoder_outputs"]
  pooled = dict_outputs["pooled_output"]

  # Batch dimension is unknown at graph-construction time.
  expected_pooled_shape = [None, hidden_size]
  self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list())

  # The default output dtype is float32.
  self.assertAllEqual(tf.float32, all_encoder_outputs[-1].dtype)
  self.assertAllEqual(tf.float32, pooled.dtype)
359+
323360
def test_serialize_deserialize(self):
324361
# Create a network object that sets all of its config options.
325362
kwargs = dict(

0 commit comments

Comments (0)