Skip to content

Commit cd4dd0a

Browse files
committed
Fix torch end-to-end generation flow on GPU (#1122)
We don't have a good way to test this yet until we add GPU testing, so that will have to come later.
1 parent 684a2eb commit cd4dd0a

File tree

4 files changed

+20
-0
lines changed

4 files changed

+20
-0
lines changed

keras_nlp/models/bart/bart_seq_2_seq_lm_preprocessor.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
from absl import logging
2121

2222
from keras_nlp.api_export import keras_nlp_export
23+
from keras_nlp.backend import ops
2324
from keras_nlp.models.bart.bart_preprocessor import BartPreprocessor
2425
from keras_nlp.models.bart.bart_presets import backbone_presets
2526
from keras_nlp.utils.keras_utils import (
@@ -267,6 +268,10 @@ def generate_postprocess(
267268
x["decoder_token_ids"],
268269
x["decoder_padding_mask"],
269270
)
271+
if not isinstance(decoder_token_ids, tf.Tensor):
272+
decoder_token_ids = ops.convert_to_numpy(decoder_token_ids)
273+
if not isinstance(decoder_padding_mask, tf.Tensor):
274+
decoder_padding_mask = ops.convert_to_numpy(decoder_padding_mask)
270275
# Strip any special tokens during detokenization, i.e., the start and
271276
# end markers. In the future, we could make this configurable.
272277
decoder_padding_mask = (

keras_nlp/models/gpt2/gpt2_causal_lm_preprocessor.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from absl import logging
1919

2020
from keras_nlp.api_export import keras_nlp_export
21+
from keras_nlp.backend import ops
2122
from keras_nlp.models.gpt2.gpt2_preprocessor import GPT2Preprocessor
2223
from keras_nlp.utils.keras_utils import (
2324
convert_inputs_to_list_of_tensor_segments,
@@ -164,6 +165,10 @@ def generate_postprocess(
164165
back to a string.
165166
"""
166167
token_ids, padding_mask = x["token_ids"], x["padding_mask"]
168+
if not isinstance(token_ids, tf.Tensor):
169+
token_ids = ops.convert_to_numpy(token_ids)
170+
if not isinstance(padding_mask, tf.Tensor):
171+
padding_mask = ops.convert_to_numpy(padding_mask)
167172
# Strip any special tokens during detokenization (e.g. the start and
168173
# end markers). In the future we could make this configurable.
169174
padding_mask = padding_mask & (token_ids != self.tokenizer.end_token_id)

keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from absl import logging
1919

2020
from keras_nlp.api_export import keras_nlp_export
21+
from keras_nlp.backend import ops
2122
from keras_nlp.models.gpt_neo_x.gpt_neo_x_preprocessor import (
2223
GPTNeoXPreprocessor,
2324
)
@@ -132,6 +133,10 @@ def generate_postprocess(
132133
back to a string.
133134
"""
134135
token_ids, padding_mask = x["token_ids"], x["padding_mask"]
136+
if not isinstance(token_ids, tf.Tensor):
137+
token_ids = ops.convert_to_numpy(token_ids)
138+
if not isinstance(padding_mask, tf.Tensor):
139+
padding_mask = ops.convert_to_numpy(padding_mask)
135140
# Strip any special tokens during detokenization (e.g. the start and
136141
# end markers). In the future we could make this configurable.
137142
padding_mask = padding_mask & (token_ids != self.tokenizer.end_token_id)

keras_nlp/models/opt/opt_causal_lm_preprocessor.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from absl import logging
1919

2020
from keras_nlp.api_export import keras_nlp_export
21+
from keras_nlp.backend import ops
2122
from keras_nlp.models.opt.opt_preprocessor import OPTPreprocessor
2223
from keras_nlp.utils.keras_utils import (
2324
convert_inputs_to_list_of_tensor_segments,
@@ -165,6 +166,10 @@ def generate_postprocess(
165166
back to a string.
166167
"""
167168
token_ids, padding_mask = x["token_ids"], x["padding_mask"]
169+
if not isinstance(token_ids, tf.Tensor):
170+
token_ids = ops.convert_to_numpy(token_ids)
171+
if not isinstance(padding_mask, tf.Tensor):
172+
padding_mask = ops.convert_to_numpy(padding_mask)
168173
# Strip any special tokens during detokenization (e.g. the start and
169174
# end markers). In the future we could make this configurable.
170175
padding_mask = padding_mask & (token_ids != self.tokenizer.end_token_id)

0 commit comments

Comments (0)