PaddlePaddle · huismiling · Aug 28, 2023 · Aug 28, 2023
diff --git a/paddlenlp/ops/fast_transformer/src/fusion_unified_decoding_op.cc b/paddlenlp/ops/fast_transformer/src/fusion_unified_decoding_op.cc
@@ -66,7 +66,7 @@ std::vector<paddle::Tensor> UnifiedDecodingForward(
     const int& num_layer,
     const int& bos_id,
     const int& eos_id,
-    const int64_t& max_len,
+    const int& max_len,
     const float& beam_search_diversity_rate,
     const int& unk_id,
     const int& mask_id,
@@ -251,7 +251,7 @@ std::vector<std::vector<int64_t>> UnifiedDecodingInferShape(
     const int& num_layer,
     const int& bos_id,
     const int& eos_id,
-    const int64_t& max_len,
+    const int& max_len,
     const float& beam_search_diversity_rate,
     const int& unk_id,
     const int& mask_id,
@@ -397,7 +397,7 @@ PD_BUILD_OP(fusion_unified_decoding)
             "num_layer: int",
             "bos_id: int",
             "eos_id: int",
-            "max_len: int64_t",
+            "max_len: int",
             "beam_search_diversity_rate: float",
             "unk_id: int",
             "mask_id: int",

diff --git a/paddlenlp/transformers/unimo/modeling.py b/paddlenlp/transformers/unimo/modeling.py
@@ -104,7 +104,7 @@
                 if input_ids is not None:
                     num_pad = paddle.sum((input_ids == self.pad_token_id).astype("float32"), axis=-1, keepdim=True)
                     position_ids = F.relu(
-                        paddle.expand_as(paddle.arange(end=inputs_shape[1], dtype="int64"), inputs_shape) - num_pad
+                        paddle.expand_as(paddle.arange(end=inputs_shape[1], dtype="float32"), inputs_shape) - num_pad
                     ).astype("int64")
                 else:
                     logger.warning(
@@ -462,6 +462,7 @@
     def prepare_fast_entry(self, kwargs):
         from paddlenlp.ops import FasterMIRO, FasterUNIMOText
 
+        decoding_lib = kwargs.get("decoding_lib", None)
         use_fp16_decoding = kwargs.get("use_fp16_decoding", False)
         decode_strategy = kwargs.get("decode_strategy")
         if decode_strategy == "sampling" and kwargs.get("top_k") != 0 and kwargs.get("top_p") != 1:
@@ -480,9 +481,9 @@
             )
 
         if getattr(self.encoder, "norm", None) is None:
-            self._fast_entry = FasterUNIMOText(self, use_fp16_decoding=use_fp16_decoding).forward
+            self._fast_entry = FasterUNIMOText(self, use_fp16_decoding=use_fp16_decoding, decoding_lib=decoding_lib).forward
         else:
-            self._fast_entry = FasterMIRO(self, use_fp16_decoding=use_fp16_decoding).forward
+            self._fast_entry = FasterMIRO(self, use_fp16_decoding=use_fp16_decoding, decoding_lib=decoding_lib).forward
         return self._fast_entry
 
     def adjust_logits_during_generation(self, logits):