@@ -777,7 +777,7 @@ def _prepare_inputs(self) -> None:
777
777
output_padding_offset ,
778
778
) = pre_process (
779
779
self .share_inputs ["input_ids" ],
780
- self .share_inputs [ "seq_lens_this_time" ] ,
780
+ getattr ( self .share_inputs , "seq_lens_this_time" , self . seq_lens_this_time_buffer ) ,
781
781
self .speculative_decoding ,
782
782
(self .share_inputs ["draft_tokens" ] if self .speculative_decoding else None ),
783
783
self .share_inputs ["seq_lens_encoder" ],
@@ -861,7 +861,7 @@ def initialize_forward_meta(self):
861
861
max_len_tensor_cpu = self .share_inputs ["max_len_tensor_cpu" ],
862
862
seq_lens_encoder = self .share_inputs ["seq_lens_encoder" ],
863
863
seq_lens_decoder = self .share_inputs ["seq_lens_decoder" ],
864
- seq_lens_this_time = self .share_inputs [ "seq_lens_this_time" ] ,
864
+ seq_lens_this_time = getattr ( self .share_inputs , "seq_lens_this_time" , self . seq_lens_this_time_buffer ) ,
865
865
batch_id_per_token = self .share_inputs ["batch_id_per_token" ],
866
866
cu_seqlens_q = self .share_inputs ["cu_seqlens_q" ],
867
867
cu_seqlens_k = self .share_inputs ["cu_seqlens_k" ],
0 commit comments