@@ -780,7 +780,7 @@ def _prepare_inputs(self) -> None:
780
780
output_padding_offset ,
781
781
) = pre_process (
782
782
self .share_inputs ["input_ids" ],
783
- getattr ( self .share_inputs , "seq_lens_this_time" , self . seq_lens_this_time_buffer ) ,
783
+ self .share_inputs [ "seq_lens_this_time" ] ,
784
784
self .speculative_decoding ,
785
785
(self .share_inputs ["draft_tokens" ] if self .speculative_decoding else None ),
786
786
self .share_inputs ["seq_lens_encoder" ],
@@ -864,7 +864,7 @@ def initialize_forward_meta(self):
864
864
max_len_tensor_cpu = self .share_inputs ["max_len_tensor_cpu" ],
865
865
seq_lens_encoder = self .share_inputs ["seq_lens_encoder" ],
866
866
seq_lens_decoder = self .share_inputs ["seq_lens_decoder" ],
867
- seq_lens_this_time = getattr ( self .share_inputs , "seq_lens_this_time" , self . seq_lens_this_time_buffer ) ,
867
+ seq_lens_this_time = self .share_inputs [ "seq_lens_this_time" ] ,
868
868
batch_id_per_token = self .share_inputs ["batch_id_per_token" ],
869
869
cu_seqlens_q = self .share_inputs ["cu_seqlens_q" ],
870
870
cu_seqlens_k = self .share_inputs ["cu_seqlens_k" ],
0 commit comments