@@ -90,7 +90,7 @@ def overlap_prefill(
9090 b_seq_len = _0_b_seq_len ,
9191 is_prefill = True ,
9292 b_ready_cache_len = _o_b_ready_cache_len ,
93- multimodal_params = {} ,
93+ multimodal_params = [{ "images" : [], "audios" : []} for _ in range ( _0_batch_size )] ,
9494 mem_indexes_cpu = _0_mem_indexes ,
9595 )
9696
@@ -114,7 +114,7 @@ def overlap_prefill(
114114 b_seq_len = _1_b_seq_len ,
115115 is_prefill = True ,
116116 b_ready_cache_len = _1_b_ready_cache_len ,
117- multimodal_params = {} ,
117+ multimodal_params = [{ "images" : [], "audios" : []} for _ in range ( _1_batch_size )] ,
118118 mem_indexes_cpu = _1_mem_indexes ,
119119 )
120120
@@ -144,6 +144,7 @@ def overlap_decode(
144144 b_mtp_index = _0_b_mtp_index ,
145145 b_seq_len = _0_b_seq_len ,
146146 mem_indexes_cpu = _0_mem_indexes ,
147+ multimodal_params = [{"images" : [], "audios" : []} for _ in range (_0_batch_size )],
147148 )
148149
149150 _1_batch_size = batch_size - batch_size // 2
@@ -164,6 +165,7 @@ def overlap_decode(
164165 b_mtp_index = _1_b_mtp_index ,
165166 b_seq_len = _1_b_seq_len ,
166167 mem_indexes_cpu = _1_mem_indexes ,
168+ multimodal_params = [{"images" : [], "audios" : []} for _ in range (_1_batch_size )],
167169 )
168170
169171 output , output1 = model_part .microbatch_overlap_decode (micro_batch1 , micro_batch2 )
@@ -202,6 +204,7 @@ def prefill(
202204 b_ready_cache_len = b_ready_cache_len , # b_ready_cache_len
203205 b_prefill_start_loc = b_prefill_start_loc ,
204206 prefix_total_token_num = 0 , # the default kvcache len is zero.
207+ multimodal_params = [{"images" : [], "audios" : []} for _ in range (batch_size )],
205208 )
206209
207210 model_output = model_part .forward (model_input )
@@ -223,6 +226,7 @@ def decode(
223226 b_mtp_index = b_mtp_index ,
224227 mem_indexes_cpu = mem_indexes ,
225228 is_prefill = False ,
229+ multimodal_params = [{"images" : [], "audios" : []} for _ in range (batch_size )],
226230 )
227231 model_output = model_part .forward (model_input )
228232 return model_output .logits
0 commit comments