@@ -266,7 +266,10 @@ def inference_stream_vllm(llm_engine: LLMEngine,
             resp_list[i] = {'response': '', 'history': history}
             continue
         input_ids = inputs['input_ids']
-        llm_engine.add_request(str(i), None, generation_config, input_ids, **add_request_kwargs)
+        if version.parse(vllm.__version__) >= version.parse('0.4.3'):
+            llm_engine.add_request(str(i), {'prompt_token_ids': input_ids}, generation_config, **add_request_kwargs)
+        else:
+            llm_engine.add_request(str(i), None, generation_config, input_ids, **add_request_kwargs)
 
     print_idx_list = [[0] for _ in range(len(request_list))]
     prog_bar = tqdm(total=len(request_list), dynamic_ncols=True, disable=not use_tqdm)
@@ -353,7 +356,10 @@ def inference_vllm(llm_engine: LLMEngine,
             resp_list[i] = {'response': '', 'history': history}
             continue
         input_ids = inputs['input_ids']
-        llm_engine.add_request(str(i), None, generation_config, input_ids, **add_request_kwargs)
+        if version.parse(vllm.__version__) >= version.parse('0.4.3'):
+            llm_engine.add_request(str(i), {'prompt_token_ids': input_ids}, generation_config, **add_request_kwargs)
+        else:
+            llm_engine.add_request(str(i), None, generation_config, input_ids, **add_request_kwargs)
 
     if use_tqdm is True:
         assert verbose is False
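
Both hunks apply the same compatibility shim: starting with vLLM 0.4.3, LLMEngine.add_request takes the prompt as a single inputs dict (e.g. {'prompt_token_ids': ...}) rather than through the positional prompt_token_ids argument. A minimal standalone sketch of the pattern, assuming the packaging library is installed and using a hypothetical add_request_compat helper name (not part of this PR):

from typing import List

from packaging import version

import vllm
from vllm import LLMEngine, SamplingParams


def add_request_compat(llm_engine: LLMEngine, request_id: str,
                       sampling_params: SamplingParams,
                       input_ids: List[int]) -> None:
    # Hypothetical helper: dispatch to the add_request signature that
    # matches the installed vLLM version.
    if version.parse(vllm.__version__) >= version.parse('0.4.3'):
        # vLLM >= 0.4.3: the prompt is passed as one `inputs` dict.
        llm_engine.add_request(request_id, {'prompt_token_ids': input_ids},
                               sampling_params)
    else:
        # Older vLLM: token ids go in the positional `prompt_token_ids`
        # slot, with the `prompt` string argument left as None.
        llm_engine.add_request(request_id, None, sampling_params, input_ids)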