File tree — 2 files changed: +2 additions, −10 deletions
@@ -83,18 +83,11 @@ def __init__(
8383 gpu_memory_utilization = config .gpu_memory_utilization ,
8484 enable_chunked_prefill = config .enable_chunked_prefill ,
8585 # max_num_batched_tokens=256, # you can further set this parameter to reduce the vllm peak memory usage
86- override_generation_config = {
86+ override_generation_config = { # TODO: find a way to unittest this
8787 "temperature" : config .temperature ,
8888 "top_p" : config .top_p ,
8989 "top_k" : config .top_k ,
90- "max_tokens" : config .max_response_tokens ,
91- "min_tokens" : config .min_response_tokens ,
92- "truncate_prompt_tokens" : config .max_prompt_tokens ,
93- "skip_special_tokens" : True ,
94- "include_stop_str_in_output" : False ,
95- "output_kind" : RequestOutputKind .FINAL_ONLY ,
96- "logprobs" : config .logprobs ,
97- "ignore_eos" : config .ignore_eos ,
90+ "max_new_tokens" : config .max_response_tokens ,
9891 },
9992 disable_log_stats = True ,
10093 enable_lora = config .enable_lora ,
@@ -48,7 +48,6 @@ def __init__(
4848 generate_kwargs = {
4949 "temperature" : self .task .rollout_args .temperature ,
5050 "top_p" : self .task .rollout_args .top_p ,
51- "top_k" : self .task .rollout_args .top_k ,
5251 "max_tokens" : self .task .rollout_args .max_tokens or 4096 ,
5352 "logprobs" : True ,
5453 "top_logprobs" : self .task .rollout_args .logprobs ,
You can’t perform that action at this time.
0 commit comments