
Commit 15fc450

apply suggestions
1 parent 582f48a commit 15fc450

2 files changed: +2, -10 lines

trinity/common/models/vllm_model.py

Lines changed: 2 additions & 9 deletions
@@ -83,18 +83,11 @@ def __init__(
             gpu_memory_utilization=config.gpu_memory_utilization,
             enable_chunked_prefill=config.enable_chunked_prefill,
             # max_num_batched_tokens=256, # you can further set this parameter to reduce the vllm peak memory usage
-            override_generation_config={
+            override_generation_config={  # TODO: find a way to unittest this
                 "temperature": config.temperature,
                 "top_p": config.top_p,
                 "top_k": config.top_k,
-                "max_tokens": config.max_response_tokens,
-                "min_tokens": config.min_response_tokens,
-                "truncate_prompt_tokens": config.max_prompt_tokens,
-                "skip_special_tokens": True,
-                "include_stop_str_in_output": False,
-                "output_kind": RequestOutputKind.FINAL_ONLY,
-                "logprobs": config.logprobs,
-                "ignore_eos": config.ignore_eos,
+                "max_new_tokens": config.max_response_tokens,
             },
             disable_log_stats=True,
             enable_lora=config.enable_lora,
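
The options removed from override_generation_config here (max_tokens, min_tokens, truncate_prompt_tokens, skip_special_tokens, include_stop_str_in_output, output_kind, logprobs, ignore_eos) are per-request SamplingParams fields in vLLM rather than HF-style generation_config keys, which is presumably why only max_new_tokens, the generation_config spelling of the response-length cap, stays in the override; the commit message does not state the rationale, so treat this as an assumption. A minimal sketch of supplying the same options per request through vllm.SamplingParams instead, with placeholder literals standing in for the config fields shown above and a placeholder model name:

from vllm import LLM, SamplingParams

# Placeholder literals stand in for the config.* fields shown in the diff;
# output_kind is omitted because the offline LLM API returns final outputs only.
sampling_params = SamplingParams(
    temperature=1.0,                  # config.temperature
    top_p=1.0,                        # config.top_p
    top_k=-1,                         # config.top_k (-1 disables top-k)
    max_tokens=1024,                  # config.max_response_tokens
    min_tokens=0,                     # config.min_response_tokens
    truncate_prompt_tokens=2048,      # config.max_prompt_tokens
    skip_special_tokens=True,
    include_stop_str_in_output=False,
    logprobs=1,                       # config.logprobs
    ignore_eos=False,                 # config.ignore_eos
)

llm = LLM(model="facebook/opt-125m")  # placeholder model, not from this repo
outputs = llm.generate(["Hello, world"], sampling_params)
print(outputs[0].outputs[0].text)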

trinity/common/workflows/agentscope_workflow.py

Lines changed: 0 additions & 1 deletion
@@ -48,7 +48,6 @@ def __init__(
             generate_kwargs={
                 "temperature": self.task.rollout_args.temperature,
                 "top_p": self.task.rollout_args.top_p,
-                "top_k": self.task.rollout_args.top_k,
                 "max_tokens": self.task.rollout_args.max_tokens or 4096,
                 "logprobs": True,
                 "top_logprobs": self.task.rollout_args.logprobs,
