File tree — 2 files changed: +2 additions, −10 deletions
@@ -83,18 +83,11 @@ def __init__(
8383 gpu_memory_utilization = config .gpu_memory_utilization ,
8484 enable_chunked_prefill = config .enable_chunked_prefill ,
8585 # max_num_batched_tokens=256, # you can further set this parameter to reduce the vllm peak memory usage
86- override_generation_config = {
86+ override_generation_config = { # TODO: find a way to unittest this
8787 "temperature" : config .temperature ,
8888 "top_p" : config .top_p ,
8989 "top_k" : config .top_k ,
90- "max_tokens" : config .max_response_tokens ,
91- "min_tokens" : config .min_response_tokens ,
92- "truncate_prompt_tokens" : config .max_prompt_tokens ,
93- "skip_special_tokens" : True ,
94- "include_stop_str_in_output" : False ,
95- "output_kind" : RequestOutputKind .FINAL_ONLY ,
96- "logprobs" : config .logprobs ,
97- "ignore_eos" : config .ignore_eos ,
90+ "max_new_tokens" : config .max_response_tokens ,
9891 },
9992 disable_log_stats = True ,
10093 enable_lora = config .enable_lora ,
@@ -48,7 +48,6 @@ def __init__(
4848 generate_kwargs = {
4949 "temperature" : self .task .rollout_args .temperature ,
5050 "top_p" : self .task .rollout_args .top_p ,
51- "top_k" : self .task .rollout_args .top_k ,
5251 "max_tokens" : self .task .rollout_args .max_tokens or 4096 ,
5352 "logprobs" : True ,
5453 "top_logprobs" : self .task .rollout_args .logprobs ,
You can’t perform that action at this time.
0 commit comments