
Commit dcf633c

delete default value reasoning_max_tokens (#4250)
* delete default value reasoning_max_tokens
* Adjust max_tokens and reasoning_max_tokens logic
1 parent 213f15e commit dcf633c

3 files changed: +4 additions, -5 deletions

fastdeploy/engine/engine.py

Lines changed: 0 additions & 3 deletions

@@ -210,9 +210,6 @@ def add_requests(self, task, sampling_params=None, **kwargs):
                 request.get("max_tokens"),
             ),
         )
-        if request.get("reasoning_max_tokens") is None:
-            default_reasoning_max_tokens = max(int(request.get("max_tokens") * 0.8), 1)
-            request.set("reasoning_max_tokens", default_reasoning_max_tokens)
         min_tokens = request.get("min_tokens")
         if input_ids_len + min_tokens >= self.cfg.max_model_len:
             error_msg = (

fastdeploy/engine/sampling_params.py

Lines changed: 0 additions & 2 deletions

@@ -159,8 +159,6 @@ def from_optional(
     def __post_init__(self):
         if self.seed is None:
             self.seed = random.randint(0, 922337203685477580)
-        if self.max_tokens is not None and self.reasoning_max_tokens is None:
-            self.reasoning_max_tokens = max(int(self.max_tokens * 0.8), 1)
         self._verify_args()

     def _verify_args(self) -> None:

fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py

Lines changed: 4 additions & 0 deletions

@@ -255,6 +255,10 @@ def process_request_dict(self, request, max_model_len=None):
             request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1]
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
+        else:
+            request["max_tokens"] = min(max_model_len - len(request["prompt_token_ids"]), request["max_tokens"])
+        if request.get("reasoning_max_tokens") is None:
+            request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1)
         data_processor_logger.info(f"Processed request {request}")

         return request
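Taken together, the commit removes the reasoning_max_tokens default from the engine and from SamplingParams.__post_init__, and computes it in the processor instead, after max_tokens has been clamped to the remaining context window. A minimal standalone sketch of that consolidated logic, using a plain dict in place of FastDeploy's request object (the helper name apply_token_budgets is hypothetical, not part of the library):

def apply_token_budgets(request: dict, max_model_len: int) -> dict:
    # Sketch of the post-commit defaulting, not the library API.
    remaining = max_model_len - len(request["prompt_token_ids"])
    if request.get("max_tokens") is None:
        # No explicit budget: spend whatever context is left.
        request["max_tokens"] = max(1, remaining)
    else:
        # Explicit budget: clamp it so prompt + output still fit the model.
        request["max_tokens"] = min(remaining, request["max_tokens"])
    if request.get("reasoning_max_tokens") is None:
        # Default reasoning budget: 80% of the (now clamped) max_tokens.
        request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1)
    return request

req = {"prompt_token_ids": [0] * 100, "max_tokens": 10_000}
print(apply_token_budgets(req, max_model_len=8_192))
# max_tokens is clamped to 8092; reasoning_max_tokens defaults to 6473.

Because the reasoning budget is derived after clamping, it can never exceed what the context window actually allows, which the old __post_init__ default could not guarantee.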
