Skip to content

Commit 1b6f482

Browse files
authored
[Cherry-pick] fix stop seq (#3263)
* fix out-of-bounds value for stop sequence * catch error if there are out-of-bounds values * check in offline mode
1 parent 5d3bf30 commit 1b6f482

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

fastdeploy/engine/engine.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,26 @@ def add_requests(self, task, sampling_params=None, **kwargs):
530530
llm_logger.error(error_msg)
531531
raise EngineError(error_msg, error_code=400)
532532

533+
if request.get("stop_seqs_len") is not None:
534+
stop_seqs_len = request.get("stop_seqs_len")
535+
max_stop_seqs_num = int(envs.FD_MAX_STOP_SEQS_NUM)
536+
if len(stop_seqs_len) > max_stop_seqs_num:
537+
error_msg = (
538+
f"Length of stop ({stop_seqs_len}) exceeds the limit max_model_len({max_stop_seqs_num})."
539+
"Please reduce the number of stop or set a lager max_stop_seqs_num by `FD_MAX_STOP_SEQS_NUM`"
540+
)
541+
llm_logger.error(error_msg)
542+
raise EngineError(error_msg, error_code=400)
543+
stop_seqs_max_len = int(envs.FD_STOP_SEQS_MAX_LEN)
544+
for single_stop_seq_len in stop_seqs_len:
545+
if single_stop_seq_len > stop_seqs_max_len:
546+
error_msg = (
547+
f"Length of stop_seqs({single_stop_seq_len}) exceeds the limit max_model_len({stop_seqs_max_len})."
548+
"Please reduce the length of stop sequences or set a larger stop_seqs_max_len by `FD_STOP_SEQS_MAX_LEN`"
549+
)
550+
llm_logger.error(error_msg)
551+
raise EngineError(error_msg, error_code=400)
552+
533553
if self.guided_decoding_checker is not None:
534554
request, err_msg = self.guided_decoding_checker.schema_format(request)
535555
if err_msg is not None:

fastdeploy/entrypoints/engine_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import numpy as np
2121

22+
from fastdeploy import envs
2223
from fastdeploy.input.preprocess import InputPreprocessor
2324
from fastdeploy.inter_communicator import IPCSignal, ZmqClient
2425
from fastdeploy.metrics.work_metrics import work_process_metrics
@@ -144,6 +145,26 @@ def add_requests(self, task):
144145
api_server_logger.error(error_msg)
145146
raise EngineError(error_msg, error_code=400)
146147

148+
if "stop_seqs_len" in task:
149+
stop_seqs_len = task["stop_seqs_len"]
150+
max_stop_seqs_num = int(envs.FD_MAX_STOP_SEQS_NUM)
151+
if len(stop_seqs_len) > max_stop_seqs_num:
152+
error_msg = (
153+
f"Length of stop ({stop_seqs_len}) exceeds the limit max_model_len({max_stop_seqs_num})."
154+
"Please reduce the number of stop or set a lager max_stop_seqs_num by `FD_MAX_STOP_SEQS_NUM`"
155+
)
156+
api_server_logger.error(error_msg)
157+
raise EngineError(error_msg, error_code=400)
158+
stop_seqs_max_len = int(envs.FD_STOP_SEQS_MAX_LEN)
159+
for single_stop_seq_len in stop_seqs_len:
160+
if single_stop_seq_len > stop_seqs_max_len:
161+
error_msg = (
162+
f"Length of stop_seqs({single_stop_seq_len}) exceeds the limit max_model_len({stop_seqs_max_len})."
163+
"Please reduce the length of stop sequences or set a larger stop_seqs_max_len by `FD_STOP_SEQS_MAX_LEN`"
164+
)
165+
api_server_logger.error(error_msg)
166+
raise EngineError(error_msg, error_code=400)
167+
147168
task["preprocess_end_time"] = time.time()
148169
preprocess_cost_time = task["preprocess_end_time"] - task["preprocess_start_time"]
149170
api_server_logger.info(

0 commit comments

Comments
 (0)