1919from lightllm .utils .dist_utils import init_distributed_env
2020from lightllm .utils .envs_utils import get_unique_server_name
2121from lightllm .server .core .objs import ShmReqManager , StartArgs
22- from lightllm .server .core .objs .io_objs import AbortedReqCmd
22+ from lightllm .server .core .objs .io_objs import AbortedReqCmd , StopStrMatchedReqCmd
2323from lightllm .server .router .model_infer .infer_batch import g_infer_context
2424from lightllm .server .router .model_infer .pin_mem_manager import g_pin_mem_manager
2525from lightllm .utils .dist_utils import get_global_rank , get_global_world_size , get_dp_size
2626from lightllm .utils .dist_utils import get_dp_world_size , get_global_dp_rank , get_current_rank_in_dp
2727from lightllm .utils .dist_utils import get_current_device_id , get_current_rank_in_node , get_node_world_size
2828from lightllm .utils .dist_utils import get_dp_rank_in_node , create_new_group_for_current_node
29- from lightllm .utils .envs_utils import get_env_start_args , enable_stop_string_match
29+ from lightllm .utils .envs_utils import get_env_start_args
3030from lightllm .distributed import dist_group_manager
3131from lightllm .server .router .shm_reqs_io_buffer import ShmReqsIOBuffer
3232from lightllm .server .router .model_infer .mode_backend .overlap_events import OverlapEventManager , OverlapEventPack
3333from lightllm .models .deepseek_mtp .model import Deepseek3MTPModel
34- from lightllm .server .tokenizer import get_tokenizer
3534
3635
3736class ModeBackend :
@@ -322,6 +321,12 @@ def _read_reqs_buffer_and_init_reqs(self):
322321 if obj .req_id in g_infer_context .requests_mapping :
323322 req : InferReq = g_infer_context .requests_mapping [obj .req_id ]
324323 req .infer_aborted = True
324+ elif isinstance (cmds [0 ], StopStrMatchedReqCmd ):
325+ for obj in cmds :
326+ obj : StopStrMatchedReqCmd = obj
327+ if obj .req_id in g_infer_context .requests_mapping :
328+ req : InferReq = g_infer_context .requests_mapping [obj .req_id ]
329+ req .infer_aborted = True
325330 else :
326331 self ._init_reqs (reqs = cmds )
327332 return
@@ -507,14 +512,6 @@ def _post_handle(
507512 extra_post_req_handle_func 用于提供在一个请求确定输出的时候,给出额外的后处理操作,主要是用于
508513 约束输出等模式,设置自己请求内部的状态机的状态,并添加额外的停止判定条件等。
509514 """
510- if enable_stop_string_match ():
511- if not hasattr (self , "tokenizer" ):
512- self .tokenizer = get_tokenizer (
513- self .args .model_dir , self .args .tokenizer_mode , trust_remote_code = self .args .trust_remote_code
514- )
515- else :
516- self .tokenizer = None
517-
518515 for req_obj , next_token_id , next_token_logprob , pack in zip (
519516 run_reqs , next_token_ids , next_token_logprobs , run_reqs_update_packs
520517 ):
@@ -526,7 +523,6 @@ def _post_handle(
526523 eos_ids = self .eos_id ,
527524 extra_post_req_handle_func = extra_post_req_handle_func ,
528525 is_master_in_dp = self .is_master_in_dp ,
529- tokenizer = self .tokenizer ,
530526 )
531527
532528 g_infer_context .req_manager .req_sampling_params_manager .update_reqs_token_counter (
0 commit comments