
Commit 817a24a ("fix")
1 parent: 3b6de91

2 files changed (+28 −29 lines)


lightllm/server/detokenization/decode_req.py (25 additions, 0 deletions)

@@ -1,6 +1,10 @@
 import os
 from typing import List, Dict
 from lightllm.server.core.objs import Req
+from lightllm.utils.log_utils import init_logger
+
+logger = init_logger(__name__)
+
 
 LIGHTLLM_DECODE_PREFIX_LENGTH = int(os.getenv("LIGHTLLM_DECODE_PREFIX_LENGTH", 5))
 
@@ -15,6 +19,7 @@ def __init__(
         self.group_req_id = req.group_req_id
         self.prompt_ids = req.shm_prompt_ids.arr[0 : req.input_len].tolist()
         self.output_ids = []
+        self.output_strs = []
         self.prefix_offset = max(len(self.prompt_ids) - LIGHTLLM_DECODE_PREFIX_LENGTH, 0)
 
         if is_pd_decode_mode:
@@ -26,6 +31,8 @@ def __init__(
         self.req = req
         self.input_len = self.req.input_len
         self.prefix_str = ""
+        self.stop_strs: List[str] = self.req.sample_params.stop_sequences.to_strings()
+        self.stop_str_max_len = max([len(e) for e in self.stop_strs], default=0)
 
     def init_token_healing_prefix_str(self, token_id_to_token: Dict[int, str], tokenizer):
         tokens = [token_id_to_token[token_id] for token_id in self.req.prefix_token_ids.get_token_ids()]
@@ -35,6 +42,24 @@ def init_token_healing_prefix_str(self, token_id_to_token: Dict[int, str], token
            self.prefix_str = ""
            return
 
+    def stop_sequences_str_match(self) -> bool:
+        stop_strs = self.stop_strs
+        if not stop_strs or self.stop_str_max_len == 0:
+            return False
+
+        tail_token_len = self.stop_str_max_len + 10  # 10 for safety
+        tail_token_strs = self.output_strs[-tail_token_len:]
+        tail_str = "".join(tail_token_strs)
+
+        for stop_str in stop_strs:
+            if stop_str in tail_str:
+                logger.info(
+                    f"req_id {self.request_id} Found stop sequence in tail: stop_str='{stop_str}', "
+                    f"tail_str='{tail_str}'"
+                )
+                return True
+        return False
+
     def need_detoken(self):
         if (
             (not self.req.is_aborted)
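
The new DecodeReq.stop_sequences_str_match scans only a bounded tail of the accumulated output strings, so the per-token cost stays constant as the generation grows. Below is a minimal standalone sketch of the same tail-window technique; the function name and test strings are illustrative, not from the repo, and the +10 slack mirrors the diff's "10 for safety":

from typing import List

def match_stop_tail(output_strs: List[str], stop_strs: List[str]) -> bool:
    # Sketch of tail-window stop matching: join only the last
    # (max_stop_len + 10) token strings, then do substring checks.
    if not stop_strs:
        return False
    max_stop_len = max(len(s) for s in stop_strs)
    if max_stop_len == 0:
        return False
    # Each list entry is one detokenized token, so max_stop_len + 10
    # entries comfortably cover a stop string that straddles token
    # boundaries.
    tail = "".join(output_strs[-(max_stop_len + 10):])
    return any(stop in tail for stop in stop_strs)

# "</s>" is split across two tokens but still matches in the joined tail.
assert match_stop_tail(["Hello", " world", "</", "s>"], ["</s>"])
assert not match_stop_tail(["Hello", " world"], ["</s>"])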

lightllm/server/detokenization/manager.py (3 additions, 29 deletions)

@@ -101,32 +101,6 @@ def handle_loop(self):
            logger.exception(f"detoken process has exception {str(e)}")
            return
 
-    def _stop_sequences_str_matched(self, decode_req, tokenizer):
-        stop_sequences_str = (
-            decode_req.req.sample_params.stop_sequences.to_string()
-            if decode_req.req.sample_params.stop_sequences
-            else []
-        )
-        if not stop_sequences_str or tokenizer is None:
-            return False
-
-        max_stop_str_len = max(len(stop_str) for stop_str in stop_sequences_str) if stop_sequences_str else 0
-        if max_stop_str_len == 0:
-            return False
-
-        output_len = len(decode_req.output_ids)
-        tail_token_len = min(decode_req.req.input_len + output_len, max_stop_str_len + 10)  # +10 for safety
-        if tail_token_len > 0:
-            tail_token_ids = decode_req.req.shm_prompt_ids.arr[
-                (decode_req.req.input_len + output_len - tail_token_len) : (decode_req.req.input_len + output_len)
-            ]
-            tail_str = tokenizer.decode(tail_token_ids, skip_special_tokens=False)
-            for stop_str in stop_sequences_str:
-                if stop_str in tail_str:
-                    logger.info(f"Found stop sequence in tail: stop_str='{stop_str}', " f"tail_str='{tail_str}'")
-                    return True
-        return False
-
    def gen_token_out(self):
        exist_need_detoken = False
        exist_decode = False
@@ -161,10 +135,10 @@ def gen_token_out(self):
                    f"error token healing state, prefix_str {decode_req.prefix_str} new_text {new_text}"
                )
 
+            decode_req.output_strs.append(new_text)
+
            # stop string matching
-            if not decode_req.req.finish_status.is_stoped() and self._stop_sequences_str_matched(
-                decode_req, self.tokenizer
-            ):
+            if decode_req.stop_sequences_str_match():
                decode_req.req.stop_str_matched = True
 
            decode_req.req.out_tokens_queue.push(new_text, src_index, special, count_output_tokens)
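
Compared with the removed _stop_sequences_str_matched, the call site no longer re-decodes tail token ids from shared memory with the tokenizer on every step; it simply appends the text it just produced and asks the request object. A hedged sketch of that flow (DecodeReqStub and its inputs are illustrative stand-ins, not the real lightllm classes):

class DecodeReqStub:
    # Stand-in carrying only the two attributes this commit touches.
    def __init__(self):
        self.output_strs = []          # appended to by the detokenizer loop
        self.stop_str_matched = False  # lives on req.req in the real code

    def stop_sequences_str_match(self) -> bool:
        # Stub: the real method joins a bounded tail of output_strs and
        # checks every configured stop string as a substring.
        return "END" in "".join(self.output_strs[-8:])

decode_req = DecodeReqStub()
for new_text in ["The answer ", "is 42. EN", "D"]:
    decode_req.output_strs.append(new_text)   # mirrors gen_token_out
    if decode_req.stop_sequences_str_match():
        decode_req.stop_str_matched = True
        break

print(decode_req.stop_str_matched)  # True: "END" straddled two chunks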
