
Commit 965a179

Author: niushengxiao
Commit message: opt
Parent commit: 7666087

4 files changed: 18 additions & 8 deletions

lightllm/server/api_openai.py

Lines changed: 12 additions & 6 deletions
@@ -524,12 +524,18 @@ async def _collect_generation_results(
     final_text = "".join(final_output)
     if finish_reason == "stop" and sampling_params.stop_sequences.size > 0:
         valid_stop_strings = sampling_params.stop_sequences.to_strings()
-        for stop_str in valid_stop_strings:
-            stop_index = final_text.rfind(stop_str)
-            if stop_index != -1:
-                logger.debug(f"removed stop sequence in tail: '{final_text[stop_index:]}'")
-                final_text = final_text[:stop_index]
-                break
+        if valid_stop_strings:
+            max_stop_len = len(valid_stop_strings[0])
+            search_len = min(len(final_text), max_stop_len + 20)  # search window: longest stop sequence length plus 20
+            tail_text = final_text[-search_len:] if search_len > 0 else final_text
+            tail_start_pos = len(final_text) - search_len
+            for stop_str in valid_stop_strings:
+                stop_index = tail_text.rfind(stop_str)
+                if stop_index != -1:
+                    earliest_stop_index = tail_start_pos + stop_index
+                    logger.info(f"removed stop sequence in tail: '{final_text[earliest_stop_index:]}'")
+                    final_text = final_text[:earliest_stop_index]
+                    break
 
     return {
         "index": prompt_index,
lightllm/server/core/objs/req.py

Lines changed: 3 additions & 0 deletions
@@ -32,6 +32,9 @@ def get_status(self):
     def is_finished(self):
         return self.FINISHED_STOP <= self.status <= self.FINISHED_LENGTH
 
+    def is_stopped(self):
+        return self.status == self.FINISHED_STOP
+
     def get_finish_reason(self):
         if self.status == self.FINISHED_STOP:
             return "stop"

lightllm/server/detokenization/decode_req.py

Lines changed: 2 additions & 1 deletion
@@ -32,7 +32,8 @@ def __init__(
         self.input_len = self.req.input_len
         self.prefix_str = ""
         self.stop_strs: List[str] = self.req.sample_params.stop_sequences.to_strings()
-        self.stop_str_max_len = max([len(e) for e in self.stop_strs] + [0])
+        # to_strings() already returns the strings sorted in descending order of length, so the first element is the longest
+        self.stop_str_max_len = len(self.stop_strs[0]) if self.stop_strs else 0
 
     def init_token_healing_prefix_str(self, token_id_to_token: Dict[int, str], tokenizer):
         tokens = [token_id_to_token[token_id] for token_id in self.req.prefix_token_ids.get_token_ids()]
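A small illustration of the shortcut above: once the stop strings are sorted by length in descending order (as the comment says to_strings() already does), the first element's length equals the maximum. The sample stop strings here are made up.

stop_strs = sorted(["###", "</s>", "<|endoftext|>"], key=len, reverse=True)
stop_str_max_len = len(stop_strs[0]) if stop_strs else 0
# equivalent to the old max() over all lengths, but O(1) instead of O(n)
assert stop_str_max_len == max((len(s) for s in stop_strs), default=0)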

lightllm/server/detokenization/manager.py

Lines changed: 1 addition & 1 deletion
@@ -139,7 +139,7 @@ def gen_token_out(self):
             decode_req.output_strs.append(new_text)
 
             # stop-string matching
-            if decode_req.stop_sequences_str_match():
+            if not decode_req.req.finish_status.is_stopped() and decode_req.stop_sequences_str_match():
                 decode_req.req.stop_str_matched_token_index = src_index
                 decode_req.req.stop_str_matched = True
 
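A toy illustration of the guard added above: the stop-string matcher runs only until the request first enters the stopped state, after which the call is short-circuited away. The names below are stand-ins, not the real decode_req/req objects.

class ToyReq:
    def __init__(self):
        self.stopped = False
        self.match_calls = 0

    def is_stopped(self) -> bool:
        return self.stopped

    def stop_sequences_str_match(self) -> bool:
        self.match_calls += 1
        return True  # pretend a stop string is found on the first check

req = ToyReq()
for _ in range(5):  # five detokenization steps after the match
    if not req.is_stopped() and req.stop_sequences_str_match():
        req.stopped = True

assert req.match_calls == 1  # matched once, then skipped on later steps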