Skip to content

Commit 671a4dc

Browse files
committed
add x1 parser
1 parent 234ef92 commit 671a4dc

File tree

1 file changed

+94
-41
lines changed

1 file changed

+94
-41
lines changed

fastdeploy/reasoning/ernie_x1_reasoning_parsers.py

Lines changed: 94 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,62 @@ class ErnieX1ReasoningParser(ReasoningParser):
3434

3535
def __init__(self, tokenizer):
    """Initialize the ERNIE X1 reasoning parser.

    Registers the special tokens that delimit thinking, response and
    tool-call sections, resolves their vocabulary ids, and builds a
    token-id -> status-name mapping used by ``get_model_status``.

    Args:
        tokenizer: The model tokenizer (forwarded to ``ReasoningParser``).

    Raises:
        ValueError: If no tokenizer was provided.
        RuntimeError: If any required special token is absent from the
            tokenizer vocabulary.
    """
    super().__init__(tokenizer)

    # All special tokens this parser must be able to recognize.
    token_definitions = {
        "think_start_token": "<think>",
        "think_end_token": "</think>",
        "response_start_token": "<response>",
        "response_end_token": "</response>",
        "tool_call_start_token": "<tool_call>",
        "tool_call_end_token": "</tool_call>",
    }

    if not self.model_tokenizer:
        raise ValueError("The model tokenizer must be passed to the ReasoningParser constructor.")

    missing_tokens = []
    for name, token_value in token_definitions.items():
        # Expose both the token string (self.<name>) and its id (self.<name>_id).
        setattr(self, name, token_value)
        token_id = self.vocab.get(token_value)
        setattr(self, f"{name}_id", token_id)
        if token_id is None:
            # BUG FIX: names already end in "_token", so appending " token"
            # produced e.g. "think start token token". Just humanize the name.
            missing_tokens.append(name.replace("_", " "))

    if missing_tokens:
        raise RuntimeError(
            f"Could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
        )

    # Maps the id of the last-seen special token to the parser status name.
    self.token_status_mapping = {
        self.think_start_token_id: "think_start",
        self.think_end_token_id: "think_end",
        self.response_start_token_id: "response_start",
        self.response_end_token_id: "response_end",
        self.tool_call_start_token_id: "tool_call_start",
        self.tool_call_end_token_id: "tool_call_end",
    }
72+
73+
def find_last_special_token(self, prompt_token_ids: list[int]) -> int:
    """Return the id of the last special token in ``prompt_token_ids``.

    Scans the prompt from the end; returns -1 when no special token occurs.

    Args:
        prompt_token_ids: Token ids of the prompt.

    Returns:
        The last special token id found, or -1 if none is present.
    """
    # Hoist the candidate set out of the loop (the original rebuilt a
    # six-element list on every iteration) and iterate in reverse directly.
    special_ids = (
        self.think_end_token_id,
        self.think_start_token_id,
        self.response_start_token_id,
        self.response_end_token_id,
        self.tool_call_start_token_id,
        self.tool_call_end_token_id,
    )
    for token_id in reversed(prompt_token_ids):
        if token_id in special_ids:
            return token_id
    return -1
85+
86+
def get_model_status(self, prompt_token_ids: list[int]) -> str:
    """Infer the parser status from the last special token in the prompt.

    Args:
        prompt_token_ids: Token ids of the prompt.

    Returns:
        A status name such as "think_start" or "tool_call_end"; defaults to
        "response_start" when no special token is present.
    """
    last_special = self.find_last_special_token(prompt_token_ids)
    # The explicit -1 check in the original was redundant: -1 is never a
    # key of token_status_mapping, so .get() already yields the default.
    return self.token_status_mapping.get(last_special, "response_start")
5093

5194
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    """Return True once the generated ids contain the <tool_call> start token."""
    tool_call_id = self.tool_call_start_token_id
    return any(token == tool_call_id for token in input_ids)
@@ -117,45 +160,55 @@ def extract_reasoning_content_streaming(
117160
# 默认情况不返回内容
118161
return None
119162

120-
def extract_reasoning_content(self, model_output: str, request: ChatCompletionRequest) -> Tuple[str, str]:
163+
def strip_last_newline(self, content: str, end_pos: int) -> str:
    """Return ``content[:end_pos]``, dropping a single newline sitting
    immediately before ``end_pos`` (i.e. just before a closing tag)."""
    if end_pos > 0 and content[end_pos - 1] == "\n":
        return content[: end_pos - 1]
    return content[:end_pos]
165+
166+
def extract_reasoning_content(
    self, model_output: str, request: ChatCompletionRequest, model_status: str
) -> Tuple[str, str]:
    """
    Batch (non-streaming) extraction of reasoning and response content.

    Splits ``model_output`` into ``(reasoning_content, response_content)``
    based on ``model_status``, which records where generation started
    relative to the special tokens (see ``get_model_status``). Newlines
    inside the content are preserved; only the single newline directly
    before a closing tag is removed.
    """
    reasoning_content = ""
    response_content = ""

    if model_status == "think_start":
        # Generation began inside <think>: everything before </think> is reasoning.
        think_end_pos = model_output.find(self.think_end_token)
        if think_end_pos != -1:
            # Extract reasoning content (dropping one trailing newline).
            reasoning_content = self.strip_last_newline(model_output, think_end_pos)
            remaining = model_output[think_end_pos + len(self.think_end_token) :].lstrip("\n")

            # After </think> the model emits either a <response> or a <tool_call>.
            if remaining.startswith(self.response_start_token):
                response_start_pos = len(self.response_start_token)
                response_content = self._extract_response_content(remaining[response_start_pos:])
            elif remaining.startswith(self.tool_call_start_token):
                pass  # Tool calls carry no response content.
            # NOTE(review): text after </think> that starts with neither tag
            # is silently discarded — confirm this is intended.
        else:
            # No think_end_token found, treat entire output as reasoning content
            reasoning_content = model_output

    elif model_status == "think_end":
        # Generation began right after </think>: only a <response> may follow.
        remaining = model_output.lstrip("\n")
        if remaining.startswith(self.response_start_token):
            response_start_pos = len(self.response_start_token)
            response_content = self._extract_response_content(remaining[response_start_pos:])

    elif model_status == "response_start":
        # Generation began inside <response>: keep everything, removing every
        # </response> tag. NOTE(review): unlike the branches above this strips
        # ALL occurrences and does not trim the preceding newline — verify the
        # asymmetry is deliberate.
        response_content = model_output.replace(self.response_end_token, "")

    return reasoning_content, response_content
205+
206+
def _extract_response_content(self, remaining: str) -> str:
207+
"""
208+
Extracts response content, ensuring that the last newline before
209+
the </response> tag is removed.
210+
"""
211+
response_end_pos = remaining.find(self.response_end_token)
212+
if response_end_pos != -1:
213+
return self.strip_last_newline(remaining, response_end_pos)
214+
return remaining

0 commit comments

Comments
 (0)