AyRickk
diff --git a/‎core/llm/ThinkingTagExtractor.ts‎
Lines changed: 127 additions & 0 deletions b/‎core/llm/ThinkingTagExtractor.ts‎
Lines changed: 127 additions & 0 deletions
diff --git a/‎core/llm/ThinkingTagExtractor.vitest.ts‎
Lines changed: 217 additions & 0 deletions b/‎core/llm/ThinkingTagExtractor.vitest.ts‎
Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,127 @@
+/**
+ * Helper class to extract thinking content from custom tags during streaming.
+ * This is used for providers like vLLM that support custom thinking output formats.
+ *
+ * Text is fed in arbitrary chunks through `process()`. Anything between
+ * `openTag` and `closeTag` is reported as thinking, everything else as regular
+ * content, and the tags themselves are dropped. A chunk may end in the middle
+ * of a tag, so a trailing run that could still grow into the expected tag is
+ * held back until later input (or `flush()`) resolves it.
+ */
+export class ThinkingTagExtractor {
+  /** Text received but not yet emitted; between calls this is at most a possible partial tag. */
+  private buffer: string = "";
+  /** True while the stream position is between an open tag and its close tag. */
+  private inThinkingBlock: boolean = false;
+  private readonly openTag: string;
+  private readonly closeTag: string;
+
+  /**
+   * @param openTag - Literal marker that starts a thinking block.
+   * @param closeTag - Literal marker that ends a thinking block.
+   *
+   * Tags are matched as plain substrings. Both are expected to be non-empty:
+   * with two empty tags `process()` never terminates on non-empty input.
+   */
+  constructor(openTag: string, closeTag: string) {
+    this.openTag = openTag;
+    this.closeTag = closeTag;
+  }
+
+  /**
+   * Process a chunk of text and extract thinking/regular content.
+   *
+   * @param text - The next chunk of the stream; may split a tag at any position.
+   * @returns The thinking and regular content that can be yielded now. Either
+   *   string may be empty. Text that might be the beginning of the next
+   *   expected tag is retained internally and not returned yet.
+   */
+  process(text: string): {
+    thinking: string;
+    content: string;
+  } {
+    this.buffer += text;
+
+    let thinking = "";
+    let content = "";
+
+    while (this.buffer.length > 0) {
+      // Inside a block only the close tag matters; outside, only the open tag.
+      const insideThinking = this.inThinkingBlock;
+      const tag = insideThinking ? this.closeTag : this.openTag;
+      const tagIndex = this.buffer.indexOf(tag);
+      const tagFound = tagIndex !== -1;
+
+      // With a complete tag, emit everything before it. Otherwise emit
+      // everything except a trailing run that may still become the tag.
+      const emitLength = tagFound
+        ? tagIndex
+        : this.buffer.length - this.getPartialMatchLength(this.buffer, tag);
+      const emitted = this.buffer.substring(0, emitLength);
+      if (insideThinking) {
+        thinking += emitted;
+      } else {
+        content += emitted;
+      }
+
+      if (!tagFound) {
+        // Keep only the potential partial tag and wait for more input.
+        this.buffer = this.buffer.substring(emitLength);
+        break;
+      }
+
+      // Drop the tag itself and switch sides; the rest is scanned again.
+      this.buffer = this.buffer.substring(tagIndex + tag.length);
+      this.inThinkingBlock = !insideThinking;
+    }
+
+    return { thinking, content };
+  }
+
+  /**
+   * Flush any remaining content in the buffer.
+   * Call this when the stream ends.
+   *
+   * A held-back partial tag is returned verbatim, as thinking when the stream
+   * ended inside a thinking block and as regular content otherwise. The
+   * extractor is reset afterwards and can be reused for a new stream.
+   */
+  flush(): {
+    thinking: string;
+    content: string;
+  } {
+    const result = {
+      thinking: this.inThinkingBlock ? this.buffer : "",
+      content: this.inThinkingBlock ? "" : this.buffer,
+    };
+    this.buffer = "";
+    this.inThinkingBlock = false;
+    return result;
+  }
+
+  /**
+   * Check if the end of the text could be the start of the tag.
+   *
+   * @returns The length of the LONGEST suffix of `text` that is a proper
+   *   prefix of `tag`, or 0 if there is none.
+   */
+  private getPartialMatchLength(text: string, tag: string): number {
+    // Longest candidate first: for a tag such as "<<<X>>>" a buffer ending in
+    // "<<" must hold back both characters. Holding back only the shortest
+    // match would emit the first "<" and the tag could never be completed.
+    const maxLength = Math.min(tag.length - 1, text.length);
+    for (let length = maxLength; length > 0; length--) {
+      if (text.endsWith(tag.slice(0, length))) {
+        return length;
+      }
+    }
+    return 0;
+  }
+}
@@ -0,0 +1,217 @@
+import { describe, expect, it } from "vitest";
+import { ThinkingTagExtractor } from "./ThinkingTagExtractor";
+
+describe("ThinkingTagExtractor", () => {
+  describe("basic functionality", () => {
+    // Every case in this group hands the complete input to process() in one call.
+    it("should extract thinking content with simple tags", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      const { thinking, content } = tagExtractor.process(
+        "<think>thinking content</think>regular content",
+      );
+      expect(thinking).toBe("thinking content");
+      expect(content).toBe("regular content");
+    });
+
+    it("should handle content before thinking tags", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      const { thinking, content } = tagExtractor.process(
+        "before<think>thinking</think>after",
+      );
+      expect(thinking).toBe("thinking");
+      // Text on both sides of the block is joined once the block is removed.
+      expect(content).toBe("beforeafter");
+    });
+
+    it("should handle only thinking content", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      const { thinking, content } = tagExtractor.process(
+        "<think>only thinking</think>",
+      );
+      expect(thinking).toBe("only thinking");
+      expect(content).toBe("");
+    });
+
+    it("should handle only regular content", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      const { thinking, content } = tagExtractor.process("just regular content");
+      expect(thinking).toBe("");
+      expect(content).toBe("just regular content");
+    });
+
+    it("should handle multiple thinking blocks", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      const { thinking, content } = tagExtractor.process(
+        "<think>first</think>middle<think>second</think>end",
+      );
+      // Each stream is concatenated in order of appearance.
+      expect(thinking).toBe("firstsecond");
+      expect(content).toBe("middleend");
+    });
+  });
+
+  describe("streaming chunks", () => {
+    it("should handle thinking content split across chunks", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // Streamed pieces of "<think>thinking content</think>regular content",
+      // each paired with what process() must emit for that piece alone.
+      const steps = [
+        { chunk: "<thi", thinking: "", content: "" },
+        { chunk: "nk>thinking", thinking: "thinking", content: "" },
+        { chunk: " content</th", thinking: " content", content: "" },
+        { chunk: "ink>regular", thinking: "", content: "regular" },
+        { chunk: " content", thinking: "", content: " content" },
+      ];
+
+      for (const step of steps) {
+        const emitted = tagExtractor.process(step.chunk);
+        expect(emitted.thinking).toBe(step.thinking);
+        expect(emitted.content).toBe(step.content);
+      }
+    });
+
+    it("should handle partial open tag at end of chunk", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // "<th" may still become "<think>", so only "before" is released.
+      const head = tagExtractor.process("before<th");
+      expect(head.content).toBe("before");
+      expect(head.thinking).toBe("");
+
+      const tail = tagExtractor.process("ink>thinking</think>");
+      expect(tail.thinking).toBe("thinking");
+      expect(tail.content).toBe("");
+    });
+
+    it("should handle partial close tag at end of chunk", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // "</thi" may still become "</think>", so it is withheld from thinking.
+      const head = tagExtractor.process("<think>thinking</thi");
+      expect(head.thinking).toBe("thinking");
+      expect(head.content).toBe("");
+
+      const tail = tagExtractor.process("nk>after");
+      expect(tail.thinking).toBe("");
+      expect(tail.content).toBe("after");
+    });
+  });
+
+  describe("flush", () => {
+    it("should flush remaining content when not in thinking block", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // The trailing "<th" is withheld by process() and must surface on flush.
+      tagExtractor.process("some content<th");
+      const { content, thinking } = tagExtractor.flush();
+      expect(content).toBe("<th");
+      expect(thinking).toBe("");
+    });
+
+    it("should flush remaining content when in thinking block", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // Everything after the open tag is already emitted by process() ...
+      const processed = tagExtractor.process("<think>incomplete thinking");
+      expect(processed.thinking).toBe("incomplete thinking");
+      expect(processed.content).toBe("");
+
+      // ... so nothing is left in the buffer for flush() to return.
+      const flushed = tagExtractor.flush();
+      expect(flushed.thinking).toBe("");
+      expect(flushed.content).toBe("");
+    });
+
+    it("should flush remaining partial close tag in thinking block", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // The chunk ends with an unfinished close tag.
+      const processed = tagExtractor.process("<think>thinking</thi");
+      expect(processed.thinking).toBe("thinking");
+      expect(processed.content).toBe("");
+
+      // The unfinished tag is handed back as thinking text.
+      const flushed = tagExtractor.flush();
+      expect(flushed.thinking).toBe("</thi");
+      expect(flushed.content).toBe("");
+    });
+
+    it("should reset state after flush", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+
+      // Leave the extractor inside an open thinking block, then flush.
+      tagExtractor.process("<think>thinking");
+      tagExtractor.flush();
+
+      // New input is treated as regular content again.
+      const { content, thinking } = tagExtractor.process("new content");
+      expect(content).toBe("new content");
+      expect(thinking).toBe("");
+    });
+  });
+
+  describe("custom tag formats", () => {
+    it("should work with vLLM default reasoning tags", () => {
+      const tagExtractor = new ThinkingTagExtractor(
+        "<reasoning>",
+        "</reasoning>",
+      );
+      const { thinking, content } = tagExtractor.process(
+        "<reasoning>my reasoning</reasoning>answer",
+      );
+      expect(thinking).toBe("my reasoning");
+      expect(content).toBe("answer");
+    });
+
+    it("should work with simple brackets", () => {
+      const tagExtractor = new ThinkingTagExtractor("[THINK]", "[/THINK]");
+      const { thinking, content } = tagExtractor.process(
+        "[THINK]internal thoughts[/THINK]response",
+      );
+      expect(thinking).toBe("internal thoughts");
+      expect(content).toBe("response");
+    });
+
+    it("should work with multi-character tags", () => {
+      const openMarker = "<<<REASONING>>>";
+      const closeMarker = "<<<END_REASONING>>>";
+      const tagExtractor = new ThinkingTagExtractor(openMarker, closeMarker);
+      const { thinking, content } = tagExtractor.process(
+        "<<<REASONING>>>deep thoughts<<<END_REASONING>>>output",
+      );
+      expect(thinking).toBe("deep thoughts");
+      expect(content).toBe("output");
+    });
+  });
+
+  describe("edge cases", () => {
+    it("should handle empty string", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      const { thinking, content } = tagExtractor.process("");
+      expect(thinking).toBe("");
+      expect(content).toBe("");
+    });
+
+    it("should handle consecutive tags", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      // The first block is empty, so only the second one contributes text.
+      const { thinking, content } = tagExtractor.process(
+        "<think></think><think>second</think>",
+      );
+      expect(thinking).toBe("second");
+      expect(content).toBe("");
+    });
+
+    it("should handle nested-like content (not actual nesting)", () => {
+      const tagExtractor = new ThinkingTagExtractor("<think>", "</think>");
+      // There is no nesting: an inner <think> is plain thinking text and the
+      // first </think> ends the block.
+      const { thinking, content } = tagExtractor.process(
+        "<think>outer <think> inner</think> after</think>",
+      );
+      expect(thinking).toBe("outer <think> inner");
+      // The unmatched trailing </think> stays in the regular content.
+      expect(content).toBe(" after</think>");
+    });
+
+    it("should handle special characters in tags", () => {
+      const openMarker = "<!--THINK-->";
+      const closeMarker = "<!--/THINK-->";
+      const tagExtractor = new ThinkingTagExtractor(openMarker, closeMarker);
+      const { thinking, content } = tagExtractor.process(
+        "<!--THINK-->special<!--/THINK-->normal",
+      );
+      expect(thinking).toBe("special");
+      expect(content).toBe("normal");
+    });
+  });
+});