Skip to content

Commit 80b056d

Browse files
Copilot and AyRickk committed
Add configurable thinking tag extraction for vLLM
- Create ThinkingTagExtractor class in separate file (core/llm/ThinkingTagExtractor.ts)
- Move thinking tag extraction into vLLM class (provider-specific)
- Export VllmOptions interface for external consumers
- Add validation for thinking tag configuration
- Add comprehensive tests for ThinkingTagExtractor

Co-authored-by: AyRickk <[email protected]>
1 parent 6ecfcb8 commit 80b056d

File tree

4 files changed

+443
-3
lines changed

4 files changed

+443
-3
lines changed

core/llm/ThinkingTagExtractor.ts

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/**
 * Helper class to extract thinking content from custom tags during streaming.
 * This is used for providers like vLLM that support custom thinking output formats.
 *
 * The extractor is stateful: text is fed in chunk by chunk via `process()`, and
 * any trailing characters that could be the beginning of a tag are held back in
 * an internal buffer until the next chunk (or `flush()`) resolves them.
 */
export class ThinkingTagExtractor {
  /** Text received but not yet emitted (at most a partial tag between calls). */
  private buffer = "";
  /** True while the stream position is between an open tag and its close tag. */
  private inThinkingBlock = false;
  private readonly openTag: string;
  private readonly closeTag: string;

  /**
   * @param openTag - Literal string that starts a thinking block (e.g. `<think>`).
   * @param closeTag - Literal string that ends a thinking block (e.g. `</think>`).
   * @throws Error if either tag is empty. An empty tag matches at every
   *   position, which would make `process()` spin forever without consuming input.
   */
  constructor(openTag: string, closeTag: string) {
    if (openTag.length === 0 || closeTag.length === 0) {
      throw new Error(
        "ThinkingTagExtractor requires non-empty open and close tags",
      );
    }
    this.openTag = openTag;
    this.closeTag = closeTag;
  }

  /**
   * Process a chunk of text and extract thinking/regular content.
   *
   * Tags themselves are never emitted. Text that might be the start of the
   * next expected tag stays buffered and is resolved by a later call.
   *
   * @param text - The next chunk of streamed text (may be empty).
   * @returns The thinking content and regular content that should be yielded
   *   for this chunk; either may be the empty string.
   */
  process(text: string): {
    thinking: string;
    content: string;
  } {
    this.buffer += text;

    let thinking = "";
    let content = "";

    while (this.buffer.length > 0) {
      // Inside a block we wait for the close tag, outside for the open tag.
      const expectedTag = this.inThinkingBlock ? this.closeTag : this.openTag;
      const tagIndex = this.buffer.indexOf(expectedTag);
      const tagFound = tagIndex !== -1;

      let emitted: string;
      if (tagFound) {
        // Everything before the tag belongs to the current mode; drop the tag.
        emitted = this.buffer.substring(0, tagIndex);
        this.buffer = this.buffer.substring(tagIndex + expectedTag.length);
      } else {
        // No complete tag: emit all but a possible partial tag at the very end.
        const heldBack = this.getPartialMatchLength(this.buffer, expectedTag);
        const cut = this.buffer.length - heldBack;
        emitted = this.buffer.substring(0, cut);
        this.buffer = this.buffer.substring(cut);
      }

      if (this.inThinkingBlock) {
        thinking += emitted;
      } else {
        content += emitted;
      }

      if (!tagFound) {
        // Whatever is left in the buffer needs more input to be resolved.
        break;
      }
      this.inThinkingBlock = !this.inThinkingBlock;
    }

    return { thinking, content };
  }

  /**
   * Flush any remaining content in the buffer.
   * Call this when the stream ends.
   *
   * A held-back partial tag is returned verbatim, as thinking if the stream
   * ended inside a thinking block and as regular content otherwise. The
   * extractor is reset afterwards and can be reused for a new stream.
   *
   * @returns The leftover text, classified by the current block state.
   */
  flush(): {
    thinking: string;
    content: string;
  } {
    const result = {
      thinking: this.inThinkingBlock ? this.buffer : "",
      content: this.inThinkingBlock ? "" : this.buffer,
    };
    this.buffer = "";
    this.inThinkingBlock = false;
    return result;
  }

  /**
   * Check if the end of the text could be the start of the tag.
   *
   * Candidates are tried from longest to shortest so that tags with a
   * repeated leading character (e.g. `<<<TAG>>>`) keep the whole possible
   * prefix buffered; holding back only the shortest match would leak part of
   * the tag into the output and prevent it from ever being recognised.
   *
   * @param text - Buffered text known not to contain the full tag.
   * @param tag - The tag being looked for.
   * @returns The length of the longest proper tag prefix that `text` ends
   *   with, or 0 if no match.
   */
  private getPartialMatchLength(text: string, tag: string): number {
    const longestCandidate = Math.min(tag.length - 1, text.length);
    for (let length = longestCandidate; length > 0; length--) {
      if (text.endsWith(tag.slice(0, length))) {
        return length;
      }
    }
    return 0;
  }
}
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import { describe, expect, it } from "vitest";
2+
import { ThinkingTagExtractor } from "./ThinkingTagExtractor";
3+
4+
describe("ThinkingTagExtractor", () => {
  const OPEN = "<think>";
  const CLOSE = "</think>";

  /** Builds a fresh extractor configured with the default <think> tag pair. */
  const makeThinkExtractor = () => new ThinkingTagExtractor(OPEN, CLOSE);

  /** A scenario consisting of one process() call and its expected split. */
  type SingleCallCase = {
    name: string;
    open: string;
    close: string;
    input: string;
    thinking: string;
    content: string;
  };

  /** Registers one test per scenario; each gets its own extractor instance. */
  const defineSingleCallTests = (cases: SingleCallCase[]): void => {
    for (const scenario of cases) {
      it(scenario.name, () => {
        const extractor = new ThinkingTagExtractor(
          scenario.open,
          scenario.close,
        );
        const output = extractor.process(scenario.input);
        expect(output.thinking).toBe(scenario.thinking);
        expect(output.content).toBe(scenario.content);
      });
    }
  };

  describe("basic functionality", () => {
    defineSingleCallTests([
      {
        name: "should extract thinking content with simple tags",
        open: OPEN,
        close: CLOSE,
        input: "<think>thinking content</think>regular content",
        thinking: "thinking content",
        content: "regular content",
      },
      {
        name: "should handle content before thinking tags",
        open: OPEN,
        close: CLOSE,
        input: "before<think>thinking</think>after",
        thinking: "thinking",
        content: "beforeafter",
      },
      {
        name: "should handle only thinking content",
        open: OPEN,
        close: CLOSE,
        input: "<think>only thinking</think>",
        thinking: "only thinking",
        content: "",
      },
      {
        name: "should handle only regular content",
        open: OPEN,
        close: CLOSE,
        input: "just regular content",
        thinking: "",
        content: "just regular content",
      },
      {
        name: "should handle multiple thinking blocks",
        open: OPEN,
        close: CLOSE,
        input: "<think>first</think>middle<think>second</think>end",
        thinking: "firstsecond",
        content: "middleend",
      },
    ]);
  });

  describe("streaming chunks", () => {
    it("should handle thinking content split across chunks", () => {
      const extractor = makeThinkExtractor();

      // The full stream is "<think>thinking content</think>regular content",
      // delivered in pieces that cut through both tags.
      const first = extractor.process("<thi");
      expect(first.thinking).toBe("");
      expect(first.content).toBe("");

      const second = extractor.process("nk>thinking");
      expect(second.thinking).toBe("thinking");
      expect(second.content).toBe("");

      const third = extractor.process(" content</th");
      expect(third.thinking).toBe(" content");
      expect(third.content).toBe("");

      const fourth = extractor.process("ink>regular");
      expect(fourth.thinking).toBe("");
      expect(fourth.content).toBe("regular");

      const fifth = extractor.process(" content");
      expect(fifth.thinking).toBe("");
      expect(fifth.content).toBe(" content");
    });

    it("should handle partial open tag at end of chunk", () => {
      const extractor = makeThinkExtractor();

      const head = extractor.process("before<th");
      expect(head.content).toBe("before");
      expect(head.thinking).toBe("");

      const tail = extractor.process("ink>thinking</think>");
      expect(tail.thinking).toBe("thinking");
      expect(tail.content).toBe("");
    });

    it("should handle partial close tag at end of chunk", () => {
      const extractor = makeThinkExtractor();

      const head = extractor.process("<think>thinking</thi");
      expect(head.thinking).toBe("thinking");
      expect(head.content).toBe("");

      const tail = extractor.process("nk>after");
      expect(tail.thinking).toBe("");
      expect(tail.content).toBe("after");
    });
  });

  describe("flush", () => {
    it("should flush remaining content when not in thinking block", () => {
      const extractor = makeThinkExtractor();

      extractor.process("some content<th");
      const flushed = extractor.flush();
      expect(flushed.content).toBe("<th");
      expect(flushed.thinking).toBe("");
    });

    it("should flush remaining content when in thinking block", () => {
      const extractor = makeThinkExtractor();

      // process() already hands back everything after the open tag...
      const processed = extractor.process("<think>incomplete thinking");
      expect(processed.thinking).toBe("incomplete thinking");
      expect(processed.content).toBe("");

      // ...so nothing is left in the buffer for flush() to return.
      const flushed = extractor.flush();
      expect(flushed.thinking).toBe("");
      expect(flushed.content).toBe("");
    });

    it("should flush remaining partial close tag in thinking block", () => {
      const extractor = makeThinkExtractor();

      // The trailing "</thi" is held back as a possible close tag.
      const processed = extractor.process("<think>thinking</thi");
      expect(processed.thinking).toBe("thinking");
      expect(processed.content).toBe("");

      // At end of stream the held-back fragment is reported as thinking.
      const flushed = extractor.flush();
      expect(flushed.thinking).toBe("</thi");
      expect(flushed.content).toBe("");
    });

    it("should reset state after flush", () => {
      const extractor = makeThinkExtractor();

      extractor.process("<think>thinking");
      extractor.flush();

      const afterReset = extractor.process("new content");
      expect(afterReset.content).toBe("new content");
      expect(afterReset.thinking).toBe("");
    });
  });

  describe("custom tag formats", () => {
    defineSingleCallTests([
      {
        name: "should work with vLLM default reasoning tags",
        open: "<reasoning>",
        close: "</reasoning>",
        input: "<reasoning>my reasoning</reasoning>answer",
        thinking: "my reasoning",
        content: "answer",
      },
      {
        name: "should work with simple brackets",
        open: "[THINK]",
        close: "[/THINK]",
        input: "[THINK]internal thoughts[/THINK]response",
        thinking: "internal thoughts",
        content: "response",
      },
      {
        name: "should work with multi-character tags",
        open: "<<<REASONING>>>",
        close: "<<<END_REASONING>>>",
        input: "<<<REASONING>>>deep thoughts<<<END_REASONING>>>output",
        thinking: "deep thoughts",
        content: "output",
      },
    ]);
  });

  describe("edge cases", () => {
    defineSingleCallTests([
      {
        name: "should handle empty string",
        open: OPEN,
        close: CLOSE,
        input: "",
        thinking: "",
        content: "",
      },
      {
        name: "should handle consecutive tags",
        open: OPEN,
        close: CLOSE,
        input: "<think></think><think>second</think>",
        thinking: "second",
        content: "",
      },
      {
        // Blocks do not nest: the inner <think> is plain thinking text and
        // the first </think> ends the block, leaving the rest as content.
        name: "should handle nested-like content (not actual nesting)",
        open: OPEN,
        close: CLOSE,
        input: "<think>outer <think> inner</think> after</think>",
        thinking: "outer <think> inner",
        content: " after</think>",
      },
      {
        name: "should handle special characters in tags",
        open: "<!--THINK-->",
        close: "<!--/THINK-->",
        input: "<!--THINK-->special<!--/THINK-->normal",
        thinking: "special",
        content: "normal",
      },
    ]);
  });
});

0 commit comments

Comments
 (0)