
Commit ccfd0c9

Copilot and AyRickk committed
Move thinking tag extraction from BaseLLM to vLLM-specific
Per reviewer feedback on PR continuedev#8901:

- Remove ThinkingTagExtractor class from core/llm/index.ts (keep in separate file)
- Remove thinkingOpenTag/thinkingCloseTag from BaseLLM class
- Remove thinking extractor logic from processChatChunk and streamChat in BaseLLM
- Remove thinkingOpenTag/thinkingCloseTag from LLMOptions in core/index.d.ts
- Remove thinkingTagIntegration.vitest.ts (BaseLLM integration test)

The feature is now vLLM-specific only, handled by the Vllm class.

Co-authored-by: AyRickk <[email protected]>
1 parent 8684322 commit ccfd0c9
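As a rough sketch of where this behavior can live after its removal from BaseLLM (hypothetical wiring, not code from this commit): a provider-local async generator can wrap a chat stream with the ThinkingTagExtractor shown in the diff below. The extractThinking name and the simplified ChatMessage shape are assumptions for illustration only.

// Hypothetical sketch, not code from this commit. Assumes the
// ThinkingTagExtractor class shown in the core/llm/index.ts diff below;
// the ChatMessage shape is simplified for illustration.
type ChatMessage = { role: "assistant" | "thinking"; content: string };

async function* extractThinking(
  stream: AsyncIterable<ChatMessage>,
  openTag: string,
  closeTag: string,
): AsyncGenerator<ChatMessage> {
  const extractor = new ThinkingTagExtractor(openTag, closeTag);
  for await (const chunk of stream) {
    if (chunk.role !== "assistant") {
      // Pass non-assistant chunks through untouched
      yield chunk;
      continue;
    }
    const { thinking, content } = extractor.process(chunk.content);
    if (thinking) yield { role: "thinking", content: thinking };
    if (content) yield { role: "assistant", content };
  }
  // The stream may end mid-tag; flush whatever is still buffered
  const flushed = extractor.flush();
  if (flushed.thinking) yield { role: "thinking", content: flushed.thinking };
  if (flushed.content) yield { role: "assistant", content: flushed.content };
}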

3 files changed (+6, -571 lines)


core/index.d.ts

Lines changed: 0 additions & 6 deletions
@@ -687,12 +687,6 @@ export interface LLMOptions {
 
   sourceFile?: string;
   isFromAutoDetect?: boolean;
-
-  // Thinking output format options
-  // These allow configuring custom tags to extract thinking content from the response
-  // For example, vLLM can use <think>...</think> tags instead of the standard reasoning_content field
-  thinkingOpenTag?: string;
-  thinkingCloseTag?: string;
 }
 
 type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<
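For reference, the removed fields were plain optional strings; a config could have populated them roughly like this (illustrative only, using the Partial<LLMOptions> form that appears elsewhere in this diff):

// Illustrative only - these fields no longer exist on LLMOptions after this commit.
const thinkingTags: Partial<LLMOptions> = {
  thinkingOpenTag: "<think>",
  thinkingCloseTag: "</think>",
};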

core/llm/index.ts

Lines changed: 6 additions & 248 deletions
@@ -84,134 +84,6 @@ export function isModelInstaller(provider: any): provider is ModelInstaller {
 
 type InteractionStatus = "in_progress" | "success" | "error" | "cancelled";
 
-/**
- * Helper class to extract thinking content from custom tags during streaming.
- * This is used for providers like vLLM that support custom thinking output formats.
- */
-export class ThinkingTagExtractor {
-  private buffer: string = "";
-  private inThinkingBlock: boolean = false;
-  private readonly openTag: string;
-  private readonly closeTag: string;
-
-  constructor(openTag: string, closeTag: string) {
-    this.openTag = openTag;
-    this.closeTag = closeTag;
-  }
-
-  /**
-   * Process a chunk of text and extract thinking/regular content.
-   * Returns an object with the thinking content and regular content that should be yielded.
-   */
-  process(text: string): {
-    thinking: string;
-    content: string;
-  } {
-    this.buffer += text;
-
-    let thinking = "";
-    let content = "";
-
-    while (this.buffer.length > 0) {
-      if (this.inThinkingBlock) {
-        // Look for closing tag
-        const closeIndex = this.buffer.indexOf(this.closeTag);
-        if (closeIndex !== -1) {
-          // Found closing tag - extract thinking content up to it
-          thinking += this.buffer.substring(0, closeIndex);
-          this.buffer = this.buffer.substring(
-            closeIndex + this.closeTag.length,
-          );
-          this.inThinkingBlock = false;
-        } else {
-          // No closing tag yet - check if we might have a partial closing tag at the end
-          const partialMatchLength = this.getPartialMatchLength(
-            this.buffer,
-            this.closeTag,
-          );
-          if (partialMatchLength > 0) {
-            // Keep the potential partial match in the buffer
-            thinking += this.buffer.substring(
-              0,
-              this.buffer.length - partialMatchLength,
-            );
-            this.buffer = this.buffer.substring(
-              this.buffer.length - partialMatchLength,
-            );
-          } else {
-            // No partial match - all content is thinking
-            thinking += this.buffer;
-            this.buffer = "";
-          }
-          break;
-        }
-      } else {
-        // Not in thinking block - look for opening tag
-        const openIndex = this.buffer.indexOf(this.openTag);
-        if (openIndex !== -1) {
-          // Found opening tag
-          content += this.buffer.substring(0, openIndex);
-          this.buffer = this.buffer.substring(openIndex + this.openTag.length);
-          this.inThinkingBlock = true;
-        } else {
-          // No opening tag - check if we might have a partial opening tag at the end
-          const partialMatchLength = this.getPartialMatchLength(
-            this.buffer,
-            this.openTag,
-          );
-          if (partialMatchLength > 0) {
-            // Keep the potential partial match in the buffer
-            content += this.buffer.substring(
-              0,
-              this.buffer.length - partialMatchLength,
-            );
-            this.buffer = this.buffer.substring(
-              this.buffer.length - partialMatchLength,
-            );
-          } else {
-            // No partial match - all content is regular content
-            content += this.buffer;
-            this.buffer = "";
-          }
-          break;
-        }
-      }
-    }
-
-    return { thinking, content };
-  }
-
-  /**
-   * Flush any remaining content in the buffer.
-   * Call this when the stream ends.
-   */
-  flush(): {
-    thinking: string;
-    content: string;
-  } {
-    const result = {
-      thinking: this.inThinkingBlock ? this.buffer : "",
-      content: this.inThinkingBlock ? "" : this.buffer,
-    };
-    this.buffer = "";
-    this.inThinkingBlock = false;
-    return result;
-  }
-
-  /**
-   * Check if the end of the text could be the start of the tag.
-   * Returns the length of the partial match, or 0 if no match.
-   */
-  private getPartialMatchLength(text: string, tag: string): number {
-    for (let i = 1; i < tag.length && i <= text.length; i++) {
-      if (text.slice(-i) === tag.slice(0, i)) {
-        return i;
-      }
-    }
-    return 0;
-  }
-}
-
 export abstract class BaseLLM implements ILLM {
   static providerName: string;
   static defaultOptions: Partial<LLMOptions> | undefined = undefined;
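To make the deleted class's buffering behavior concrete, here is an illustrative walk-through (not part of the diff) of how process and flush handle a tag split across streaming chunks:

const extractor = new ThinkingTagExtractor("<think>", "</think>");

// "<thi" could be the start of "<think>", so it stays in the buffer:
extractor.process("Hello <thi");
// => { thinking: "", content: "Hello " }

// The buffered "<thi" completes into an open tag on the next chunk:
extractor.process("nk>planning...</think> world");
// => { thinking: "planning...", content: " world" }

// Nothing left over once the stream ends:
extractor.flush();
// => { thinking: "", content: "" }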
@@ -324,10 +196,6 @@ export abstract class BaseLLM implements ILLM {
 
   isFromAutoDetect?: boolean;
 
-  // Thinking output format options
-  thinkingOpenTag?: string;
-  thinkingCloseTag?: string;
-
   lastRequestId: string | undefined;
 
   private _llmOptions: LLMOptions;
@@ -435,10 +303,6 @@ export abstract class BaseLLM implements ILLM {
     this.autocompleteOptions = options.autocompleteOptions;
     this.sourceFile = options.sourceFile;
     this.isFromAutoDetect = options.isFromAutoDetect;
-
-    // Thinking output format options
-    this.thinkingOpenTag = options.thinkingOpenTag;
-    this.thinkingCloseTag = options.thinkingCloseTag;
   }
 
   get contextLength() {
@@ -1132,54 +996,21 @@ export abstract class BaseLLM implements ILLM {
     return completionOptions;
   }
 
-  // Update the processChatChunk method:
   private processChatChunk(
     chunk: ChatMessage,
     interaction: ILLMInteractionLog | undefined,
-    thinkingExtractor?: ThinkingTagExtractor,
   ): {
     completion: string[];
     thinking: string[];
     usage: Usage | null;
     chunk: ChatMessage;
-    thinkingChunk?: ChatMessage;
   } {
     const completion: string[] = [];
     const thinking: string[] = [];
     let usage: Usage | null = null;
-    let outputChunk = chunk;
-    let thinkingChunk: ChatMessage | undefined;
 
     if (chunk.role === "assistant") {
-      // If we have a thinking extractor, process the content through it
-      if (thinkingExtractor && typeof chunk.content === "string") {
-        const extracted = thinkingExtractor.process(chunk.content);
-
-        if (extracted.thinking) {
-          thinking.push(extracted.thinking);
-          thinkingChunk = {
-            role: "thinking",
-            content: extracted.thinking,
-          };
-        }
-
-        if (extracted.content) {
-          const processedChunk: ChatMessage = {
-            ...chunk,
-            content: extracted.content,
-          };
-          completion.push(this._formatChatMessage(processedChunk));
-          outputChunk = processedChunk;
-        } else {
-          // No regular content in this chunk, just thinking
-          outputChunk = {
-            ...chunk,
-            content: "",
-          };
-        }
-      } else {
-        completion.push(this._formatChatMessage(chunk));
-      }
+      completion.push(this._formatChatMessage(chunk));
     } else if (chunk.role === "thinking" && typeof chunk.content === "string") {
       thinking.push(chunk.content);
     }
@@ -1197,8 +1028,7 @@ export abstract class BaseLLM implements ILLM {
       completion,
       thinking,
       usage,
-      chunk: outputChunk,
-      thinkingChunk,
+      chunk,
     };
   }
 
@@ -1332,12 +1162,6 @@ export abstract class BaseLLM implements ILLM {
     let usage: Usage | undefined = undefined;
     let citations: null | string[] = null;
 
-    // Create thinking tag extractor if custom tags are configured
-    const thinkingExtractor =
-      this.thinkingOpenTag && this.thinkingCloseTag
-        ? new ThinkingTagExtractor(this.thinkingOpenTag, this.thinkingCloseTag)
-        : undefined;
-
     try {
       if (this.templateMessages) {
         for await (const chunk of this._streamComplete(
@@ -1394,46 +1218,13 @@ export abstract class BaseLLM implements ILLM {
         }
 
         for await (const chunk of iterable) {
-          const result = this.processChatChunk(
-            chunk,
-            interaction,
-            thinkingExtractor,
-          );
+          const result = this.processChatChunk(chunk, interaction);
           completion.push(...result.completion);
           thinking.push(...result.thinking);
           if (result.usage !== null) {
             usage = result.usage;
           }
-          // Yield thinking chunk first if present
-          if (result.thinkingChunk) {
-            yield result.thinkingChunk;
-          }
-          // Only yield the main chunk if it has content or tool calls
-          const hasToolCalls =
-            result.chunk.role === "assistant" &&
-            result.chunk.toolCalls?.length;
-          const hasContent =
-            result.chunk.content &&
-            (typeof result.chunk.content === "string"
-              ? result.chunk.content.length > 0
-              : result.chunk.content.length > 0);
-
-          if (hasToolCalls || hasContent) {
-            yield result.chunk;
-          }
-        }
-
-        // Flush any remaining content from the extractor
-        if (thinkingExtractor) {
-          const flushed = thinkingExtractor.flush();
-          if (flushed.thinking) {
-            thinking.push(flushed.thinking);
-            yield { role: "thinking", content: flushed.thinking };
-          }
-          if (flushed.content) {
-            completion.push(flushed.content);
-            yield { role: "assistant", content: flushed.content };
-          }
+          yield result.chunk;
         }
       } else {
         if (logEnabled) {
@@ -1453,46 +1244,13 @@ export abstract class BaseLLM implements ILLM {
           signal,
           completionOptions,
         )) {
-          const result = this.processChatChunk(
-            chunk,
-            interaction,
-            thinkingExtractor,
-          );
+          const result = this.processChatChunk(chunk, interaction);
           completion.push(...result.completion);
           thinking.push(...result.thinking);
           if (result.usage !== null) {
             usage = result.usage;
           }
-          // Yield thinking chunk first if present
-          if (result.thinkingChunk) {
-            yield result.thinkingChunk;
-          }
-          // Only yield the main chunk if it has content or tool calls
-          const hasToolCalls =
-            result.chunk.role === "assistant" &&
-            result.chunk.toolCalls?.length;
-          const hasContent =
-            result.chunk.content &&
-            (typeof result.chunk.content === "string"
-              ? result.chunk.content.length > 0
-              : result.chunk.content.length > 0);
-
-          if (hasToolCalls || hasContent) {
-            yield result.chunk;
-          }
-        }
-
-        // Flush any remaining content from the extractor
-        if (thinkingExtractor) {
-          const flushed = thinkingExtractor.flush();
-          if (flushed.thinking) {
-            thinking.push(flushed.thinking);
-            yield { role: "thinking", content: flushed.thinking };
-          }
-          if (flushed.content) {
-            completion.push(flushed.content);
-            yield { role: "assistant", content: flushed.content };
-          }
+          yield result.chunk;
         }
       }
     }
