@@ -84,134 +84,6 @@ export function isModelInstaller(provider: any): provider is ModelInstaller {
8484
8585type InteractionStatus = "in_progress" | "success" | "error" | "cancelled" ;
8686
/**
 * Helper class to extract thinking content from custom tags during streaming.
 * This is used for providers like vLLM that support custom thinking output formats.
 *
 * Text is fed in arbitrary chunks via `process`. Anything between `openTag` and
 * `closeTag` is reported as thinking, everything else as regular content; the
 * tags themselves are dropped. A tag may be split across chunk boundaries: a
 * trailing fragment that could still turn into a tag is held back in an
 * internal buffer until later input (or `flush`) resolves it.
 */
export class ThinkingTagExtractor {
  private buffer = "";
  private inThinkingBlock = false;
  private readonly openTag: string;
  private readonly closeTag: string;

  /**
   * @param openTag - Marker that starts a thinking block. Must be non-empty.
   * @param closeTag - Marker that ends a thinking block. Must be non-empty.
   * @throws Error if either tag is empty. An empty tag matches at index 0 of
   * every buffer without consuming anything, which makes the output
   * meaningless and, when both tags are empty, makes `process` loop forever.
   */
  constructor(openTag: string, closeTag: string) {
    if (openTag.length === 0 || closeTag.length === 0) {
      throw new Error("ThinkingTagExtractor requires non-empty open and close tags");
    }
    this.openTag = openTag;
    this.closeTag = closeTag;
  }

  /**
   * Process a chunk of text and extract thinking/regular content.
   *
   * @param text - The next chunk of streamed text.
   * @returns The thinking content and regular content that can be yielded now.
   * Either string may be empty, e.g. when the chunk only completed a tag or
   * when its tail had to be held back as a possible partial tag.
   */
  process(text: string): {
    thinking: string;
    content: string;
  } {
    this.buffer += text;

    let thinking = "";
    let content = "";

    while (this.buffer.length > 0) {
      // Inside a thinking block we wait for the closing tag, otherwise for the
      // opening tag; apart from that both states are handled identically.
      const tag = this.inThinkingBlock ? this.closeTag : this.openTag;
      const tagIndex = this.buffer.indexOf(tag);
      const tagFound = tagIndex !== -1;

      let emitted: string;
      if (tagFound) {
        // Everything before the tag belongs to the current state; the tag
        // itself is consumed and not emitted.
        emitted = this.buffer.substring(0, tagIndex);
        this.buffer = this.buffer.substring(tagIndex + tag.length);
      } else {
        // No complete tag yet: emit everything except a trailing fragment
        // that might be the beginning of the tag.
        const heldBack = this.getPartialMatchLength(this.buffer, tag);
        const cut = this.buffer.length - heldBack;
        emitted = this.buffer.substring(0, cut);
        this.buffer = this.buffer.substring(cut);
      }

      if (this.inThinkingBlock) {
        thinking += emitted;
      } else {
        content += emitted;
      }

      if (!tagFound) {
        // Whatever is left in the buffer needs more input to be resolved.
        break;
      }
      this.inThinkingBlock = !this.inThinkingBlock;
    }

    return { thinking, content };
  }

  /**
   * Flush any remaining content in the buffer.
   * Call this when the stream ends.
   *
   * A held-back partial tag is returned verbatim, as thinking if the stream
   * ended inside a thinking block and as regular content otherwise. The
   * extractor is reset to its initial state afterwards.
   *
   * @returns The leftover text, attributed to thinking or content.
   */
  flush(): {
    thinking: string;
    content: string;
  } {
    const result = {
      thinking: this.inThinkingBlock ? this.buffer : "",
      content: this.inThinkingBlock ? "" : this.buffer,
    };
    this.buffer = "";
    this.inThinkingBlock = false;
    return result;
  }

  /**
   * Check if the end of the text could be the start of the tag.
   *
   * The longest candidate is tried first. Returning the shortest one would
   * break tags whose prefix overlaps itself (e.g. `<<think>>`): for a buffer
   * ending in `<<` only one `<` would be kept, the other would be emitted, and
   * the tag could no longer be recognised once the rest of it arrives.
   *
   * @param text - The buffered text, known not to contain the full tag.
   * @param tag - The tag being looked for.
   * @returns The length of the longest proper tag prefix that `text` ends
   * with, or 0 if there is none.
   */
  private getPartialMatchLength(text: string, tag: string): number {
    const longestCandidate = Math.min(tag.length - 1, text.length);
    for (let length = longestCandidate; length > 0; length--) {
      if (text.endsWith(tag.slice(0, length))) {
        return length;
      }
    }
    return 0;
  }
}
214-
21587export abstract class BaseLLM implements ILLM {
21688 static providerName : string ;
21789 static defaultOptions : Partial < LLMOptions > | undefined = undefined ;
@@ -324,10 +196,6 @@ export abstract class BaseLLM implements ILLM {
324196
325197 isFromAutoDetect ?: boolean ;
326198
327- // Thinking output format options
328- thinkingOpenTag ?: string ;
329- thinkingCloseTag ?: string ;
330-
331199 lastRequestId : string | undefined ;
332200
333201 private _llmOptions : LLMOptions ;
@@ -435,10 +303,6 @@ export abstract class BaseLLM implements ILLM {
435303 this . autocompleteOptions = options . autocompleteOptions ;
436304 this . sourceFile = options . sourceFile ;
437305 this . isFromAutoDetect = options . isFromAutoDetect ;
438-
439- // Thinking output format options
440- this . thinkingOpenTag = options . thinkingOpenTag ;
441- this . thinkingCloseTag = options . thinkingCloseTag ;
442306 }
443307
444308 get contextLength ( ) {
@@ -1132,54 +996,21 @@ export abstract class BaseLLM implements ILLM {
1132996 return completionOptions ;
1133997 }
1134998
1135- // Update the processChatChunk method:
1136999 private processChatChunk (
11371000 chunk : ChatMessage ,
11381001 interaction : ILLMInteractionLog | undefined ,
1139- thinkingExtractor ?: ThinkingTagExtractor ,
11401002 ) : {
11411003 completion : string [ ] ;
11421004 thinking : string [ ] ;
11431005 usage : Usage | null ;
11441006 chunk : ChatMessage ;
1145- thinkingChunk ?: ChatMessage ;
11461007 } {
11471008 const completion : string [ ] = [ ] ;
11481009 const thinking : string [ ] = [ ] ;
11491010 let usage : Usage | null = null ;
1150- let outputChunk = chunk ;
1151- let thinkingChunk : ChatMessage | undefined ;
11521011
11531012 if ( chunk . role === "assistant" ) {
1154- // If we have a thinking extractor, process the content through it
1155- if ( thinkingExtractor && typeof chunk . content === "string" ) {
1156- const extracted = thinkingExtractor . process ( chunk . content ) ;
1157-
1158- if ( extracted . thinking ) {
1159- thinking . push ( extracted . thinking ) ;
1160- thinkingChunk = {
1161- role : "thinking" ,
1162- content : extracted . thinking ,
1163- } ;
1164- }
1165-
1166- if ( extracted . content ) {
1167- const processedChunk : ChatMessage = {
1168- ...chunk ,
1169- content : extracted . content ,
1170- } ;
1171- completion . push ( this . _formatChatMessage ( processedChunk ) ) ;
1172- outputChunk = processedChunk ;
1173- } else {
1174- // No regular content in this chunk, just thinking
1175- outputChunk = {
1176- ...chunk ,
1177- content : "" ,
1178- } ;
1179- }
1180- } else {
1181- completion . push ( this . _formatChatMessage ( chunk ) ) ;
1182- }
1013+ completion . push ( this . _formatChatMessage ( chunk ) ) ;
11831014 } else if ( chunk . role === "thinking" && typeof chunk . content === "string" ) {
11841015 thinking . push ( chunk . content ) ;
11851016 }
@@ -1197,8 +1028,7 @@ export abstract class BaseLLM implements ILLM {
11971028 completion,
11981029 thinking,
11991030 usage,
1200- chunk : outputChunk ,
1201- thinkingChunk,
1031+ chunk,
12021032 } ;
12031033 }
12041034
@@ -1332,12 +1162,6 @@ export abstract class BaseLLM implements ILLM {
13321162 let usage : Usage | undefined = undefined ;
13331163 let citations : null | string [ ] = null ;
13341164
1335- // Create thinking tag extractor if custom tags are configured
1336- const thinkingExtractor =
1337- this . thinkingOpenTag && this . thinkingCloseTag
1338- ? new ThinkingTagExtractor ( this . thinkingOpenTag , this . thinkingCloseTag )
1339- : undefined ;
1340-
13411165 try {
13421166 if ( this . templateMessages ) {
13431167 for await ( const chunk of this . _streamComplete (
@@ -1394,46 +1218,13 @@ export abstract class BaseLLM implements ILLM {
13941218 }
13951219
13961220 for await ( const chunk of iterable ) {
1397- const result = this . processChatChunk (
1398- chunk ,
1399- interaction ,
1400- thinkingExtractor ,
1401- ) ;
1221+ const result = this . processChatChunk ( chunk , interaction ) ;
14021222 completion . push ( ...result . completion ) ;
14031223 thinking . push ( ...result . thinking ) ;
14041224 if ( result . usage !== null ) {
14051225 usage = result . usage ;
14061226 }
1407- // Yield thinking chunk first if present
1408- if ( result . thinkingChunk ) {
1409- yield result . thinkingChunk ;
1410- }
1411- // Only yield the main chunk if it has content or tool calls
1412- const hasToolCalls =
1413- result . chunk . role === "assistant" &&
1414- result . chunk . toolCalls ?. length ;
1415- const hasContent =
1416- result . chunk . content &&
1417- ( typeof result . chunk . content === "string"
1418- ? result . chunk . content . length > 0
1419- : result . chunk . content . length > 0 ) ;
1420-
1421- if ( hasToolCalls || hasContent ) {
1422- yield result . chunk ;
1423- }
1424- }
1425-
1426- // Flush any remaining content from the extractor
1427- if ( thinkingExtractor ) {
1428- const flushed = thinkingExtractor . flush ( ) ;
1429- if ( flushed . thinking ) {
1430- thinking . push ( flushed . thinking ) ;
1431- yield { role : "thinking" , content : flushed . thinking } ;
1432- }
1433- if ( flushed . content ) {
1434- completion . push ( flushed . content ) ;
1435- yield { role : "assistant" , content : flushed . content } ;
1436- }
1227+ yield result . chunk ;
14371228 }
14381229 } else {
14391230 if ( logEnabled ) {
@@ -1453,46 +1244,13 @@ export abstract class BaseLLM implements ILLM {
14531244 signal ,
14541245 completionOptions ,
14551246 ) ) {
1456- const result = this . processChatChunk (
1457- chunk ,
1458- interaction ,
1459- thinkingExtractor ,
1460- ) ;
1247+ const result = this . processChatChunk ( chunk , interaction ) ;
14611248 completion . push ( ...result . completion ) ;
14621249 thinking . push ( ...result . thinking ) ;
14631250 if ( result . usage !== null ) {
14641251 usage = result . usage ;
14651252 }
1466- // Yield thinking chunk first if present
1467- if ( result . thinkingChunk ) {
1468- yield result . thinkingChunk ;
1469- }
1470- // Only yield the main chunk if it has content or tool calls
1471- const hasToolCalls =
1472- result . chunk . role === "assistant" &&
1473- result . chunk . toolCalls ?. length ;
1474- const hasContent =
1475- result . chunk . content &&
1476- ( typeof result . chunk . content === "string"
1477- ? result . chunk . content . length > 0
1478- : result . chunk . content . length > 0 ) ;
1479-
1480- if ( hasToolCalls || hasContent ) {
1481- yield result . chunk ;
1482- }
1483- }
1484-
1485- // Flush any remaining content from the extractor
1486- if ( thinkingExtractor ) {
1487- const flushed = thinkingExtractor . flush ( ) ;
1488- if ( flushed . thinking ) {
1489- thinking . push ( flushed . thinking ) ;
1490- yield { role : "thinking" , content : flushed . thinking } ;
1491- }
1492- if ( flushed . content ) {
1493- completion . push ( flushed . content ) ;
1494- yield { role : "assistant" , content : flushed . content } ;
1495- }
1253+ yield result . chunk ;
14961254 }
14971255 }
14981256 }
0 commit comments