@@ -1,7 +1,8 @@
 import {ChatWrapper, ChatWrapperJinjaMatchConfiguration} from "../ChatWrapper.js";
 import {
-    ChatModelFunctions, ChatWrapperCheckModelCompatibilityParams, ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState,
-    ChatWrapperSettings, isChatModelResponseSegment
+    ChatModelFunctions, ChatModelResponse, ChatModelSegment, ChatWrapperCheckModelCompatibilityParams,
+    ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, ChatWrapperSettings, isChatModelResponseFunctionCall,
+    isChatModelResponseSegment
 } from "../types.js";
 import {LlamaText, SpecialToken, SpecialTokensText} from "../utils/LlamaText.js";
 import {GgufArchitectureType} from "../gguf/types/GgufMetadataTypes.js";
@@ -12,40 +13,9 @@ export class QwenChatWrapper extends ChatWrapper {
     public readonly wrapperName: string = "Qwen";
 
     public readonly keepOnlyLastThought: boolean;
+    public readonly thoughts: "auto" | "discourage";
 
-    public override readonly settings: ChatWrapperSettings = {
-        supportsSystemMessages: true,
-        functions: {
-            call: {
-                optionalPrefixSpace: true,
-                prefix: LlamaText("\n", new SpecialTokensText("<tool_call>"), '\n{"name": "'),
-                paramsPrefix: '", "arguments": ',
-                suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
-                emptyCallParamsPlaceholder: {}
-            },
-            result: {
-                prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
-                suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
-            },
-            parallelism: {
-                call: {
-                    sectionPrefix: "",
-                    sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
-                },
-                result: {
-                    sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
-                    sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
-                }
-            }
-        },
-        segments: {
-            reiterateStackAfterFunctionCalls: true,
-            thought: {
-                prefix: LlamaText(new SpecialTokensText("<think>")),
-                suffix: LlamaText(new SpecialTokensText("</think>"))
-            }
-        }
-    };
+    public override readonly settings: ChatWrapperSettings;
 
     public constructor(options: {
         /**
@@ -55,15 +25,70 @@ export class QwenChatWrapper extends ChatWrapper {
          *
          * Defaults to `true`.
          */
-        keepOnlyLastThought?: boolean
+        keepOnlyLastThought?: boolean,
+
+        /**
+         * Control the usage of thoughts in the model responses.
+         *
+         * Defaults to `"auto"`.
+         */
+        thoughts?: "auto" | "discourage",
+
+        /** @internal */
+        _lineBreakBeforeFunctionCallPrefix?: boolean
     } = {}) {
         super();
 
         const {
-            keepOnlyLastThought = true
+            keepOnlyLastThought = true,
+            thoughts = "auto",
+            _lineBreakBeforeFunctionCallPrefix = false
         } = options;
 
         this.keepOnlyLastThought = keepOnlyLastThought;
+        this.thoughts = thoughts;
+
+        this.settings = {
+            supportsSystemMessages: true,
+            functions: {
+                call: {
+                    optionalPrefixSpace: true,
+                    prefix: LlamaText([
+                        _lineBreakBeforeFunctionCallPrefix
+                            ? "\n"
+                            : "",
+                        new SpecialTokensText("<tool_call>"), '\n{"name": "'
+                    ]),
+                    paramsPrefix: '", "arguments": ',
+                    suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
+                    emptyCallParamsPlaceholder: {}
+                },
+                result: {
+                    prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
+                    suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
+                },
+                parallelism: {
+                    call: {
+                        sectionPrefix: "",
+                        betweenCalls: _lineBreakBeforeFunctionCallPrefix
+                            ? ""
+                            : "\n",
+                        sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
+                    },
+                    result: {
+                        sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
+                        sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
+                    }
+                }
+            },
+            segments: {
+                reiterateStackAfterFunctionCalls: true,
+                thought: {
+                    prefix: LlamaText(new SpecialTokensText("<think>")),
+                    suffix: LlamaText(new SpecialTokensText("</think>"))
+                }
+            }
+        };
     }
 
     public override generateContextState({
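
A minimal usage sketch of the two public options above. The `getLlama` session setup and the model path are illustrative assumptions, not part of this change:

import {getLlama, LlamaChatSession, QwenChatWrapper} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "qwen3-8b.gguf"}); // placeholder path
const context = await model.createContext();

// `thoughts: "discourage"` makes the wrapper inject an empty thought segment
// so the model tends to answer directly; `keepOnlyLastThought` strips thought
// segments from all but the latest model response when building the context
const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    chatWrapper: new QwenChatWrapper({
        keepOnlyLastThought: true,
        thoughts: "discourage"
    })
});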
@@ -115,14 +140,18 @@
             } else if (item.type === "model") {
                 flush();
 
+                const transformedModelResponse = (this.thoughts === "discourage" && isLastItem)
+                    ? discourageThoughtsInModelResponse(item.response)
+                    : item.response;
+
                 currentAggregateFocus = null;
                 modelTexts.push(
                     this.generateModelResponseText(
                         (this.keepOnlyLastThought && !isLastItem)
-                            ? item.response.filter((response) => (
+                            ? transformedModelResponse.filter((response) => (
                                 !isChatModelResponseSegment(response) || response.segmentType !== "thought"
                             ))
-                            : item.response
+                            : transformedModelResponse
                     )
                 );
             } else
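
To make the branch above concrete: thought segments survive only in the latest model response. A simplified standalone sketch of that filter, with item shapes reduced for illustration (not the library's exact types):

type ResponseItem = string | {type: "segment", segmentType: "thought" | "comment", text: string};

function filterThoughts(response: ResponseItem[], keepOnlyLastThought: boolean, isLastItem: boolean): ResponseItem[] {
    // keep everything for the response currently being generated
    if (!keepOnlyLastThought || isLastItem)
        return response;

    // strip thought segments from older responses to save context space
    return response.filter((item) => (
        typeof item === "string" || item.segmentType !== "thought"
    ));
}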
@@ -204,13 +233,44 @@
     /** @internal */
     public static override _checkModelCompatibility(options: ChatWrapperCheckModelCompatibilityParams): boolean {
         const architecture = options.fileInfo?.metadata.general.architecture;
-        return architecture == null || architecture === GgufArchitectureType.qwen2;
+        return (
+            architecture == null ||
+            architecture === GgufArchitectureType.qwen2 ||
+            architecture === GgufArchitectureType.qwen2moe ||
+            architecture === GgufArchitectureType.qwen2vl ||
+            architecture === GgufArchitectureType.qwen3 ||
+            architecture === GgufArchitectureType.qwen3moe
+        );
     }
 
     /** @internal */
     public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
         return [
-            [undefined, {}, {_requireFunctionCallSettingsExtraction: true}]
+            [{}, {}, {_requireFunctionCallSettingsExtraction: true}],
+            [{_lineBreakBeforeFunctionCallPrefix: true}, {}, {_requireFunctionCallSettingsExtraction: true}]
         ];
     }
 }
+
+function discourageThoughtsInModelResponse(response: ChatModelResponse["response"]) {
+    const emptyThought: ChatModelSegment = {
+        type: "segment",
+        segmentType: "thought",
+        ended: true,
+        text: "\n\n",
+        raw: LlamaText(new SpecialTokensText("<think>\n\n</think>\n\n")).toJSON()
+    };
+    const res: ChatModelResponse["response"] = [...response];
+
+    for (let i = res.length - 1; i >= 0; i--) {
+        const item = res[i];
+
+        if (isChatModelResponseFunctionCall(item)) {
+            res.splice(i + 1, 0, emptyThought);
+            return res;
+        }
+    }
+
+    res.unshift(emptyThought);
+    return res;
+}
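
The placement rule of `discourageThoughtsInModelResponse`, illustrated with simplified items (shapes reduced; the real items also carry `raw` fields):

// A response that contains function calls gets the empty thought
// inserted right after the last call:
//   ["Checking", {type: "functionCall", name: "getWeather"}]
//   -> ["Checking", {type: "functionCall", name: "getWeather"}, emptyThought]
//
// A response with no function calls gets it prepended instead:
//   ["Hello!"] -> [emptyThought, "Hello!"]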