
Commit 9463ccc

feat(QwenChatWrapper): support discouraging the generation of thoughts

1 parent 2e1a7ce commit 9463ccc

File tree

1 file changed: +101 −41 lines changed

src/chatWrappers/QwenChatWrapper.ts

Lines changed: 101 additions & 41 deletions
@@ -1,7 +1,8 @@
 import {ChatWrapper, ChatWrapperJinjaMatchConfiguration} from "../ChatWrapper.js";
 import {
-    ChatModelFunctions, ChatWrapperCheckModelCompatibilityParams, ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState,
-    ChatWrapperSettings, isChatModelResponseSegment
+    ChatModelFunctions, ChatModelResponse, ChatModelSegment, ChatWrapperCheckModelCompatibilityParams,
+    ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, ChatWrapperSettings, isChatModelResponseFunctionCall,
+    isChatModelResponseSegment
 } from "../types.js";
 import {LlamaText, SpecialToken, SpecialTokensText} from "../utils/LlamaText.js";
 import {GgufArchitectureType} from "../gguf/types/GgufMetadataTypes.js";
@@ -12,40 +13,9 @@ export class QwenChatWrapper extends ChatWrapper {
     public readonly wrapperName: string = "Qwen";

     public readonly keepOnlyLastThought: boolean;
+    public readonly thoughts: "auto" | "discourage";

-    public override readonly settings: ChatWrapperSettings = {
-        supportsSystemMessages: true,
-        functions: {
-            call: {
-                optionalPrefixSpace: true,
-                prefix: LlamaText("\n", new SpecialTokensText("<tool_call>"), '\n{"name": "'),
-                paramsPrefix: '", "arguments": ',
-                suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
-                emptyCallParamsPlaceholder: {}
-            },
-            result: {
-                prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
-                suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
-            },
-            parallelism: {
-                call: {
-                    sectionPrefix: "",
-                    sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
-                },
-                result: {
-                    sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
-                    sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
-                }
-            }
-        },
-        segments: {
-            reiterateStackAfterFunctionCalls: true,
-            thought: {
-                prefix: LlamaText(new SpecialTokensText("<think>")),
-                suffix: LlamaText(new SpecialTokensText("</think>"))
-            }
-        }
-    };
+    public override readonly settings: ChatWrapperSettings;

     public constructor(options: {
         /**
@@ -55,15 +25,70 @@ export class QwenChatWrapper extends ChatWrapper {
          *
          * Defaults to `true`.
          */
-        keepOnlyLastThought?: boolean
+        keepOnlyLastThought?: boolean,
+
+        /**
+         * Control the usage of thoughts in the model responses.
+         *
+         * Defaults to `"auto"`.
+         */
+        thoughts?: "auto" | "discourage",
+
+        /** @internal */
+        _lineBreakBeforeFunctionCallPrefix?: boolean
     } = {}) {
         super();

         const {
-            keepOnlyLastThought = true
+            keepOnlyLastThought = true,
+            thoughts = "auto",
+            _lineBreakBeforeFunctionCallPrefix = false
         } = options;

         this.keepOnlyLastThought = keepOnlyLastThought;
+        this.thoughts = thoughts;
+
+        this.settings = {
+            supportsSystemMessages: true,
+            functions: {
+                call: {
+                    optionalPrefixSpace: true,
+                    prefix: LlamaText([
+                        _lineBreakBeforeFunctionCallPrefix
+                            ? "\n"
+                            : "",
+                        new SpecialTokensText("<tool_call>"), '\n{"name": "'
+                    ]),
+                    paramsPrefix: '", "arguments": ',
+                    suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
+                    emptyCallParamsPlaceholder: {}
+                },
+                result: {
+                    prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
+                    suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
+                },
+                parallelism: {
+                    call: {
+                        sectionPrefix: "",
+                        betweenCalls: _lineBreakBeforeFunctionCallPrefix
+                            ? ""
+                            : "\n",
+                        sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
+                    },
+                    result: {
+                        sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
+                        sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
+                    }
+                }
+            },
+            segments: {
+                reiterateStackAfterFunctionCalls: true,
+                thought: {
+                    prefix: LlamaText(new SpecialTokensText("<think>")),
+                    suffix: LlamaText(new SpecialTokensText("</think>"))
+                }
+            }
+        };
     }

     public override generateContextState({
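
Usage note: since the function-call prefix and the `betweenCalls` separator now depend on the `_lineBreakBeforeFunctionCallPrefix` option, `settings` is assigned in the constructor instead of a static field initializer. A minimal sketch of using the new public `thoughts` option (assuming node-llama-cpp's usual session APIs; the model path is a placeholder):

    import {getLlama, LlamaChatSession, QwenChatWrapper} from "node-llama-cpp";

    const llama = await getLlama();
    const model = await llama.loadModel({modelPath: "path/to/qwen3-model.gguf"}); // placeholder path
    const context = await model.createContext();
    const session = new LlamaChatSession({
        contextSequence: context.getSequence(),
        // discourage the model from generating <think> segments in its responses
        chatWrapper: new QwenChatWrapper({thoughts: "discourage"})
    });

    console.log(await session.prompt("Summarize HTTP caching in one sentence."));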
@@ -115,14 +140,18 @@ export class QwenChatWrapper extends ChatWrapper {
         } else if (item.type === "model") {
             flush();

+            const transformedModelResponse = (this.thoughts === "discourage" && isLastItem)
+                ? discourageThoughtsInModelResponse(item.response)
+                : item.response;
+
             currentAggregateFocus = null;
             modelTexts.push(
                 this.generateModelResponseText(
                     (this.keepOnlyLastThought && !isLastItem)
-                        ? item.response.filter((response) => (
+                        ? transformedModelResponse.filter((response) => (
                             !isChatModelResponseSegment(response) || response.segmentType !== "thought"
                         ))
-                        : item.response
+                        : transformedModelResponse
                 )
             );
         } else
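
Only the last model item (the response currently being generated) is transformed when `thoughts` is `"discourage"`; earlier responses keep their existing handling, including the `keepOnlyLastThought` filtering. The transformation splices in an already-ended empty thought segment, so the assistant turn is rendered with a closed thought block instead of leaving the model free to open one. Illustratively (based on the `<think>`/`</think>` segment affixes defined in the settings above), the rendered prompt ends roughly like:

    <|im_start|>assistant
    <think>

    </think>
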
@@ -204,13 +233,44 @@
     /** @internal */
     public static override _checkModelCompatibility(options: ChatWrapperCheckModelCompatibilityParams): boolean {
         const architecture = options.fileInfo?.metadata.general.architecture;
-        return architecture == null || architecture === GgufArchitectureType.qwen2;
+        return (
+            architecture == null ||
+            architecture === GgufArchitectureType.qwen2 ||
+            architecture === GgufArchitectureType.qwen2moe ||
+            architecture === GgufArchitectureType.qwen2vl ||
+            architecture === GgufArchitectureType.qwen3 ||
+            architecture === GgufArchitectureType.qwen3moe
+        );
     }

     /** @internal */
     public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
         return [
-            [undefined, {}, {_requireFunctionCallSettingsExtraction: true}]
+            [{}, {}, {_requireFunctionCallSettingsExtraction: true}],
+            [{_lineBreakBeforeFunctionCallPrefix: true}, {}, {_requireFunctionCallSettingsExtraction: true}]
         ];
     }
 }
+
+function discourageThoughtsInModelResponse(response: ChatModelResponse["response"]) {
+    const emptyThought: ChatModelSegment = {
+        type: "segment",
+        segmentType: "thought",
+        ended: true,
+        text: "\n\n",
+        raw: LlamaText(new SpecialTokensText("<think>\n\n</think>\n\n")).toJSON()
+    };
+    const res: ChatModelResponse["response"] = [...response];
+
+    for (let i = res.length - 1; i >= 0; i--) {
+        const item = res[i];
+
+        if (isChatModelResponseFunctionCall(item)) {
+            res.splice(i + 1, 0, emptyThought);
+            return res;
+        }
+    }
+
+    res.unshift(emptyThought);
+    return res;
+}
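
The helper walks the response from the end: when function calls are present, the empty thought is inserted right after the last call (the point where the model would otherwise start reasoning about the tool results); when there are none, it is prepended to the whole response. A rough sketch of the effect, with item shapes simplified for illustration:

    // without function calls: the empty thought is prepended
    // ["Hello!"]               ->  [emptyThought, "Hello!"]

    // with function calls: the empty thought follows the last call
    // [call, "Final answer"]   ->  [call, emptyThought, "Final answer"]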
