
Commit 06c0d60

fix: naming consistency
1 parent: 39373a6

3 files changed: +18 -18 lines changed


docs/guide/chat-session.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -899,8 +899,8 @@ const fullResponse = a1.response
 console.log("Full response: " + fullResponse);
 ```
 
-## Set Thinking Budget {#thinking-budget}
-You can set a thinking budget to limit the number of tokens a thinking model can spend on [thought segments](#stream-response-segments).
+## Set Reasoning Budget {#reasoning-budget}
+You can set a reasoning budget to limit the number of tokens a thinking model can spend on [thought segments](#stream-response-segments).
 ```typescript
 import {
     getLlama, LlamaChatSession, resolveModelFile, Token
````
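For orientation, here is a minimal sketch of what the renamed docs section describes, assuming the `node-llama-cpp` API surface that the snippet above imports (`getLlama`, `LlamaChatSession`, `resolveModelFile`) and the `budgets.thoughtTokens` prompt option that the CLI and test diffs below pass through. The model file name is a placeholder borrowed from the renamed test, and the setup boilerplate is illustrative rather than part of this diff:

```typescript
import {getLlama, LlamaChatSession, resolveModelFile} from "node-llama-cpp";

// Placeholder model reference; any thinking-capable model (e.g. Qwen3) works the same way
const modelPath = await resolveModelFile("Qwen3-0.6B-Q8_0.gguf");

const llama = await getLlama();
const model = await llama.loadModel({modelPath});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// Cap "thought" segments at 100 tokens; this is the budget the
// renamed section (and the CLI flag below) controls
const response = await session.prompt("Where do llamas come from?", {
    budgets: {
        thoughtTokens: 100
    }
});
console.log(response);
```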

src/cli/commands/ChatCommand.ts

Lines changed: 8 additions & 8 deletions

```diff
@@ -62,7 +62,7 @@ type ChatCommand = {
     repeatFrequencyPenalty?: number,
     repeatPresencePenalty?: number,
     maxTokens: number,
-    thoughtBudget?: number,
+    reasoningBudget?: number,
     noHistory: boolean,
     environmentFunctions: boolean,
     tokenPredictionDraftModel?: string,
@@ -263,8 +263,8 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
                 default: 0,
                 description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
             })
-            .option("thoughtBudget", {
-                alias: ["tb", "thinkingBudget", "reasoningBudget"],
+            .option("reasoningBudget", {
+                alias: ["tb", "thinkingBudget", "thoughtsBudget"],
                 type: "number",
                 default: -1,
                 defaultDescription: "Unlimited",
@@ -326,7 +326,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
        promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, swaFullCache,
        noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK,
        topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
-       repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, thoughtBudget, noHistory,
+       repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory,
        environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
     }) {
         try {
@@ -335,7 +335,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
                batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads,
                temperature, minP, topK, topP, seed,
                gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
-               maxTokens, thoughtBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize,
+               maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize,
                debug, meter, timing, noMmap, printTimings
             });
         } catch (err) {
@@ -352,12 +352,12 @@ async function RunChat({
    contextSize, batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar: grammarArg,
    jsonSchemaGrammarFile: jsonSchemaGrammarFilePath,
    threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine,
-   repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, thoughtBudget, noHistory, environmentFunctions, tokenPredictionDraftModel,
+   repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel,
    tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
 }: ChatCommand) {
     if (contextSize === -1) contextSize = undefined;
     if (gpuLayers === -1) gpuLayers = undefined;
-    if (thoughtBudget === -1) thoughtBudget = undefined;
+    if (reasoningBudget === -1) reasoningBudget = undefined;
 
     const headers = resolveHeaderFlag(headerArg);
     const trimWhitespace = !noTrimWhitespace;
@@ -696,7 +696,7 @@ async function RunChat({
            signal: abortController.signal,
            stopOnAbortSignal: true,
            budgets: {
-               thoughtTokens: thoughtBudget
+               thoughtTokens: reasoningBudget
            },
            repeatPenalty: {
                penalty: repeatPenalty,
```
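Worth noting from the hunks above: `reasoningBudget` becomes the canonical flag name, with `tb` and `thinkingBudget` still accepted as aliases and `thoughtsBudget` newly added, while the old canonical name `thoughtBudget` is not kept as an alias. The default of `-1` is normalized to `undefined` in `RunChat` (surfaced as "Unlimited" in the option's help text), and the value flows into the same `budgets.thoughtTokens` prompt option that the documentation change describes.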

test/modelDependent/qwen3-0.6b/thinkingBudget.test.ts renamed to test/modelDependent/qwen3-0.6b/reasoningBudget.test.ts

Lines changed: 8 additions & 8 deletions

```diff
@@ -4,8 +4,8 @@ import {getModelFile} from "../../utils/modelFiles.js";
 import {getTestLlama} from "../../utils/getTestLlama.js";
 
 describe("qwen3 0.6b", () => {
-    describe("thinking budget", () => {
-        test("doesn't exceed thinking budget", {timeout: 1000 * 60 * 60 * 2}, async () => {
+    describe("reasoning budget", () => {
+        test("doesn't exceed reasoning budget", {timeout: 1000 * 60 * 60 * 2}, async () => {
            const modelPath = await getModelFile("Qwen3-0.6B-Q8_0.gguf");
            const llama = await getTestLlama();
@@ -22,9 +22,9 @@ describe("qwen3 0.6b", () => {
            const initialChatHistory = chatSession.getChatHistory();
 
            async function promptWithBudget({
-                prompt, maxTokens, thinkingBudget
+                prompt, maxTokens, reasoningBudget
            }: {
-                prompt: string, maxTokens: number, thinkingBudget?: number
+                prompt: string, maxTokens: number, reasoningBudget?: number
            }) {
                let thoughtTokens = 0;
                let totalTokens = 0;
@@ -33,7 +33,7 @@ describe("qwen3 0.6b", () => {
                const {responseText, response} = await chatSession.promptWithMeta(prompt, {
                    maxTokens,
                    budgets: {
-                        thoughtTokens: thinkingBudget
+                        thoughtTokens: reasoningBudget
                    },
                    onResponseChunk(chunk) {
                        if (chunk.type === "segment" && chunk.segmentType === "thought") {
@@ -57,7 +57,7 @@ describe("qwen3 0.6b", () => {
 
            const res1 = await promptWithBudget({
                prompt: "Where do llamas come from?",
-                thinkingBudget: 10,
+                reasoningBudget: 10,
                maxTokens: 20
            });
            expect(res1.thoughtTokens).to.be.gt(1);
@@ -67,7 +67,7 @@ describe("qwen3 0.6b", () => {
 
            const res2 = await promptWithBudget({
                prompt: "Where do llamas come from?",
-                thinkingBudget: 0,
+                reasoningBudget: 0,
                maxTokens: 20
            });
            expect(res2.thoughtTokens).to.be.eq(0);
@@ -76,7 +76,7 @@ describe("qwen3 0.6b", () => {
 
            const res3 = await promptWithBudget({
                prompt: "Where do llamas come from?",
-                thinkingBudget: 20,
+                reasoningBudget: 20,
                maxTokens: 20
            });
            expect(res3.thoughtTokens).to.be.eq(res3.totalTokens);
```
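The three assertions visible in this diff cover the boundary cases of the budget: with a budget of 10 below `maxTokens`, thought tokens are generated but bounded (only the lower-bound assertion is shown in the changed lines); with a budget of 0, no thought tokens are produced at all; and with a budget equal to `maxTokens` (20), the budget never binds, so every generated token can be a thought token and `thoughtTokens` equals `totalTokens`.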
