
Commit 06c0d60

fix: naming consistency
1 parent: 39373a6

3 files changed: +18 -18 lines changed


docs/guide/chat-session.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -899,8 +899,8 @@ const fullResponse = a1.response
 console.log("Full response: " + fullResponse);
 ```
 
-## Set Thinking Budget {#thinking-budget}
-You can set a thinking budget to limit the number of tokens a thinking model can spend on [thought segments](#stream-response-segments).
+## Set Reasoning Budget {#reasoning-budget}
+You can set a reasoning budget to limit the number of tokens a thinking model can spend on [thought segments](#stream-response-segments).
 ```typescript
 import {
     getLlama, LlamaChatSession, resolveModelFile, Token
````
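For orientation, here is a minimal sketch of what the renamed docs section describes, assuming the `node-llama-cpp` API surface that the snippet above imports (`getLlama`, `LlamaChatSession`, `resolveModelFile`) and the `budgets.thoughtTokens` prompt option that the CLI and test diffs below pass through. The model file name is a placeholder borrowed from the renamed test, and the setup boilerplate is illustrative rather than part of this diff:

```typescript
import {getLlama, LlamaChatSession, resolveModelFile} from "node-llama-cpp";

// Placeholder model reference; any thinking-capable model (e.g. Qwen3) works the same way
const modelPath = await resolveModelFile("Qwen3-0.6B-Q8_0.gguf");

const llama = await getLlama();
const model = await llama.loadModel({modelPath});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// Cap "thought" segments at 100 tokens; this is the budget the
// renamed section (and the CLI flag below) controls
const response = await session.prompt("Where do llamas come from?", {
    budgets: {
        thoughtTokens: 100
    }
});
console.log(response);
```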

src/cli/commands/ChatCommand.ts

Lines changed: 8 additions & 8 deletions

```diff
@@ -62,7 +62,7 @@ type ChatCommand = {
     repeatFrequencyPenalty?: number,
     repeatPresencePenalty?: number,
     maxTokens: number,
-    thoughtBudget?: number,
+    reasoningBudget?: number,
     noHistory: boolean,
     environmentFunctions: boolean,
     tokenPredictionDraftModel?: string,
@@ -263,8 +263,8 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
                 default: 0,
                 description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
             })
-            .option("thoughtBudget", {
-                alias: ["tb", "thinkingBudget", "reasoningBudget"],
+            .option("reasoningBudget", {
+                alias: ["tb", "thinkingBudget", "thoughtsBudget"],
                 type: "number",
                 default: -1,
                 defaultDescription: "Unlimited",
@@ -326,7 +326,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
        promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, swaFullCache,
        noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK,
        topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
-       repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, thoughtBudget, noHistory,
+       repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory,
        environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
     }) {
         try {
@@ -335,7 +335,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
                batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads,
                temperature, minP, topK, topP, seed,
                gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
-               maxTokens, thoughtBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize,
+               maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize,
                debug, meter, timing, noMmap, printTimings
             });
         } catch (err) {
@@ -352,12 +352,12 @@ async function RunChat({
    contextSize, batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar: grammarArg,
    jsonSchemaGrammarFile: jsonSchemaGrammarFilePath,
    threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine,
-   repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, thoughtBudget, noHistory, environmentFunctions, tokenPredictionDraftModel,
+   repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel,
    tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
 }: ChatCommand) {
     if (contextSize === -1) contextSize = undefined;
     if (gpuLayers === -1) gpuLayers = undefined;
-    if (thoughtBudget === -1) thoughtBudget = undefined;
+    if (reasoningBudget === -1) reasoningBudget = undefined;
 
     const headers = resolveHeaderFlag(headerArg);
     const trimWhitespace = !noTrimWhitespace;
@@ -696,7 +696,7 @@ async function RunChat({
            signal: abortController.signal,
            stopOnAbortSignal: true,
            budgets: {
-               thoughtTokens: thoughtBudget
+               thoughtTokens: reasoningBudget
            },
            repeatPenalty: {
                penalty: repeatPenalty,
```
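Worth noting from the hunks above: `reasoningBudget` becomes the canonical flag name, with `tb` and `thinkingBudget` still accepted as aliases and `thoughtsBudget` newly added, while the old canonical name `thoughtBudget` is not kept as an alias. The default of `-1` is normalized to `undefined` in `RunChat` (surfaced as "Unlimited" in the option's help text), and the value flows into the same `budgets.thoughtTokens` prompt option that the documentation change describes.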

test/modelDependent/qwen3-0.6b/thinkingBudget.test.ts renamed to test/modelDependent/qwen3-0.6b/reasoningBudget.test.ts

Lines changed: 8 additions & 8 deletions

```diff
@@ -4,8 +4,8 @@ import {getModelFile} from "../../utils/modelFiles.js";
 import {getTestLlama} from "../../utils/getTestLlama.js";
 
 describe("qwen3 0.6b", () => {
-    describe("thinking budget", () => {
-        test("doesn't exceed thinking budget", {timeout: 1000 * 60 * 60 * 2}, async () => {
+    describe("reasoning budget", () => {
+        test("doesn't exceed reasoning budget", {timeout: 1000 * 60 * 60 * 2}, async () => {
            const modelPath = await getModelFile("Qwen3-0.6B-Q8_0.gguf");
            const llama = await getTestLlama();
@@ -22,9 +22,9 @@ describe("qwen3 0.6b", () => {
            const initialChatHistory = chatSession.getChatHistory();
 
            async function promptWithBudget({
-                prompt, maxTokens, thinkingBudget
+                prompt, maxTokens, reasoningBudget
            }: {
-                prompt: string, maxTokens: number, thinkingBudget?: number
+                prompt: string, maxTokens: number, reasoningBudget?: number
            }) {
                let thoughtTokens = 0;
                let totalTokens = 0;
@@ -33,7 +33,7 @@ describe("qwen3 0.6b", () => {
                const {responseText, response} = await chatSession.promptWithMeta(prompt, {
                    maxTokens,
                    budgets: {
-                        thoughtTokens: thinkingBudget
+                        thoughtTokens: reasoningBudget
                    },
                    onResponseChunk(chunk) {
                        if (chunk.type === "segment" && chunk.segmentType === "thought") {
@@ -57,7 +57,7 @@ describe("qwen3 0.6b", () => {
 
            const res1 = await promptWithBudget({
                prompt: "Where do llamas come from?",
-                thinkingBudget: 10,
+                reasoningBudget: 10,
                maxTokens: 20
            });
            expect(res1.thoughtTokens).to.be.gt(1);
@@ -67,7 +67,7 @@ describe("qwen3 0.6b", () => {
 
            const res2 = await promptWithBudget({
                prompt: "Where do llamas come from?",
-                thinkingBudget: 0,
+                reasoningBudget: 0,
                maxTokens: 20
            });
            expect(res2.thoughtTokens).to.be.eq(0);
@@ -76,7 +76,7 @@ describe("qwen3 0.6b", () => {
 
            const res3 = await promptWithBudget({
                prompt: "Where do llamas come from?",
-                thinkingBudget: 20,
+                reasoningBudget: 20,
                maxTokens: 20
            });
            expect(res3.thoughtTokens).to.be.eq(res3.totalTokens);
```
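The three assertions visible in this diff cover the boundary cases of the budget: with a budget of 10 below `maxTokens`, thought tokens are generated but bounded (only the lower-bound assertion is shown in the changed lines); with a budget of 0, no thought tokens are produced at all; and with a budget equal to `maxTokens` (20), the budget never binds, so every generated token can be a thought token and `thoughtTokens` equals `totalTokens`.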
