Skip to content

Commit bbb5d3c

Browse files
committed
feat: capture and expose thinking tokens
Properly captures and exposes thinking tokens in WCS.
1 parent 7e3b970 commit bbb5d3c

File tree

16 files changed

+90
-23
lines changed

16 files changed

+90
-23
lines changed

examples/environments/remote_env/fake-executor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ export class FakeRemoteExecutor implements Executor {
2929
outputFiles: [{code: 'angular.dev Works', filePath: 'main.ts'}],
3030
reasoning: '',
3131
errors: [],
32-
usage: {inputTokens: 0, totalTokens: 0, outputTokens: 0},
32+
usage: {inputTokens: 0, totalTokens: 0, outputTokens: 0, thinkingTokens: 0},
3333
};
3434
}
3535

report-app/src/app/pages/report-viewer/report-viewer.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,12 @@ <h3>Usage Details</h3>
140140
Output tokens: {{ details.summary.usage.outputTokens | number }}
141141
</li>
142142

143+
@if (details.summary.usage.thinkingTokens != null) {
144+
<li class="status-badge neutral">
145+
Thinking tokens: {{ details.summary.usage.thinkingTokens | number }}
146+
</li>
147+
}
148+
143149
@if (details.summary.usage.totalTokens != null) {
144150
<li class="status-badge neutral">
145151
Total tokens: {{ details.summary.usage.totalTokens | number }}

runner/codegen/ai-sdk-runner.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,12 @@ export class AiSDKRunner implements LlmRunner {
6464
return {
6565
reasoning: response.reasoningText ?? '',
6666
text: response.text,
67-
usage: response.usage,
67+
usage: {
68+
inputTokens: response.usage.inputTokens ?? 0,
69+
outputTokens: response.usage.outputTokens ?? 0,
70+
thinkingTokens: response.usage.reasoningTokens ?? 0,
71+
totalTokens: response.usage.totalTokens ?? 0,
72+
},
6873
// TODO: Consider supporting `toolLogs` and MCP here.
6974
};
7075
}
@@ -85,7 +90,12 @@ export class AiSDKRunner implements LlmRunner {
8590
return {
8691
reasoning: response.reasoning ?? '',
8792
output: response.object,
88-
usage: response.usage,
93+
usage: {
94+
inputTokens: response.usage.inputTokens ?? 0,
95+
outputTokens: response.usage.outputTokens ?? 0,
96+
thinkingTokens: response.usage.reasoningTokens ?? 0,
97+
totalTokens: response.usage.totalTokens ?? 0,
98+
},
8999
// TODO: Consider supporting `toolLogs` and MCP here.
90100
};
91101
}

runner/codegen/genkit/genkit-runner.ts

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import {GenkitLogger} from './genkit-logger.js';
2727
import {MODEL_PROVIDERS} from './models.js';
2828
import {UserFacingError} from '../../utils/errors.js';
2929
import {GenkitModelProvider, PromptDataForCounting} from './model-provider.js';
30-
import {ToolLogEntry} from '../../shared-interfaces.js';
30+
import {ToolLogEntry, Usage} from '../../shared-interfaces.js';
3131
import {combineAbortSignals} from '../../utils/abort-signal.js';
3232
import {toToolDefinition} from 'genkit/tool';
3333

@@ -58,7 +58,12 @@ export class GenkitRunner implements LlmRunner {
5858

5959
return {
6060
output: result.output,
61-
usage: result.usage,
61+
usage: {
62+
inputTokens: result.usage.inputTokens ?? 0,
63+
outputTokens: result.usage.outputTokens ?? 0,
64+
thinkingTokens: result.usage.thoughtsTokens ?? 0,
65+
totalTokens: result.usage.totalTokens ?? 0,
66+
},
6267
reasoning: result.reasoning,
6368
};
6469
}
@@ -93,7 +98,12 @@ export class GenkitRunner implements LlmRunner {
9398

9499
return {
95100
files,
96-
usage: result.usage,
101+
usage: {
102+
inputTokens: result.usage.inputTokens ?? 0,
103+
outputTokens: result.usage.outputTokens ?? 0,
104+
thinkingTokens: result.usage.thoughtsTokens ?? 0,
105+
totalTokens: result.usage.totalTokens ?? 0,
106+
},
97107
reasoning: result.reasoning,
98108
toolLogs: this.flushToolLogs(),
99109
};
@@ -111,7 +121,12 @@ export class GenkitRunner implements LlmRunner {
111121

112122
return {
113123
text: result.text,
114-
usage: result.usage,
124+
usage: {
125+
inputTokens: result.usage.inputTokens ?? 0,
126+
outputTokens: result.usage.outputTokens ?? 0,
127+
thinkingTokens: result.usage.thoughtsTokens ?? 0,
128+
totalTokens: result.usage.totalTokens ?? 0,
129+
},
115130
reasoning: result.reasoning,
116131
toolLogs: this.flushToolLogs(),
117132
};

runner/codegen/llm-runner.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,15 +136,15 @@ export interface LocalLlmConstrainedOutputGenerateResponse<T extends z.ZodTypeAn
136136
/** Result generated by the LLM. */
137137
output: z.infer<T> | null;
138138
/** Token usage data, if available. */
139-
usage?: Partial<Usage>;
139+
usage?: Usage;
140140
/** Reasoning messages from the LLM. */
141141
reasoning: string;
142142
}
143143

144144
/** LLM response. */
145145
interface BaseLlmGenerateResponse {
146146
/** Token usage data, if available. */
147-
usage?: Partial<Usage>;
147+
usage?: Usage;
148148
/** Reasoning messages from the LLM. */
149149
reasoning: string;
150150
/** Tool requests and responses. */

runner/orchestration/build-serve-test-loop.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ export async function attemptBuildAndTest(
7878
const initialAttempt = {
7979
outputFiles: initialResponse.files,
8080
usage: {
81-
...{inputTokens: 0, outputTokens: 0, totalTokens: 0},
81+
...{inputTokens: 0, outputTokens: 0, totalTokens: 0, thinkingTokens: 0},
8282
...initialResponse.usage,
8383
},
8484
reasoning: initialResponse.reasoning,

runner/orchestration/codegen.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ export async function generateCodeWithAI(
4848
inputTokens: response.usage?.inputTokens ?? 0,
4949
outputTokens: response.usage?.outputTokens ?? 0,
5050
totalTokens: response.usage?.totalTokens ?? 0,
51+
thinkingTokens: response.usage?.thinkingTokens ?? 0,
5152
};
5253
reasoning = response.reasoning;
5354
toolLogs = response.toolLogs ?? [];
@@ -65,7 +66,7 @@ export async function generateCodeWithAI(
6566

6667
success = true;
6768
} catch (error) {
68-
usage = {inputTokens: 0, outputTokens: 0, totalTokens: 0};
69+
usage = {inputTokens: 0, outputTokens: 0, totalTokens: 0, thinkingTokens: 0};
6970
success = false;
7071
reasoning = '';
7172
toolLogs = [];
@@ -161,7 +162,20 @@ export function prepareContextFilesMessage(
161162
}
162163

163164
export function createLlmResponseTokenUsageMessage(response: LlmResponse): string | null {
164-
return response.usage.inputTokens || response.usage.outputTokens || response.usage.totalTokens
165-
? `(input tokens: ${response.usage.inputTokens}, output tokens: ${response.usage.outputTokens}, total tokens: ${response.usage.totalTokens})`
166-
: null;
165+
const usage = response?.usage;
166+
if (!usage) {
167+
return null;
168+
}
169+
170+
// Build the token detail string parts
171+
const input = usage.inputTokens !== undefined ? `input tokens: ${usage.inputTokens}` : '';
172+
const output = usage.outputTokens !== undefined ? `output tokens: ${usage.outputTokens}` : '';
173+
const thinking =
174+
usage.thinkingTokens !== undefined ? `thinking tokens: ${usage.thinkingTokens}` : '';
175+
const total = usage.totalTokens !== undefined ? `total tokens: ${usage.totalTokens}` : '';
176+
177+
// Filter out empty strings and join with a separator
178+
const parts = [input, output, thinking, total].filter(part => part !== '');
179+
180+
return parts.length > 0 ? `(${parts.join(', ')})` : null;
167181
}

runner/orchestration/generate-initial-files.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ export async function generateInitialFiles(
5555
usage: {
5656
inputTokens: 0,
5757
outputTokens: 0,
58+
thinkingTokens: 0,
59+
totalTokens: 0,
5860
} satisfies Usage,
5961
// TODO: We could also try save/restore reasoning locally.
6062
reasoning: '',

runner/orchestration/generate-summary.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,25 @@ export async function prepareSummary(
1818
): Promise<RunSummary> {
1919
let inputTokens = 0;
2020
let outputTokens = 0;
21+
let thinkingTokens = 0;
2122
let totalTokens = 0;
2223

2324
assessments.forEach(result => {
2425
// Incorporate usage from running raters.
2526
if (result.score.tokenUsage) {
2627
inputTokens += result.score.tokenUsage.inputTokens;
2728
outputTokens += result.score.tokenUsage.outputTokens;
28-
totalTokens += result.score.tokenUsage.totalTokens ?? 0;
29+
totalTokens += result.score.tokenUsage.totalTokens;
30+
thinkingTokens += result.score.tokenUsage.thinkingTokens;
2931
}
3032

3133
// Incorporate usage numbers from all generate + build attempts.
3234
result.attemptDetails.forEach(attempt => {
3335
if (attempt.usage) {
34-
inputTokens += attempt.usage.inputTokens ?? 0;
35-
outputTokens += attempt.usage.outputTokens ?? 0;
36-
totalTokens += attempt.usage.totalTokens ?? 0;
36+
inputTokens += attempt.usage.inputTokens;
37+
outputTokens += attempt.usage.outputTokens;
38+
totalTokens += attempt.usage.totalTokens;
39+
thinkingTokens += attempt.usage.thinkingTokens;
3740
}
3841
});
3942
});
@@ -45,6 +48,7 @@ export async function prepareSummary(
4548
const result = await summarizeReportWithAI(generateAiSummaryLlm, abortSignal, assessments);
4649
inputTokens += result.usage.inputTokens;
4750
outputTokens += result.usage.outputTokens;
51+
thinkingTokens += result.usage.thinkingTokens;
4852
totalTokens += result.usage.totalTokens;
4953
aiSummary = result.responseHtml;
5054
console.log(`✅ Generated AI summary.`);
@@ -78,6 +82,7 @@ export async function prepareSummary(
7882
usage: {
7983
inputTokens,
8084
outputTokens,
85+
thinkingTokens,
8186
totalTokens,
8287
},
8388
runner: {

runner/orchestration/user-journeys.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ Create a modern, single-page web application that allows users to find recipes b
131131
inputTokens: result.usage?.inputTokens ?? 0,
132132
outputTokens: result.usage?.outputTokens ?? 0,
133133
totalTokens: result.usage?.totalTokens ?? 0,
134+
thinkingTokens: result.usage?.thinkingTokens ?? 0,
134135
},
135136
result: result.output,
136137
};

0 commit comments

Comments
 (0)