Skip to content

Commit e68e935

Browse files
Tanvir/add metrics for tokens (#4787)
Co-authored-by: vercel[bot] <35613825+vercel[bot]@users.noreply.github.com>
1 parent 325a67c commit e68e935

File tree

4 files changed

+74
-18
lines changed

4 files changed

+74
-18
lines changed

packages/fern-docs/search-server/ask-fern/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"ai-fallback": "^1.0.2",
4444
"es-toolkit": "^1.32.0",
4545
"gpt-tokenizer": "2.9.0",
46+
"js-tiktoken": "^1.0.21",
4647
"zod": "catalog:"
4748
},
4849
"devDependencies": {

packages/fern-docs/search-server/ask-fern/src/ask-fern/stream-anthropic.ts

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import {
3030
type TurbopufferRecord
3131
} from "../index";
3232
import { getCodeIndexName } from "../turbopuffer/utils/get-turbopuffer-namespace";
33+
import { estimateTokens, estimateTokensFromArray } from "../utils/estimate-tokens";
3334
import { runQueryTurbopuffer } from "./run-query-turbopuffer";
3435
import { MAX_QUERY_ATTEMPTS, TOP_K, TOP_K_CODE } from "./stream-constants";
3536

@@ -138,6 +139,13 @@ export async function runRouteForAnthropic({
138139
let timeToFirstToken: number | undefined = undefined;
139140
let responseText = "";
140141

142+
const initialSearchResultTokens = estimateTokensFromArray(systemPromptDocuments);
143+
let toolCallResultTokens = 0;
144+
const toolCallDocumentCounts: { documentationSearch: number; codeSearch: number } = {
145+
documentationSearch: 0,
146+
codeSearch: 0
147+
};
148+
141149
const assistantQueryId = crypto.randomUUID();
142150

143151
const uiMessageStream = createUIMessageStream({
@@ -201,19 +209,18 @@ export async function runRouteForAnthropic({
201209
documentIdsToIgnore.push(hit.id);
202210
if (url != null && !urlsToIgnore.includes(url)) {
203211
urlsToIgnore.push(url);
204-
if (hit.attributes.document.length > 20000) {
205-
response.push({
206-
...hit.attributes,
207-
document: hit.attributes.document.slice(0, 20000),
208-
url
209-
});
210-
} else {
211-
response.push({
212-
...hit.attributes,
213-
document: hit.attributes.document,
214-
url
215-
});
216-
}
212+
const document =
213+
hit.attributes.document.length > 20000
214+
? hit.attributes.document.slice(0, 20000)
215+
: hit.attributes.document;
216+
response.push({
217+
...hit.attributes,
218+
document,
219+
url
220+
});
221+
222+
toolCallResultTokens += estimateTokens(document);
223+
toolCallDocumentCounts.documentationSearch++;
217224
if (response.length >= TOP_K) {
218225
return response;
219226
}
@@ -253,13 +260,19 @@ export async function runRouteForAnthropic({
253260
];
254261
}
255262

256-
return result.map((hit) => ({
257-
...hit.attributes,
258-
document:
263+
return result.map((hit) => {
264+
const document =
259265
hit.attributes.document.length > 20000
260266
? hit.attributes.document.slice(0, 20000)
261-
: hit.attributes.document
262-
}));
267+
: hit.attributes.document;
268+
269+
toolCallResultTokens += estimateTokens(document);
270+
toolCallDocumentCounts.codeSearch++;
271+
return {
272+
...hit.attributes,
273+
document
274+
};
275+
});
263276
}
264277
})
265278
},
@@ -327,6 +340,11 @@ export async function runRouteForAnthropic({
327340
namespace: turbopufferNamespace,
328341
numToolCalls,
329342
finishReason: e.finishReason,
343+
estimatedInitialSearchResultTokens: initialSearchResultTokens,
344+
estimatedToolCallResultTokens: toolCallResultTokens,
345+
numInitialSearchResults: searchResults.length,
346+
numDocumentationSearchResults: toolCallDocumentCounts.documentationSearch,
347+
numCodeSearchResults: toolCallDocumentCounts.codeSearch,
330348
...e.usage
331349
});
332350
e.warnings?.forEach((warning) => {
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { Tiktoken } from "js-tiktoken/lite";
2+
import cl100k_base from "js-tiktoken/ranks/cl100k_base";
3+
4+
// Initialize the tokenizer with cl100k_base encoding (used by GPT-4/GPT-3.5-turbo)
5+
// This provides a good approximation for Claude models as well
6+
const encoding = new Tiktoken(cl100k_base);
7+
8+
/**
9+
* Counts the number of tokens in a given text using tiktoken's cl100k_base encoding.
10+
* This encoding is used by GPT-4 and provides a reasonable approximation for Claude models.
11+
*
12+
* @param text - The text to count tokens for
13+
* @returns Number of tokens
14+
*/
15+
export function estimateTokens(text: string): number {
16+
return encoding.encode(text).length;
17+
}
18+
19+
/**
20+
* Counts the total number of tokens in an array of strings.
21+
*
22+
* @param texts - Array of text strings
23+
* @returns Total number of tokens
24+
*/
25+
export function estimateTokensFromArray(texts: string[]): number {
26+
return texts.reduce((total, text) => total + estimateTokens(text), 0);
27+
}

pnpm-lock.yaml

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)