Skip to content

Commit e68e935

Browse files
Tanvir/add metrics for tokens (#4787)
Co-authored-by: vercel[bot] <35613825+vercel[bot]@users.noreply.github.com>
1 parent 325a67c commit e68e935

File tree

4 files changed

+74
-18
lines changed

4 files changed

+74
-18
lines changed

packages/fern-docs/search-server/ask-fern/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"ai-fallback": "^1.0.2",
4444
"es-toolkit": "^1.32.0",
4545
"gpt-tokenizer": "2.9.0",
46+
"js-tiktoken": "^1.0.21",
4647
"zod": "catalog:"
4748
},
4849
"devDependencies": {

packages/fern-docs/search-server/ask-fern/src/ask-fern/stream-anthropic.ts

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import {
3030
type TurbopufferRecord
3131
} from "../index";
3232
import { getCodeIndexName } from "../turbopuffer/utils/get-turbopuffer-namespace";
33+
import { estimateTokens, estimateTokensFromArray } from "../utils/estimate-tokens";
3334
import { runQueryTurbopuffer } from "./run-query-turbopuffer";
3435
import { MAX_QUERY_ATTEMPTS, TOP_K, TOP_K_CODE } from "./stream-constants";
3536

@@ -138,6 +139,13 @@ export async function runRouteForAnthropic({
138139
let timeToFirstToken: number | undefined = undefined;
139140
let responseText = "";
140141

142+
const initialSearchResultTokens = estimateTokensFromArray(systemPromptDocuments);
143+
let toolCallResultTokens = 0;
144+
const toolCallDocumentCounts: { documentationSearch: number; codeSearch: number } = {
145+
documentationSearch: 0,
146+
codeSearch: 0
147+
};
148+
141149
const assistantQueryId = crypto.randomUUID();
142150

143151
const uiMessageStream = createUIMessageStream({
@@ -201,19 +209,18 @@ export async function runRouteForAnthropic({
201209
documentIdsToIgnore.push(hit.id);
202210
if (url != null && !urlsToIgnore.includes(url)) {
203211
urlsToIgnore.push(url);
204-
if (hit.attributes.document.length > 20000) {
205-
response.push({
206-
...hit.attributes,
207-
document: hit.attributes.document.slice(0, 20000),
208-
url
209-
});
210-
} else {
211-
response.push({
212-
...hit.attributes,
213-
document: hit.attributes.document,
214-
url
215-
});
216-
}
212+
const document =
213+
hit.attributes.document.length > 20000
214+
? hit.attributes.document.slice(0, 20000)
215+
: hit.attributes.document;
216+
response.push({
217+
...hit.attributes,
218+
document,
219+
url
220+
});
221+
222+
toolCallResultTokens += estimateTokens(document);
223+
toolCallDocumentCounts.documentationSearch++;
217224
if (response.length >= TOP_K) {
218225
return response;
219226
}
@@ -253,13 +260,19 @@ export async function runRouteForAnthropic({
253260
];
254261
}
255262

256-
return result.map((hit) => ({
257-
...hit.attributes,
258-
document:
263+
return result.map((hit) => {
264+
const document =
259265
hit.attributes.document.length > 20000
260266
? hit.attributes.document.slice(0, 20000)
261-
: hit.attributes.document
262-
}));
267+
: hit.attributes.document;
268+
269+
toolCallResultTokens += estimateTokens(document);
270+
toolCallDocumentCounts.codeSearch++;
271+
return {
272+
...hit.attributes,
273+
document
274+
};
275+
});
263276
}
264277
})
265278
},
@@ -327,6 +340,11 @@ export async function runRouteForAnthropic({
327340
namespace: turbopufferNamespace,
328341
numToolCalls,
329342
finishReason: e.finishReason,
343+
estimatedInitialSearchResultTokens: initialSearchResultTokens,
344+
estimatedToolCallResultTokens: toolCallResultTokens,
345+
numInitialSearchResults: searchResults.length,
346+
numDocumentationSearchResults: toolCallDocumentCounts.documentationSearch,
347+
numCodeSearchResults: toolCallDocumentCounts.codeSearch,
330348
...e.usage
331349
});
332350
e.warnings?.forEach((warning) => {
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { Tiktoken } from "js-tiktoken/lite";
2+
import cl100k_base from "js-tiktoken/ranks/cl100k_base";
3+
4+
// Initialize the tokenizer with cl100k_base encoding (used by GPT-4/GPT-3.5-turbo)
5+
// This provides a good approximation for Claude models as well
6+
const encoding = new Tiktoken(cl100k_base);
7+
8+
/**
9+
* Counts the number of tokens in a given text using tiktoken's cl100k_base encoding.
10+
* This encoding is used by GPT-4 and provides a reasonable approximation for Claude models.
11+
*
12+
* @param text - The text to count tokens for
13+
* @returns Number of tokens
14+
*/
15+
export function estimateTokens(text: string): number {
16+
return encoding.encode(text).length;
17+
}
18+
19+
/**
20+
* Counts the total number of tokens in an array of strings.
21+
*
22+
* @param texts - Array of text strings
23+
* @returns Total number of tokens
24+
*/
25+
export function estimateTokensFromArray(texts: string[]): number {
26+
return texts.reduce((total, text) => total + estimateTokens(text), 0);
27+
}

pnpm-lock.yaml

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)