Merge pull request #769 from thecodacus/token-usage

thecodacus · web-flow · commit 78505ed2f347 · 2024-12-16T20:39:57.000+05:30
feat: Show token usage on LLM call assistant message
diff --git a/app/commit.json b/app/commit.json
@@ -1 +1,2 @@
-{ "commit": "77073a5e7f759ae8e5752628131d0c56df6b5c34" , "version": "" }
+{ "commit": "77073a5e7f759ae8e5752628131d0c56df6b5c34" , "version": "0.0.1" }
+
diff --git a/app/components/chat/AssistantMessage.tsx b/app/components/chat/AssistantMessage.tsx
@@ -1,13 +1,30 @@
 import { memo } from 'react';
 import { Markdown } from './Markdown';
+import type { JSONValue } from 'ai';
 
 interface AssistantMessageProps {
   content: string;
+  annotations?: JSONValue[];
 }
 
-export const AssistantMessage = memo(({ content }: AssistantMessageProps) => {
+export const AssistantMessage = memo(({ content, annotations }: AssistantMessageProps) => {
+  const filteredAnnotations = (annotations?.filter(
+    (annotation: JSONValue) => annotation && typeof annotation === 'object' && Object.keys(annotation).includes('type'),
+  ) || []) as { type: string; value: any }[];
+
+  const usage: {
+    completionTokens: number;
+    promptTokens: number;
+    totalTokens: number;
+  } = filteredAnnotations.find((annotation) => annotation.type === 'usage')?.value;
+
   return (
     <div className="overflow-hidden w-full">
+      {usage && (
+        <div className="text-sm text-bolt-elements-textSecondary mb-2">
+          Tokens: {usage.totalTokens} (prompt: {usage.promptTokens}, completion: {usage.completionTokens})
+        </div>
+      )}
       <Markdown html>{content}</Markdown>
     </div>
   );
diff --git a/app/components/chat/Chat.client.tsx b/app/components/chat/Chat.client.tsx
@@ -116,13 +116,22 @@ export const ChatImpl = memo(
         apiKeys,
         files,
       },
+      sendExtraMessageFields: true,
       onError: (error) => {
         logger.error('Request failed\n\n', error);
         toast.error(
           'There was an error processing your request: ' + (error.message ? error.message : 'No details were returned'),
         );
       },
-      onFinish: () => {
+      onFinish: (message, response) => {
+        const usage = response.usage;
+
+        if (usage) {
+          console.log('Token usage:', usage);
+
+          // You can now use the usage data as needed
+        }
+
         logger.debug('Finished streaming');
       },
       initialMessages,
diff --git a/app/components/chat/Messages.client.tsx b/app/components/chat/Messages.client.tsx
@@ -65,7 +65,11 @@ export const Messages = React.forwardRef<HTMLDivElement, MessagesProps>((props:
                   </div>
                 )}
                 <div className="grid grid-col-1 w-full">
-                  {isUserMessage ? <UserMessage content={content} /> : <AssistantMessage content={content} />}
+                  {isUserMessage ? (
+                    <UserMessage content={content} />
+                  ) : (
+                    <AssistantMessage content={content} annotations={message.annotations} />
+                  )}
                 </div>
                 {!isUserMessage && (
                   <div className="flex gap-2 flex-col lg:flex-row">
diff --git a/app/components/chat/UserMessage.tsx b/app/components/chat/UserMessage.tsx
@@ -12,42 +12,36 @@ interface UserMessageProps {
 export function UserMessage({ content }: UserMessageProps) {
   if (Array.isArray(content)) {
     const textItem = content.find((item) => item.type === 'text');
-    const textContent = sanitizeUserMessage(textItem?.text || '');
+    const textContent = stripMetadata(textItem?.text || '');
     const images = content.filter((item) => item.type === 'image' && item.image);
 
     return (
       <div className="overflow-hidden pt-[4px]">
-        <div className="flex items-start gap-4">
-          <div className="flex-1">
-            <Markdown limitedMarkdown>{textContent}</Markdown>
-          </div>
-          {images.length > 0 && (
-            <div className="flex-shrink-0 w-[160px]">
-              {images.map((item, index) => (
-                <div key={index} className="relative">
-                  <img
-                    src={item.image}
-                    alt={`Uploaded image ${index + 1}`}
-                    className="w-full h-[160px] rounded-lg object-cover border border-bolt-elements-borderColor"
-                  />
-                </div>
-              ))}
-            </div>
-          )}
+        <div className="flex flex-col gap-4">
+          {textContent && <Markdown html>{textContent}</Markdown>}
+          {images.map((item, index) => (
+            <img
+              key={index}
+              src={item.image}
+              alt={`Image ${index + 1}`}
+              className="max-w-full h-auto rounded-lg"
+              style={{ maxHeight: '512px', objectFit: 'contain' }}
+            />
+          ))}
         </div>
       </div>
     );
   }
 
-  const textContent = sanitizeUserMessage(content);
+  const textContent = stripMetadata(content);
 
   return (
     <div className="overflow-hidden pt-[4px]">
-      <Markdown limitedMarkdown>{textContent}</Markdown>
+      <Markdown html>{textContent}</Markdown>
     </div>
   );
 }
 
-function sanitizeUserMessage(content: string) {
+function stripMetadata(content: string) {
   return content.replace(MODEL_REGEX, '').replace(PROVIDER_REGEX, '');
 }
diff --git a/app/components/settings/features/FeaturesTab.tsx b/app/components/settings/features/FeaturesTab.tsx
@@ -3,8 +3,8 @@ import { Switch } from '~/components/ui/Switch';
 import { useSettings } from '~/lib/hooks/useSettings';
 
 export default function FeaturesTab() {
-  const { debug, enableDebugMode, isLocalModel, enableLocalModels, enableEventLogs, latestBranch, enableLatestBranch } =
-    useSettings();
+
+  const { debug, enableDebugMode, isLocalModel, enableLocalModels, enableEventLogs, latestBranch, enableLatestBranch } = useSettings();
 
   const handleToggle = (enabled: boolean) => {
     enableDebugMode(enabled);
diff --git a/app/lib/hooks/useSettings.tsx b/app/lib/hooks/useSettings.tsx
@@ -99,7 +99,6 @@ export function useSettings() {
     if (checkCommit === undefined) {
       checkCommit = commit.commit;
     }
-
     if (savedLatestBranch === undefined || checkCommit !== commit.commit) {
       // If setting hasn't been set by user, check version
       checkIsStableVersion().then((isStable) => {
diff --git a/app/routes/api.chat.ts b/app/routes/api.chat.ts
@@ -1,4 +1,5 @@
 import { type ActionFunctionArgs } from '@remix-run/cloudflare';
+import { createDataStream } from 'ai';
 import { MAX_RESPONSE_SEGMENTS, MAX_TOKENS } from '~/lib/.server/llm/constants';
 import { CONTINUE_PROMPT } from '~/lib/.server/llm/prompts';
 import { streamText, type Messages, type StreamingOptions } from '~/lib/.server/llm/stream-text';
@@ -9,17 +10,15 @@ export async function action(args: ActionFunctionArgs) {
   return chatAction(args);
 }
 
-function parseCookies(cookieHeader: string) {
-  const cookies: any = {};
+function parseCookies(cookieHeader: string): Record<string, string> {
+  const cookies: Record<string, string> = {};
 
-  // Split the cookie string by semicolons and spaces
   const items = cookieHeader.split(';').map((cookie) => cookie.trim());
 
   items.forEach((item) => {
     const [name, ...rest] = item.split('=');
 
     if (name && rest) {
-      // Decode the name and value, and join value parts in case it contains '='
       const decodedName = decodeURIComponent(name.trim());
       const decodedValue = decodeURIComponent(rest.join('=').trim());
       cookies[decodedName] = decodedValue;
@@ -36,21 +35,49 @@ async function chatAction({ context, request }: ActionFunctionArgs) {
   }>();
 
   const cookieHeader = request.headers.get('Cookie');
-
-  // Parse the cookie's value (returns an object or null if no cookie exists)
   const apiKeys = JSON.parse(parseCookies(cookieHeader || '').apiKeys || '{}');
   const providerSettings: Record<string, IProviderSetting> = JSON.parse(
     parseCookies(cookieHeader || '').providers || '{}',
   );
 
   const stream = new SwitchableStream();
 
+  const cumulativeUsage = {
+    completionTokens: 0,
+    promptTokens: 0,
+    totalTokens: 0,
+  };
+
   try {
     const options: StreamingOptions = {
       toolChoice: 'none',
-      onFinish: async ({ text: content, finishReason }) => {
+      onFinish: async ({ text: content, finishReason, usage }) => {
+        console.log('usage', usage);
+
+        if (usage) {
+          cumulativeUsage.completionTokens += usage.completionTokens || 0;
+          cumulativeUsage.promptTokens += usage.promptTokens || 0;
+          cumulativeUsage.totalTokens += usage.totalTokens || 0;
+        }
+
         if (finishReason !== 'length') {
-          return stream.close();
+          return stream
+            .switchSource(
+              createDataStream({
+                async execute(dataStream) {
+                  dataStream.writeMessageAnnotation({
+                    type: 'usage',
+                    value: {
+                      completionTokens: cumulativeUsage.completionTokens,
+                      promptTokens: cumulativeUsage.promptTokens,
+                      totalTokens: cumulativeUsage.totalTokens,
+                    },
+                  });
+                },
+                onError: (error: any) => `Custom error: ${error.message}`,
+              }),
+            )
+            .then(() => stream.close());
         }
 
         if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
@@ -73,7 +100,7 @@ async function chatAction({ context, request }: ActionFunctionArgs) {
           providerSettings,
         });
 
-        return stream.switchSource(result.toAIStream());
+        return stream.switchSource(result.toDataStream());
       },
     };
 
@@ -86,7 +113,7 @@ async function chatAction({ context, request }: ActionFunctionArgs) {
       providerSettings,
     });
 
-    stream.switchSource(result.toAIStream());
+    stream.switchSource(result.toDataStream());
 
     return new Response(stream.readable, {
       status: 200,
@@ -95,7 +122,7 @@ async function chatAction({ context, request }: ActionFunctionArgs) {
       },
     });
   } catch (error: any) {
-    console.log(error);
+    console.error(error);
 
     if (error.message?.includes('API key')) {
       throw new Response('Invalid or missing API key', {
diff --git a/app/routes/api.enhancer.ts b/app/routes/api.enhancer.ts
@@ -1,5 +1,6 @@
 import { type ActionFunctionArgs } from '@remix-run/cloudflare';
-import { StreamingTextResponse, parseStreamPart } from 'ai';
+
+//import { StreamingTextResponse, parseStreamPart } from 'ai';
 import { streamText } from '~/lib/.server/llm/stream-text';
 import { stripIndents } from '~/utils/stripIndent';
 import type { IProviderSetting, ProviderInfo } from '~/types/model';
@@ -73,32 +74,32 @@ async function enhancerAction({ context, request }: ActionFunctionArgs) {
             `[Model: ${model}]\n\n[Provider: ${providerName}]\n\n` +
             stripIndents`
             You are a professional prompt engineer specializing in crafting precise, effective prompts.
-          Your task is to enhance prompts by making them more specific, actionable, and effective.
-
-          I want you to improve the user prompt that is wrapped in \`<original_prompt>\` tags.
-
-          For valid prompts:
-          - Make instructions explicit and unambiguous
-          - Add relevant context and constraints
-          - Remove redundant information
-          - Maintain the core intent
-          - Ensure the prompt is self-contained
-          - Use professional language
-
-          For invalid or unclear prompts:
-          - Respond with a clear, professional guidance message
-          - Keep responses concise and actionable
-          - Maintain a helpful, constructive tone
-          - Focus on what the user should provide
-          - Use a standard template for consistency
-
-          IMPORTANT: Your response must ONLY contain the enhanced prompt text.
-          Do not include any explanations, metadata, or wrapper tags.
-
-          <original_prompt>
-            ${message}
-          </original_prompt>
-        `,
+            Your task is to enhance prompts by making them more specific, actionable, and effective.
+
+            I want you to improve the user prompt that is wrapped in \`<original_prompt>\` tags.
+
+            For valid prompts:
+            - Make instructions explicit and unambiguous
+            - Add relevant context and constraints
+            - Remove redundant information
+            - Maintain the core intent
+            - Ensure the prompt is self-contained
+            - Use professional language
+
+            For invalid or unclear prompts:
+            - Respond with clear, professional guidance
+            - Keep responses concise and actionable
+            - Maintain a helpful, constructive tone
+            - Focus on what the user should provide
+            - Use a standard template for consistency
+
+            IMPORTANT: Your response must ONLY contain the enhanced prompt text.
+            Do not include any explanations, metadata, or wrapper tags.
+
+            <original_prompt>
+              ${message}
+            </original_prompt>
+          `,
         },
       ],
       env: context.cloudflare.env,
@@ -113,7 +114,7 @@ async function enhancerAction({ context, request }: ActionFunctionArgs) {
 
         for (const line of lines) {
           try {
-            const parsed = parseStreamPart(line);
+            const parsed = JSON.parse(line);
 
             if (parsed.type === 'text') {
               controller.enqueue(encoder.encode(parsed.value));
@@ -128,7 +129,12 @@ async function enhancerAction({ context, request }: ActionFunctionArgs) {
 
     const transformedStream = result.toDataStream().pipeThrough(transformStream);
 
-    return new StreamingTextResponse(transformedStream);
+    return new Response(transformedStream, {
+      status: 200,
+      headers: {
+        'Content-Type': 'text/plain; charset=utf-8',
+      },
+    });
   } catch (error: unknown) {
     console.log(error);
 
diff --git a/package.json b/package.json
@@ -73,7 +73,7 @@
     "@xterm/addon-fit": "^0.10.0",
     "@xterm/addon-web-links": "^0.11.0",
     "@xterm/xterm": "^5.5.0",
-    "ai": "^3.4.33",
+    "ai": "^4.0.13",
     "date-fns": "^3.6.0",
     "diff": "^5.2.0",
     "file-saver": "^2.0.5",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1,2 @@` |
| `1` | | `-{ "commit": "77073a5e7f759ae8e5752628131d0c56df6b5c34" , "version": "" }` |
| | `1` | `+{ "commit": "77073a5e7f759ae8e5752628131d0c56df6b5c34" , "version": "0.0.1" }` |
| | `2` | `+` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -99,7 +99,6 @@ export function useSettings() {` |
| `99` | `99` | `if (checkCommit === undefined) {` |
| `100` | `100` | `checkCommit = commit.commit;` |
| `101` | `101` | `}` |
| `102` | | `-` |
| `103` | `102` | `if (savedLatestBranch === undefined \|\| checkCommit !== commit.commit) {` |
| `104` | `103` | `// If setting hasn't been set by user, check version` |
| `105` | `104` | `checkIsStableVersion().then((isStable) => {` |