
Commit 07ebbd0 (1 parent: 7879cf9)

feat(participant): filter message history when it goes over maxInputTokens VSCODE-653 (#894)

5 files changed (+214, −73 lines)
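At a high level, the commit budgets chat history against the model's context window: it counts the tokens of the fixed assistant prompt and the user's request, treats whatever remains under maxInputTokens as the limit for history, and drops the oldest turns first when that limit is exceeded. A minimal TypeScript sketch of the idea, using the stock VS Code language-model API (the function name and shape are illustrative, not the extension's actual helpers):

import * as vscode from 'vscode';

// Illustrative sketch: keep only as much history as fits in the token
// budget left over after the assistant prompt and the user's request.
async function trimHistoryToBudget(
  model: vscode.LanguageModelChat,
  assistantPrompt: vscode.LanguageModelChatMessage,
  requestPrompt: string,
  history: vscode.LanguageModelChatMessage[]
): Promise<vscode.LanguageModelChatMessage[]> {
  const [assistantTokens, requestTokens] = await Promise.all([
    model.countTokens(assistantPrompt),
    model.countTokens(requestPrompt),
  ]);
  let remaining = model.maxInputTokens - (assistantTokens + requestTokens);

  const kept: vscode.LanguageModelChatMessage[] = [];
  // Walk newest-to-oldest so the most recent turns survive trimming.
  for (let i = history.length - 1; i >= 0; i--) {
    remaining -= await model.countTokens(history[i]);
    if (remaining < 0) {
      break;
    }
    kept.push(history[i]);
  }
  // Restore chronological order before sending the messages to the model.
  return kept.reverse();
}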

src/participant/participant.ts

Lines changed: 1 addition & 1 deletion

@@ -1577,7 +1577,7 @@ export default class ParticipantController {
       log.info('Docs chatbot created for chatId', chatId);
     }

-    const history = PromptHistory.getFilteredHistoryForDocs({
+    const history = await PromptHistory.getFilteredHistoryForDocs({
       connectionNames: this._getConnectionNames(),
       context: context,
     });

src/participant/prompts/promptBase.ts

Lines changed: 62 additions & 24 deletions

@@ -5,6 +5,7 @@ import type {
   ParticipantPromptProperties,
 } from '../../telemetry/telemetryService';
 import { PromptHistory } from './promptHistory';
+import { getCopilotModel } from '../model';
 import type { ParticipantCommandType } from '../participantTypes';

 export interface PromptArgsBase {

@@ -94,34 +95,76 @@ export function isContentEmpty(
   return true;
 }

-export abstract class PromptBase<TArgs extends PromptArgsBase> {
-  protected abstract getAssistantPrompt(args: TArgs): string;
+export abstract class PromptBase<PromptArgs extends PromptArgsBase> {
+  protected abstract getAssistantPrompt(args: PromptArgs): string;

   protected get internalPurposeForTelemetry(): InternalPromptPurpose {
     return undefined;
   }

-  protected getUserPrompt(args: TArgs): Promise<UserPromptResponse> {
+  protected getUserPrompt({
+    request,
+  }: PromptArgs): Promise<UserPromptResponse> {
     return Promise.resolve({
-      prompt: args.request.prompt,
+      prompt: request.prompt,
       hasSampleDocs: false,
     });
   }

-  async buildMessages(args: TArgs): Promise<ModelInput> {
-    let historyMessages = PromptHistory.getFilteredHistory({
-      history: args.context?.history,
-      ...args,
+  private async _countRemainingTokens({
+    model,
+    assistantPrompt,
+    requestPrompt,
+  }: {
+    model: vscode.LanguageModelChat | undefined;
+    assistantPrompt: vscode.LanguageModelChatMessage;
+    requestPrompt: string;
+  }): Promise<number | undefined> {
+    if (model) {
+      const [assistantPromptTokens, userPromptTokens] = await Promise.all([
+        model.countTokens(assistantPrompt),
+        model.countTokens(requestPrompt),
+      ]);
+      return model.maxInputTokens - (assistantPromptTokens + userPromptTokens);
+    }
+    return undefined;
+  }
+
+  async buildMessages(args: PromptArgs): Promise<ModelInput> {
+    const { context, request, databaseName, collectionName, connectionNames } =
+      args;
+
+    const model = await getCopilotModel();
+
+    // eslint-disable-next-line new-cap
+    const assistantPrompt = vscode.LanguageModelChatMessage.Assistant(
+      this.getAssistantPrompt(args)
+    );
+
+    const tokenLimit = await this._countRemainingTokens({
+      model,
+      assistantPrompt,
+      requestPrompt: request.prompt,
+    });
+
+    let historyMessages = await PromptHistory.getFilteredHistory({
+      history: context?.history,
+      model,
+      tokenLimit,
+      namespaceIsKnown:
+        databaseName !== undefined && collectionName !== undefined,
+      connectionNames,
     });
+
     // If the current user's prompt is a connection name, and the last
     // message was to connect. We want to use the last
     // message they sent before the connection name as their prompt.
-    if (args.connectionNames?.includes(args.request.prompt)) {
-      const history = args.context?.history;
+    if (connectionNames?.includes(request.prompt)) {
+      const history = context?.history;
       if (!history) {
         return {
           messages: [],
-          stats: this.getStats([], args, false),
+          stats: this.getStats([], { request, context }, false),
         };
       }
       const previousResponse = history[

@@ -132,13 +175,11 @@ export abstract class PromptBase<TArgs extends PromptArgsBase> {
     // Go through the history in reverse order to find the last user message.
     for (let i = history.length - 1; i >= 0; i--) {
       if (history[i] instanceof vscode.ChatRequestTurn) {
+        request.prompt = (history[i] as vscode.ChatRequestTurn).prompt;
         // Rewrite the arguments so that the prompt is the last user message from history
         args = {
           ...args,
-          request: {
-            ...args.request,
-            prompt: (history[i] as vscode.ChatRequestTurn).prompt,
-          },
+          request,
         };

         // Remove the item from the history messages array.

@@ -150,23 +191,20 @@ export abstract class PromptBase<TArgs extends PromptArgsBase> {
     }

     const { prompt, hasSampleDocs } = await this.getUserPrompt(args);
-    const messages = [
-      // eslint-disable-next-line new-cap
-      vscode.LanguageModelChatMessage.Assistant(this.getAssistantPrompt(args)),
-      ...historyMessages,
-      // eslint-disable-next-line new-cap
-      vscode.LanguageModelChatMessage.User(prompt),
-    ];
+    // eslint-disable-next-line new-cap
+    const userPrompt = vscode.LanguageModelChatMessage.User(prompt);
+
+    const messages = [assistantPrompt, ...historyMessages, userPrompt];

     return {
       messages,
-      stats: this.getStats(messages, args, hasSampleDocs),
+      stats: this.getStats(messages, { request, context }, hasSampleDocs),
     };
   }

   protected getStats(
     messages: vscode.LanguageModelChatMessage[],
-    { request, context }: TArgs,
+    { request, context }: Pick<PromptArgsBase, 'request' | 'context'>,
     hasSampleDocs: boolean
   ): ParticipantPromptProperties {
     return {
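The diff relies on getCopilotModel (imported from '../model') to obtain the vscode.LanguageModelChat whose countTokens and maxInputTokens drive the budget. That helper isn't shown in this commit; a plausible stand-in built on the public VS Code API might look like the following (an assumption, not the extension's actual implementation):

import * as vscode from 'vscode';

// Hypothetical stand-in for the extension's getCopilotModel() helper:
// select the first available Copilot chat model, or undefined when none
// is installed or authorized — which is why callers guard on `model`.
async function getCopilotModel(): Promise<
  vscode.LanguageModelChat | undefined
> {
  const [model] = await vscode.lm.selectChatModels({ vendor: 'copilot' });
  return model;
}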

src/participant/prompts/promptHistory.ts

Lines changed: 23 additions & 14 deletions

@@ -106,26 +106,28 @@ export class PromptHistory {
   /** When passing the history to the model we only want contextual messages
       to be passed. This function parses through the history and returns
       the messages that are valuable to keep. */
-  static getFilteredHistory({
+  static async getFilteredHistory({
+    model,
+    tokenLimit,
     connectionNames,
     history,
-    databaseName,
-    collectionName,
+    namespaceIsKnown,
   }: {
+    model?: vscode.LanguageModelChat | undefined;
+    tokenLimit?: number;
     connectionNames?: string[]; // Used to scrape the connecting messages from the history.
     history?: vscode.ChatContext['history'];
-    databaseName?: string;
-    collectionName?: string;
-  }): vscode.LanguageModelChatMessage[] {
+    namespaceIsKnown: boolean;
+  }): Promise<vscode.LanguageModelChatMessage[]> {
     const messages: vscode.LanguageModelChatMessage[] = [];

     if (!history) {
       return [];
     }

-    const namespaceIsKnown =
-      databaseName !== undefined && collectionName !== undefined;
-    for (let i = 0; i < history.length; i++) {
+    let totalUsedTokens = 0;
+
+    for (let i = history.length - 1; i >= 0; i--) {
       const currentTurn = history[i];

       let addedMessage: vscode.LanguageModelChatMessage | undefined;

@@ -147,16 +149,23 @@ export class PromptHistory {
       });
     }
     if (addedMessage) {
+      if (tokenLimit) {
+        totalUsedTokens += (await model?.countTokens(addedMessage)) || 0;
+        if (totalUsedTokens > tokenLimit) {
+          break;
+        }
+      }
+
       messages.push(addedMessage);
     }
   }

-    return messages;
+    return messages.reverse();
   }

   /** The docs chatbot keeps its own history so we avoid any
    * we need to include history only since last docs message. */
-  static getFilteredHistoryForDocs({
+  static async getFilteredHistoryForDocs({
     connectionNames,
     context,
     databaseName,

@@ -166,7 +175,7 @@ export class PromptHistory {
     context?: vscode.ChatContext;
     databaseName?: string;
     collectionName?: string;
-  }): vscode.LanguageModelChatMessage[] {
+  }): Promise<vscode.LanguageModelChatMessage[]> {
     if (!context) {
       return [];
     }

@@ -192,8 +201,8 @@ export class PromptHistory {
     return this.getFilteredHistory({
       connectionNames,
       history: historySinceLastDocs.reverse(),
-      databaseName,
-      collectionName,
+      namespaceIsKnown:
+        databaseName !== undefined && collectionName !== undefined,
     });
   }
 }
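Call sites now await the filtered history and pass the namespace check in precomputed, rather than handing over the raw database and collection names. A hedged example call (the surrounding variables are placeholders from the buildMessages flow shown above):

const historyMessages = await PromptHistory.getFilteredHistory({
  model, // vscode.LanguageModelChat | undefined
  tokenLimit, // tokens left after the fixed prompts were counted
  connectionNames, // e.g. from this._getConnectionNames()
  history: context?.history,
  namespaceIsKnown:
    databaseName !== undefined && collectionName !== undefined,
});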

src/participant/sampleDocuments.ts

Lines changed: 5 additions & 3 deletions

@@ -59,9 +59,11 @@ export async function getStringifiedSampleDocuments({

   const stringifiedDocuments = toJSString(additionToPrompt);

-  // TODO: model.countTokens will sometimes return undefined - at least in tests. We should investigate why.
-  promptInputTokens =
-    (await model.countTokens(prompt + stringifiedDocuments)) || 0;
+  // Re-evaluate promptInputTokens with less documents if necessary.
+  if (promptInputTokens > model.maxInputTokens) {
+    promptInputTokens =
+      (await model.countTokens(prompt + stringifiedDocuments)) || 0;
+  }

   // Add sample documents to the prompt only when it fits in the context window.
   if (promptInputTokens <= model.maxInputTokens) {
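The guard changes when the re-count happens rather than what is counted: previously model.countTokens ran unconditionally after the sample documents were truncated, while now it runs only when the earlier estimate overflowed the window. As a worked example with assumed numbers, if maxInputTokens is 8192 and the initial estimate came to 7500 tokens, the re-count is skipped and the samples are appended directly; if it came to 9000, the prompt is re-counted against the reduced stringifiedDocuments and the samples are included only if the new total fits.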
