
Commit 3066f0a

server/llm: finishing v0.2 API update, getting rid of "langchain" pkg and centralizing the history reconstruction
1 parent ff2ed35 commit 3066f0a
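
The change is the same in every provider module: the hand-rolled loop that rebuilt a LangChain chat message history from CoCalc's own history format is deleted and replaced by one shared helper, transformHistoryToMessages, in a new chat-history file. As a rough illustration only (the withCoCalcHistory wrapper below is hypothetical and not part of this commit), each provider now wires the helper into its RunnableWithMessageHistory along these lines:

import {
  Runnable,
  RunnableWithMessageHistory,
} from "@langchain/core/runnables";
import { History } from "@cocalc/util/types/llm";
import { transformHistoryToMessages } from "./chat-history";

// Hypothetical wrapper sketching the shared pattern: getMessageHistory
// delegates to transformHistoryToMessages and reports the token count of the
// prior conversation back to the caller (used for prompt_tokens accounting).
function withCoCalcHistory(
  chain: Runnable,
  history: History | undefined,
  onHistoryTokens: (tokens: number) => void,
) {
  return new RunnableWithMessageHistory({
    runnable: chain, // prompt.pipe(model), as in the per-provider files below
    config: { configurable: { sessionId: "ignored" } },
    inputMessagesKey: "input",
    historyMessagesKey: "history", // "chat_history" in the ollama/custom-openai variants
    getMessageHistory: async () => {
      const { messageHistory, tokens } = await transformHistoryToMessages(
        history,
      );
      onHistoryTokens(tokens);
      return messageHistory;
    },
  });
}
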

File tree

8 files changed, +163 -410 lines changed


src/packages/pnpm-lock.yaml

Lines changed: 31 additions & 252 deletions
Some generated files are not rendered by default.

src/packages/server/llm/anthropic.ts

Lines changed: 7 additions & 18 deletions
@@ -4,7 +4,6 @@ import {
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
-import { ChatMessageHistory } from "langchain/stores/message/in_memory";

 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings";
@@ -14,7 +13,7 @@ import {
   isAnthropicModel,
 } from "@cocalc/util/db-schema/llm-utils";
 import { ChatOutput, History } from "@cocalc/util/types/llm";
-import { AIMessage, HumanMessage } from "@langchain/core/messages";
+import { transformHistoryToMessages } from "./chat-history";
 import { numTokens } from "./chatgpt-numtokens";

 const log = getLogger("llm:anthropic");
@@ -93,22 +92,12 @@ export async function evaluateAnthropic(
     config: { configurable: { sessionId: "ignored" } },
     inputMessagesKey: "input",
     historyMessagesKey: "history",
-    getMessageHistory: async (_) => {
-      const chatHistory = new ChatMessageHistory();
-      if (history) {
-        let nextRole: "model" | "user" = "user";
-        for (const { content } of history) {
-          historyTokens += numTokens(content);
-          if (nextRole === "user") {
-            await chatHistory.addMessage(new HumanMessage(content));
-          } else {
-            await chatHistory.addMessage(new AIMessage(content));
-          }
-          nextRole = nextRole === "user" ? "model" : "user";
-        }
-      }
-
-      return chatHistory;
+    getMessageHistory: async () => {
+      const { messageHistory, tokens } = await transformHistoryToMessages(
+        history,
+      );
+      historyTokens = tokens;
+      return messageHistory;
     },
   });

src/packages/server/llm/chat-history.ts

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+import { InMemoryChatMessageHistory } from "@langchain/core/chat_history";
+import { AIMessage, HumanMessage } from "@langchain/core/messages";
+
+import { History } from "@cocalc/util/types/llm";
+import { numTokens } from "./chatgpt-numtokens";
+
+// reconstruct the chat history from CoCalc's data
+// TODO: must be robust for repeated messages from the same user and ending in an assistant message
+export async function transformHistoryToMessages(
+  history?: History,
+): Promise<{ messageHistory: InMemoryChatMessageHistory; tokens: number }> {
+  let tokens = 0;
+
+  const messageHistory = new InMemoryChatMessageHistory();
+  if (history) {
+    let nextRole: "model" | "user" = "user";
+    for (const { content } of history) {
+      tokens += numTokens(content);
+      if (nextRole === "user") {
+        await messageHistory.addMessage(new HumanMessage(content));
+      } else {
+        await messageHistory.addMessage(new AIMessage(content));
+      }
+      nextRole = nextRole === "user" ? "model" : "user";
+    }
+  }
+
+  return { messageHistory, tokens };
+}
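
A minimal usage sketch of the new helper (not part of the commit): roles are not read from the history entries; the helper assigns them by strict user/model alternation starting with "user", which is exactly the limitation the TODO above warns about.

import { transformHistoryToMessages } from "./chat-history";

async function demo() {
  // entries simplified to just { content }; a CoCalc History entry carries more fields
  const { messageHistory, tokens } = await transformHistoryToMessages([
    { content: "What is 2+2?" },
    { content: "4" },
    { content: "And times ten?" },
  ] as any);
  console.log(tokens); // approximate token count of the prior conversation
  console.log(await messageHistory.getMessages()); // [HumanMessage, AIMessage, HumanMessage]
}
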
src/packages/server/llm/custom-openai.ts

Lines changed: 78 additions & 86 deletions
@@ -1,101 +1,93 @@
 import {
-  ChatPromptTemplate,
-  MessagesPlaceholder,
-} from "@langchain/core/prompts";
-import { RunnableWithMessageHistory } from "@langchain/core/runnables";
-import { ChatMessageHistory } from "langchain/stores/message/in_memory";
+  ChatPromptTemplate,
+  MessagesPlaceholder,
+} from "@langchain/core/prompts";
+import { RunnableWithMessageHistory } from "@langchain/core/runnables";

-import getLogger from "@cocalc/backend/logger";
-import { fromCustomOpenAIModel, isCustomOpenAI } from "@cocalc/util/db-schema/llm-utils";
-import { ChatOutput, History } from "@cocalc/util/types/llm";
-import { AIMessage, HumanMessage } from "@langchain/core/messages";
-import { numTokens } from "./chatgpt-numtokens";
-import { getCustomOpenAI } from "./client";
-
-const log = getLogger("llm:custom_openai");
+import getLogger from "@cocalc/backend/logger";
+import {
+  fromCustomOpenAIModel,
+  isCustomOpenAI,
+} from "@cocalc/util/db-schema/llm-utils";
+import { ChatOutput, History } from "@cocalc/util/types/llm";
+import { transformHistoryToMessages } from "./chat-history";
+import { numTokens } from "./chatgpt-numtokens";
+import { getCustomOpenAI } from "./client";

-// subset of ChatOptions, but model is a string
-interface CustomOpenAIOpts {
-  input: string; // new input that user types
-  system?: string; // extra setup that we add for relevance and context
-  history?: History;
-  model: string; // this must be custom_openai-[model]
-  stream?: (output?: string) => void;
-  maxTokens?: number;
-}
+const log = getLogger("llm:custom_openai");

-export async function evaluateCustomOpenAI(
-  opts: Readonly<CustomOpenAIOpts>,
-): Promise<ChatOutput> {
-  if (!isCustomOpenAI(opts.model)) {
-    throw new Error(`model ${opts.model} not supported`);
-  }
-  const model = fromCustomOpenAIModel(opts.model);
-  const { system, history, input, maxTokens, stream } = opts;
-  log.debug("evaluateCustomOpenAI", {
-    input,
-    history,
-    system,
-    model,
-    stream: stream != null,
-    maxTokens,
-  });
+// subset of ChatOptions, but model is a string
+interface CustomOpenAIOpts {
+  input: string; // new input that user types
+  system?: string; // extra setup that we add for relevance and context
+  history?: History;
+  model: string; // this must be custom_openai-[model]
+  stream?: (output?: string) => void;
+  maxTokens?: number;
+}

-  const customOpenAI = await getCustomOpenAI(model);
+export async function evaluateCustomOpenAI(
+  opts: Readonly<CustomOpenAIOpts>,
+): Promise<ChatOutput> {
+  if (!isCustomOpenAI(opts.model)) {
+    throw new Error(`model ${opts.model} not supported`);
+  }
+  const model = fromCustomOpenAIModel(opts.model);
+  const { system, history, input, maxTokens, stream } = opts;
+  log.debug("evaluateCustomOpenAI", {
+    input,
+    history,
+    system,
+    model,
+    stream: stream != null,
+    maxTokens,
+  });

-  const prompt = ChatPromptTemplate.fromMessages([
-    ["system", system ?? ""],
-    new MessagesPlaceholder("chat_history"),
-    ["human", "{input}"],
-  ]);
+  const customOpenAI = await getCustomOpenAI(model);

-  const chain = prompt.pipe(customOpenAI);
+  const prompt = ChatPromptTemplate.fromMessages([
+    ["system", system ?? ""],
+    new MessagesPlaceholder("chat_history"),
+    ["human", "{input}"],
+  ]);

-  let historyTokens = 0;
+  const chain = prompt.pipe(customOpenAI);

-  const chainWithHistory = new RunnableWithMessageHistory({
-    runnable: chain,
-    config: { configurable: { sessionId: "ignored" } },
-    inputMessagesKey: "input",
-    historyMessagesKey: "chat_history",
-    getMessageHistory: async (_) => {
-      const chatHistory = new ChatMessageHistory();
-      if (history) {
-        let nextRole: "model" | "user" = "user";
-        for (const { content } of history) {
-          historyTokens = numTokens(content);
-          if (nextRole === "user") {
-            await chatHistory.addMessage(new HumanMessage(content));
-          } else {
-            await chatHistory.addMessage(new AIMessage(content));
-          }
-          nextRole = nextRole === "user" ? "model" : "user";
-        }
-      }
+  let historyTokens = 0;

-      return chatHistory;
-    },
-  });
+  const chainWithHistory = new RunnableWithMessageHistory({
+    runnable: chain,
+    config: { configurable: { sessionId: "ignored" } },
+    inputMessagesKey: "input",
+    historyMessagesKey: "chat_history",
+    getMessageHistory: async () => {
+      const { messageHistory, tokens } = await transformHistoryToMessages(
+        history,
+      );
+      historyTokens = tokens;
+      return messageHistory;
+    },
+  });

-  const chunks = await chainWithHistory.stream({ input });
+  const chunks = await chainWithHistory.stream({ input });

-  let output = "";
-  for await (const chunk of chunks) {
-    output += chunk;
-    opts.stream?.(chunk);
-  }
+  let output = "";
+  for await (const chunk of chunks) {
+    output += chunk;
+    opts.stream?.(chunk);
+  }

-  // and an empty call when done
-  opts.stream?.();
+  // and an empty call when done
+  opts.stream?.();

-  // we use that GPT3 tokenizer to get an approximate number of tokens
-  const prompt_tokens = numTokens(input) + historyTokens;
-  const completion_tokens = numTokens(output);
+  // we use that GPT3 tokenizer to get an approximate number of tokens
+  const prompt_tokens = numTokens(input) + historyTokens;
+  const completion_tokens = numTokens(output);

-  return {
-    output,
-    total_tokens: prompt_tokens + completion_tokens,
-    completion_tokens,
-    prompt_tokens,
-  };
-}
+  return {
+    output,
+    total_tokens: prompt_tokens + completion_tokens,
+    completion_tokens,
+    prompt_tokens,
+  };
+}

src/packages/server/llm/google-genai-client.ts

Lines changed: 4 additions & 17 deletions
@@ -5,14 +5,12 @@
  */

 import { GenerativeModel, GoogleGenerativeAI } from "@google/generative-ai";
-import { AIMessage, HumanMessage } from "@langchain/core/messages";
 import {
   ChatPromptTemplate,
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
 import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
-import { ChatMessageHistory } from "langchain/stores/message/in_memory";

 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings";
@@ -23,6 +21,7 @@ import {
   isGoogleModel,
 } from "@cocalc/util/db-schema/llm-utils";
 import { ChatOutput, History } from "@cocalc/util/types/llm";
+import { transformHistoryToMessages } from "./chat-history";

 const log = getLogger("llm:google-genai");

@@ -133,21 +132,9 @@ export class GoogleGenAIClient {
       config: { configurable: { sessionId: "ignored" } },
       inputMessagesKey: "input",
       historyMessagesKey: "history",
-      getMessageHistory: async (_) => {
-        const chatHistory = new ChatMessageHistory();
-        if (history) {
-          let nextRole: "model" | "user" = "user";
-          for (const { content } of history) {
-            if (nextRole === "user") {
-              await chatHistory.addMessage(new HumanMessage(content));
-            } else {
-              await chatHistory.addMessage(new AIMessage(content));
-            }
-            nextRole = nextRole === "user" ? "model" : "user";
-          }
-        }
-
-        return chatHistory;
+      getMessageHistory: async () => {
+        const { messageHistory } = await transformHistoryToMessages(history);
+        return messageHistory;
       },
     });

src/packages/server/llm/mistral.ts

Lines changed: 7 additions & 18 deletions
@@ -4,13 +4,12 @@ import {
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
 import { ChatMistralAI } from "@langchain/mistralai";
-import { ChatMessageHistory } from "langchain/stores/message/in_memory";
-import { AIMessage, HumanMessage } from "@langchain/core/messages";

 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings";
 import { isMistralModel } from "@cocalc/util/db-schema/llm-utils";
 import { ChatOutput, History } from "@cocalc/util/types/llm";
+import { transformHistoryToMessages } from "./chat-history";
 import { numTokens } from "./chatgpt-numtokens";

 const log = getLogger("llm:mistral");
@@ -72,22 +71,12 @@ export async function evaluateMistral(
     config: { configurable: { sessionId: "ignored" } },
     inputMessagesKey: "input",
     historyMessagesKey: "history",
-    getMessageHistory: async (_) => {
-      const chatHistory = new ChatMessageHistory();
-      if (history) {
-        let nextRole: "model" | "user" = "user";
-        for (const { content } of history) {
-          historyTokens += numTokens(content);
-          if (nextRole === "user") {
-            await chatHistory.addMessage(new HumanMessage(content));
-          } else {
-            await chatHistory.addMessage(new AIMessage(content));
-          }
-          nextRole = nextRole === "user" ? "model" : "user";
-        }
-      }
-
-      return chatHistory;
+    getMessageHistory: async () => {
+      const { messageHistory, tokens } = await transformHistoryToMessages(
+        history,
+      );
+      historyTokens = tokens;
+      return messageHistory;
     },
   });

src/packages/server/llm/ollama.ts

Lines changed: 7 additions & 18 deletions
@@ -3,12 +3,11 @@ import {
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
-import { ChatMessageHistory } from "langchain/stores/message/in_memory";

 import getLogger from "@cocalc/backend/logger";
 import { fromOllamaModel, isOllamaLLM } from "@cocalc/util/db-schema/llm-utils";
 import { ChatOutput, History } from "@cocalc/util/types/llm";
-import { AIMessage, HumanMessage } from "@langchain/core/messages";
+import { transformHistoryToMessages } from "./chat-history";
 import { numTokens } from "./chatgpt-numtokens";
 import { getOllama } from "./client";

@@ -58,22 +57,12 @@ export async function evaluateOllama(
     config: { configurable: { sessionId: "ignored" } },
     inputMessagesKey: "input",
     historyMessagesKey: "chat_history",
-    getMessageHistory: async (_) => {
-      const chatHistory = new ChatMessageHistory();
-      if (history) {
-        let nextRole: "model" | "user" = "user";
-        for (const { content } of history) {
-          historyTokens = numTokens(content);
-          if (nextRole === "user") {
-            await chatHistory.addMessage(new HumanMessage(content));
-          } else {
-            await chatHistory.addMessage(new AIMessage(content));
-          }
-          nextRole = nextRole === "user" ? "model" : "user";
-        }
-      }
-
-      return chatHistory;
+    getMessageHistory: async () => {
+      const { messageHistory, tokens } = await transformHistoryToMessages(
+        history,
+      );
+      historyTokens = tokens;
+      return messageHistory;
     },
   });

src/packages/server/package.json

Lines changed: 0 additions & 1 deletion
@@ -91,7 +91,6 @@
     "json-stable-stringify": "^1.0.1",
     "jwt-decode": "^3.1.2",
     "lambda-cloud-node-api": "^1.0.1",
-    "langchain": "^0.2.4",
     "libsodium-wrappers": "^0.7.13",
     "lodash": "^4.17.21",
     "lru-cache": "^7.14.1",
