Commit ed4deba

Fix/chat context (#3)
1 parent b9a2a61 commit ed4deba

File tree

6 files changed: +65, -41 lines

src/cli/commands/ChatCommand.ts
src/index.ts
src/llamaEvaluator/LlamaBins.ts
src/llamaEvaluator/LlamaChatSession.ts (renamed from src/LlamaChatSession.ts)
src/llamaEvaluator/LlamaContext.ts (renamed from src/LlamaModel.ts)
src/llamaEvaluator/LlamaModel.ts

src/cli/commands/ChatCommand.ts

Lines changed: 4 additions & 4 deletions
@@ -60,14 +60,14 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
 
 
 async function RunChat({model: modelArg, systemInfo, systemPrompt, wrapper}: ChatCommand) {
-    const {LlamaChatSession} = await import("../../LlamaChatSession.js");
-    const {LlamaModel} = await import("../../LlamaModel.js");
+    const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
+    const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
 
     const model = new LlamaModel({
         modelPath: modelArg
     });
     const session = new LlamaChatSession({
-        model,
+        context: model.createContext(),
         printLLamaSystemInfo: systemInfo,
         systemPrompt,
         promptWrapper: createChatWrapper(wrapper)
@@ -102,7 +102,7 @@ async function RunChat({model: modelArg, systemInfo, systemPrompt, wrapper}: ChatCommand) {
 
     process.stdout.write(startColor);
     await session.prompt(input, (chunk) => {
-        process.stdout.write(model.decode(Uint32Array.from(chunk)));
+        process.stdout.write(session.context.decode(Uint32Array.from(chunk)));
     });
     process.stdout.write(endColor);
     console.log();
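
The upshot for the CLI: where RunChat previously passed the loaded model straight into the session and decoded streamed tokens with model.decode, it now creates a context from the model and lets the session own it. A minimal sketch of the new pattern (imports and the modelArg/input variables as in the diff above; the rest of the CLI plumbing is omitted):

    const model = new LlamaModel({modelPath: modelArg});
    const session = new LlamaChatSession({context: model.createContext()});

    await session.prompt(input, (chunk) => {
        // chunk is an array of token ids; decode it through the session's context
        process.stdout.write(session.context.decode(Uint32Array.from(chunk)));
    });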

src/index.ts

Lines changed: 5 additions & 2 deletions
@@ -1,13 +1,16 @@
-import {LlamaChatSession} from "./LlamaChatSession.js";
-import {LlamaModel} from "./LlamaModel.js";
+import {LlamaChatSession} from "./llamaEvaluator/LlamaChatSession.js";
+import {LlamaModel} from "./llamaEvaluator/LlamaModel.js";
 import {AbortError} from "./AbortError.js";
 import {ChatPromptWrapper} from "./ChatPromptWrapper.js";
 import {EmptyChatPromptWrapper} from "./chatWrappers/EmptyChatPromptWrapper.js";
 import {LlamaChatPromptWrapper} from "./chatWrappers/LlamaChatPromptWrapper.js";
 import {GeneralChatPromptWrapper} from "./chatWrappers/GeneralChatPromptWrapper.js";
+import {LlamaContext} from "./llamaEvaluator/LlamaContext.js";
+
 
 export {
     LlamaModel,
+    LlamaContext,
     LlamaChatSession,
     AbortError,
     ChatPromptWrapper,

src/llamaEvaluator/LlamaBins.ts

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+import {loadBin, type LLAMAModel, type LLAMAContext} from "../utils/getBin.js";
+
+export const llamaCppNode = await loadBin();
+const {LLAMAModel, LLAMAContext} = llamaCppNode;
+
+export {LLAMAModel, LLAMAContext};
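
This new module appears to centralize the single top-level await loadBin() call, so the native llama.cpp bindings are loaded once at startup and every evaluator module imports the already-initialized objects instead of loading them itself. A tiny sketch of a consumer, mirroring the imports used by LlamaContext.ts and LlamaModel.ts below:

    import {llamaCppNode} from "./LlamaBins.js";

    // no await needed here: LlamaBins.ts already resolved loadBin() via top-level await
    console.log(llamaCppNode.systemInfo());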

src/LlamaChatSession.ts renamed to src/llamaEvaluator/LlamaChatSession.ts

Lines changed: 16 additions & 15 deletions
@@ -1,32 +1,33 @@
-import {defaultChatSystemPrompt} from "./config.js";
-import {withLock} from "./utils/withLock.js";
+import {defaultChatSystemPrompt} from "../config.js";
+import {withLock} from "../utils/withLock.js";
+import {ChatPromptWrapper} from "../ChatPromptWrapper.js";
+import {AbortError} from "../AbortError.js";
+import {GeneralChatPromptWrapper} from "../chatWrappers/GeneralChatPromptWrapper.js";
 import {LlamaModel} from "./LlamaModel.js";
-import {ChatPromptWrapper} from "./ChatPromptWrapper.js";
-import {AbortError} from "./AbortError.js";
-import {GeneralChatPromptWrapper} from "./chatWrappers/GeneralChatPromptWrapper.js";
+import {LlamaContext} from "./LlamaContext.js";
 
 const UNKNOWN_UNICODE_CHAR = "�";
 
 export class LlamaChatSession {
-    private readonly _model: LlamaModel;
     private readonly _systemPrompt: string;
     private readonly _printLLamaSystemInfo: boolean;
     private readonly _promptWrapper: ChatPromptWrapper;
     private _promptIndex: number = 0;
     private _initialized: boolean = false;
+    private readonly _ctx: LlamaContext;
 
     public constructor({
-        model,
+        context,
         printLLamaSystemInfo = false,
         promptWrapper = new GeneralChatPromptWrapper(),
         systemPrompt = defaultChatSystemPrompt
     }: {
-        model: LlamaModel,
+        context: LlamaContext,
         printLLamaSystemInfo?: boolean,
         promptWrapper?: ChatPromptWrapper,
         systemPrompt?: string,
     }) {
-        this._model = model;
+        this._ctx = context;
         this._printLLamaSystemInfo = printLLamaSystemInfo;
         this._promptWrapper = promptWrapper;
 
@@ -37,8 +38,8 @@ export class LlamaChatSession {
         return this._initialized;
     }
 
-    public get model() {
-        return this._model;
+    public get context() {
+        return this._ctx;
     }
 
     public async init() {
@@ -47,7 +48,7 @@ export class LlamaChatSession {
                 return;
 
             if (this._printLLamaSystemInfo)
-                console.log("Llama system info", this._model.systemInfo);
+                console.log("Llama system info", LlamaModel.systemInfo);
 
             this._initialized = true;
         });
@@ -61,20 +62,20 @@ export class LlamaChatSession {
             const promptText = this._promptWrapper.wrapPrompt(prompt, {systemPrompt: this._systemPrompt, promptIndex: this._promptIndex});
             this._promptIndex++;
 
-            return await this._evalTokens(this._model.encode(promptText), onToken, {signal});
+            return await this._evalTokens(this._ctx.encode(promptText), onToken, {signal});
         });
     }
 
     private async _evalTokens(tokens: Uint32Array, onToken?: (tokens: number[]) => void, {signal}: { signal?: AbortSignal } = {}) {
-        const decodeTokens = (tokens: number[]) => this._model.decode(Uint32Array.from(tokens));
+        const decodeTokens = (tokens: number[]) => this._ctx.decode(Uint32Array.from(tokens));
 
         const stopStrings = this._promptWrapper.getStopStrings();
         const stopStringIndexes = Array(stopStrings.length).fill(0);
         const skippedChunksQueue: number[] = [];
         const res: number[] = [];
 
 
-        for await (const chunk of this._model.evaluate(tokens)) {
+        for await (const chunk of this._ctx.evaluate(tokens)) {
             if (signal?.aborted)
                 throw new AbortError();
 
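
A side effect of moving the session's dependency from LlamaModel to LlamaContext is that, in principle, one loaded model can back several independent sessions, each with its own context. Nothing in this commit exercises that, so the following is only a speculative sketch (import paths as seen from src/, the model path is illustrative, and it assumes prompt() can be called without the streaming callback):

    import {LlamaModel} from "./llamaEvaluator/LlamaModel.js";
    import {LlamaChatSession} from "./llamaEvaluator/LlamaChatSession.js";

    const model = new LlamaModel({modelPath: "path/to/model.bin"});   // illustrative path

    // each session gets its own context, so their conversation state stays separate
    const sessionA = new LlamaChatSession({context: model.createContext()});
    const sessionB = new LlamaChatSession({context: model.createContext()});

    await sessionA.prompt("Hi there!");
    await sessionB.prompt("Hello!");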

src/LlamaModel.ts renamed to src/llamaEvaluator/LlamaContext.ts

Lines changed: 10 additions & 20 deletions

@@ -1,27 +1,16 @@
-import {loadBin, type LLAMAModel, type LLAMAContext} from "./utils/getBin.js";
+import {LLAMAContext, llamaCppNode} from "./LlamaBins.js";
 
-const llamaCppNode = await loadBin();
-const {LLAMAModel, LLAMAContext} = llamaCppNode;
-
-export class LlamaModel {
-    private readonly _model: LLAMAModel;
+type LlamaContextConstructorParameters = {prependBos: boolean, ctx: LLAMAContext};
+export class LlamaContext {
     private readonly _ctx: LLAMAContext;
     private _prependBos: boolean;
 
-    public constructor({
-        modelPath, prependBos = true
-    }: {
-        modelPath: string, prependBos?: boolean
-    }) {
-        this._model = new LLAMAModel(modelPath);
-        this._ctx = new LLAMAContext(this._model);
+    /** @internal */
+    public constructor( {ctx, prependBos}: LlamaContextConstructorParameters ) {
+        this._ctx = ctx;
         this._prependBos = prependBos;
     }
 
-    public get systemInfo() {
-        return llamaCppNode.systemInfo();
-    }
-
     public encode(text: string): Uint32Array {
         return this._ctx.encode(text);
     }
@@ -34,10 +23,10 @@
         let evalTokens = tokens;
 
         if (this._prependBos) {
-            const tokensArray = Array.from(tokens);
-            tokensArray.unshift(llamaCppNode.tokenBos());
+            const tokenArray = Array.from(tokens);
+            tokenArray.unshift(llamaCppNode.tokenBos());
 
-            evalTokens = Uint32Array.from(tokensArray);
+            evalTokens = Uint32Array.from(tokenArray);
             this._prependBos = false;
         }
 
@@ -56,4 +45,5 @@
             evalTokens = Uint32Array.from([nextToken]);
         }
     }
+
 }
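
Tokenization now lives on the context rather than the model, and the constructor is tagged @internal, so the intended way to obtain a LlamaContext is through LlamaModel.createContext() (added in the next file). A small sketch of the encode/decode surface, assuming a model has been loaded as in the earlier sketch:

    const context = model.createContext();

    const tokens = context.encode("Hello");   // Uint32Array of token ids
    const text = context.decode(tokens);      // back to a string

    // evaluate() prepends the BOS token only on its first use; _prependBos is
    // cleared after that, as shown in the diff above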

src/llamaEvaluator/LlamaModel.ts

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+import {LlamaContext} from "./LlamaContext.js";
+import {LLAMAContext, llamaCppNode, LLAMAModel} from "./LlamaBins.js";
+
+
+export class LlamaModel {
+    private readonly _model: LLAMAModel;
+    private readonly _prependBos: boolean;
+
+    public constructor({modelPath, prependBos = true}: { modelPath: string, prependBos?: boolean }) {
+        this._model = new LLAMAModel(modelPath);
+        this._prependBos = prependBos;
+    }
+
+    public createContext() {
+        return new LlamaContext({
+            ctx: new LLAMAContext(this._model),
+            prependBos: this._prependBos
+        });
+    }
+
+    public static get systemInfo() {
+        return llamaCppNode.systemInfo();
+    }
+}
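
Note that systemInfo moves from an instance getter on the old LlamaModel to a static getter here, which is why LlamaChatSession.init() above logs LlamaModel.systemInfo instead of this._model.systemInfo; since it just forwards to llamaCppNode.systemInfo(), it can be read before any model is loaded. For example (assuming LlamaModel is imported as in src/index.ts):

    console.log("Llama system info", LlamaModel.systemInfo);   // works before any model is constructed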
