@@ -2,6 +2,75 @@ import {removeNullFields} from "../utils/removeNullFields.js";
 import {llamaCppNode, LLAMAModel} from "./LlamaBins.js";


+export type LlamaModelOptions = {
+    /** path to the model on the filesystem */
+    modelPath: string,
+
+    /** If null, a random seed will be used */
+    seed?: number | null,
+
+    /** text context size */
+    contextSize?: number,
+
+    /** prompt processing batch size */
+    batchSize?: number,
+
+    /** number of layers to store in VRAM */
+    gpuLayers?: number,
+
+    /** if true, reduce VRAM usage at the cost of performance */
+    lowVram?: boolean,
+
+    /**
+     * Temperature is a hyperparameter that controls the randomness of the generated text.
+     * It affects the probability distribution of the model's output tokens.
+     * A higher temperature (e.g., 1.5) makes the output more random and creative,
+     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
+     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
+     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
+     *
+     * Set to `0` to disable.
+     */
+    temperature?: number,
+
+    /**
+     * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
+     * An integer number between `1` and the size of the vocabulary.
+     * Set to `0` to disable (which uses the full vocabulary).
+     *
+     * Only relevant when `temperature` is set to a value greater than 0.
+     * */
+    topK?: number,
+
+    /**
+     * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
+     * and samples the next token only from this set.
+     * A float number between `0` and `1`.
+     * Set to `1` to disable.
+     *
+     * Only relevant when `temperature` is set to a value greater than `0`.
+     * */
+    topP?: number,
+
+    /** use fp16 for KV cache */
+    f16Kv?: boolean,
+
+    /** the llama_eval() call computes all logits, not just the last one */
+    logitsAll?: boolean,
+
+    /** only load the vocabulary, no weights */
+    vocabOnly?: boolean,
+
+    /** use mmap if possible */
+    useMmap?: boolean,
+
+    /** force system to keep model in RAM */
+    useMlock?: boolean,
+
+    /** embedding mode only */
+    embedding?: boolean
+};
+
 export class LlamaModel {
     /** @internal */
     public readonly _model: LLAMAModel;
@@ -46,74 +115,7 @@ export class LlamaModel {
     public constructor({
         modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers,
         lowVram, temperature = 0, topK = 40, topP = 0.95, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding
-    }: {
-        /** path to the model on the filesystem */
-        modelPath: string,
-
-        /** If null, a random seed will be used */
-        seed?: number | null,
-
-        /** text context size */
-        contextSize?: number,
-
-        /** prompt processing batch size */
-        batchSize?: number,
-
-        /** number of layers to store in VRAM */
-        gpuLayers?: number,
-
-        /** if true, reduce VRAM usage at the cost of performance */
-        lowVram?: boolean,
-
-        /**
-         * Temperature is a hyperparameter that controls the randomness of the generated text.
-         * It affects the probability distribution of the model's output tokens.
-         * A higher temperature (e.g., 1.5) makes the output more random and creative,
-         * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
-         * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
-         * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
-         *
-         * Set to `0` to disable.
-         */
-        temperature?: number,
-
-        /**
-         * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
-         * An integer number between `1` and the size of the vocabulary.
-         * Set to `0` to disable (which uses the full vocabulary).
-         *
-         * Only relevant when `temperature` is set to a value greater than 0.
-         * */
-        topK?: number,
-
-        /**
-         * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
-         * and samples the next token only from this set.
-         * A float number between `0` and `1`.
-         * Set to `1` to disable.
-         *
-         * Only relevant when `temperature` is set to a value greater than `0`.
-         * */
-        topP?: number,
-
-        /** use fp16 for KV cache */
-        f16Kv?: boolean,
-
-        /** the llama_eval() call computes all logits, not just the last one */
-        logitsAll?: boolean,
-
-        /** only load the vocabulary, no weights */
-        vocabOnly?: boolean,
-
-        /** use mmap if possible */
-        useMmap?: boolean,
-
-        /** force system to keep model in RAM */
-        useMlock?: boolean,
-
-        /** embedding mode only */
-        embedding?: boolean
-    }) {
+    }: LlamaModelOptions) {
         this._model = new LLAMAModel(modelPath, removeNullFields({
             seed: seed != null ? Math.max(-1, seed) : undefined,
             contextSize,
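For reference, a minimal usage sketch of the newly exported type. The `node-llama-cpp` import specifier and the model path below are illustrative assumptions and are not part of this diff; only `LlamaModel` and `LlamaModelOptions` come from the code above.

import {LlamaModel, type LlamaModelOptions} from "node-llama-cpp"; // package specifier is an assumption

// With the type exported, the options object can be typed, stored, and reused
// independently of the constructor call.
const options: LlamaModelOptions = {
    modelPath: "path/to/model.bin", // hypothetical path
    contextSize: 1024 * 4,          // matches the constructor default
    temperature: 0.8,               // the suggested balance between randomness and determinism
    topK: 40,
    topP: 0.95
};

const model = new LlamaModel(options);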