
Commit 74be398

feat: export class options types (#29)
1 parent: 5fc0d18

5 files changed: +104 additions, −80 deletions


src/index.ts

Lines changed: 8 additions & 4 deletions
@@ -1,12 +1,12 @@
-import {LlamaGrammar} from "./llamaEvaluator/LlamaGrammar.js";
-import {LlamaChatSession} from "./llamaEvaluator/LlamaChatSession.js";
-import {LlamaModel} from "./llamaEvaluator/LlamaModel.js";
+import {LlamaModel, type LlamaModelOptions} from "./llamaEvaluator/LlamaModel.js";
+import {LlamaGrammar, type LlamaGrammarOptions} from "./llamaEvaluator/LlamaGrammar.js";
+import {LlamaContext, type LlamaContextOptions} from "./llamaEvaluator/LlamaContext.js";
+import {LlamaChatSession, type LlamaChatSessionOptions} from "./llamaEvaluator/LlamaChatSession.js";
 import {AbortError} from "./AbortError.js";
 import {ChatPromptWrapper} from "./ChatPromptWrapper.js";
 import {EmptyChatPromptWrapper} from "./chatWrappers/EmptyChatPromptWrapper.js";
 import {LlamaChatPromptWrapper} from "./chatWrappers/LlamaChatPromptWrapper.js";
 import {GeneralChatPromptWrapper} from "./chatWrappers/GeneralChatPromptWrapper.js";
-import {LlamaContext} from "./llamaEvaluator/LlamaContext.js";
 import {ChatMLPromptWrapper} from "./chatWrappers/ChatMLPromptWrapper.js";
 import {getChatWrapperByBos} from "./chatWrappers/createChatWrapperByBos.js";

@@ -15,9 +15,13 @@ import {type Token} from "./types.js";

 export {
     LlamaModel,
+    type LlamaModelOptions,
     LlamaGrammar,
+    type LlamaGrammarOptions,
     LlamaContext,
+    type LlamaContextOptions,
     LlamaChatSession,
+    type LlamaChatSessionOptions,
     AbortError,
     ChatPromptWrapper,
     EmptyChatPromptWrapper,
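
With the option types re-exported from the entry point, downstream code can name them directly instead of re-deriving them from constructor signatures. A minimal sketch of the idea; the `node-llama-cpp` package name and the `AppLlamaConfig` wrapper type are illustrative assumptions, not part of this commit:

```ts
// Option shapes are now reachable from the package root.
import type {
    LlamaModelOptions,
    LlamaGrammarOptions,
    LlamaContextOptions,
    LlamaChatSessionOptions
} from "node-llama-cpp"; // package name assumed

// Hypothetical app-level config type that reuses the exported option shapes
// instead of re-declaring them by hand.
export type AppLlamaConfig = {
    model: LlamaModelOptions,
    grammar?: LlamaGrammarOptions,
    context?: Omit<LlamaContextOptions, "model">,
    session?: Omit<LlamaChatSessionOptions, "context">
};
```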

src/llamaEvaluator/LlamaChatSession.ts

Lines changed: 9 additions & 6 deletions
@@ -10,6 +10,14 @@ import {LlamaContext} from "./LlamaContext.js";

 const UNKNOWN_UNICODE_CHAR = "\ufffd";

+
+export type LlamaChatSessionOptions = {
+    context: LlamaContext,
+    printLLamaSystemInfo?: boolean,
+    promptWrapper?: ChatPromptWrapper | "auto",
+    systemPrompt?: string
+};
+
 export class LlamaChatSession {
     private readonly _systemPrompt: string;
     private readonly _printLLamaSystemInfo: boolean;
@@ -25,12 +33,7 @@ export class LlamaChatSession {
         printLLamaSystemInfo = false,
         promptWrapper = new GeneralChatPromptWrapper(),
         systemPrompt = defaultChatSystemPrompt
-    }: {
-        context: LlamaContext,
-        printLLamaSystemInfo?: boolean,
-        promptWrapper?: ChatPromptWrapper | "auto",
-        systemPrompt?: string,
-    }) {
+    }: LlamaChatSessionOptions) {
         this._ctx = context;
         this._printLLamaSystemInfo = printLLamaSystemInfo;
         this._systemPrompt = systemPrompt;
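
For illustration, the session options can now be assembled and type-checked separately from the constructor call. A sketch assuming the package is consumed as `node-llama-cpp`; the model path is a placeholder:

```ts
import {
    LlamaModel, LlamaContext, LlamaChatSession,
    type LlamaChatSessionOptions
} from "node-llama-cpp"; // package name assumed

const model = new LlamaModel({modelPath: "path/to/model"}); // placeholder path
const context = new LlamaContext({model});

// The options object is checked against the exported type before the call.
const sessionOptions: LlamaChatSessionOptions = {
    context,
    promptWrapper: "auto"
};

const session = new LlamaChatSession(sessionOptions);
```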

src/llamaEvaluator/LlamaContext.ts

Lines changed: 8 additions & 1 deletion
@@ -4,11 +4,18 @@ import {LLAMAContext} from "./LlamaBins.js";
 import {LlamaModel} from "./LlamaModel.js";
 import {LlamaGrammar} from "./LlamaGrammar.js";

+
+export type LlamaContextOptions = {
+    model: LlamaModel,
+    grammar?: LlamaGrammar,
+    prependBos?: boolean
+};
+
 export class LlamaContext {
     private readonly _ctx: LLAMAContext;
     private _prependBos: boolean;

-    public constructor({model, grammar, prependBos = true}: {model: LlamaModel, grammar?: LlamaGrammar, prependBos?: boolean}) {
+    public constructor({model, grammar, prependBos = true}: LlamaContextOptions) {
         this._ctx = new LLAMAContext(model._model, removeNullFields({
             grammar: grammar?._grammar
         }));
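
A hedged sketch of how the exported type lets helper code accept context options without duplicating the shape; the `createContext` helper and the package name are hypothetical:

```ts
import {LlamaContext, type LlamaContextOptions} from "node-llama-cpp"; // package name assumed

// Hypothetical helper: applies a project-wide default before constructing the context.
// Reusing LlamaContextOptions avoids re-declaring the options object type.
function createContext(options: LlamaContextOptions): LlamaContext {
    return new LlamaContext({prependBos: true, ...options});
}
```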

src/llamaEvaluator/LlamaGrammar.ts

Lines changed: 9 additions & 1 deletion
@@ -4,6 +4,14 @@ import {getGrammarsFolder} from "../utils/getGrammarsFolder.js";
 import {LLAMAGrammar} from "./LlamaBins.js";


+export type LlamaGrammarOptions = {
+    /** GBNF grammar */
+    grammar: string,
+
+    /** print the grammar to stdout */
+    printGrammar?: boolean
+};
+
 export class LlamaGrammar {
     /** @internal */
     public readonly _grammar: LLAMAGrammar;
@@ -15,7 +23,7 @@ export class LlamaGrammar {
      * @param {string} options.grammar - GBNF grammar
      * @param {boolean} [options.printGrammar] - print the grammar to stdout
      */
-    public constructor({grammar, printGrammar = false}: {grammar: string, printGrammar?: boolean}) {
+    public constructor({grammar, printGrammar = false}: LlamaGrammarOptions) {
         this._grammar = new LLAMAGrammar(grammar, {
             printGrammar
         });
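
As a usage sketch, grammar options can be declared against the exported type before construction; the yes/no GBNF grammar and the package name are illustrative:

```ts
import {LlamaGrammar, type LlamaGrammarOptions} from "node-llama-cpp"; // package name assumed

// Minimal illustrative GBNF grammar that only allows "yes" or "no".
const grammarOptions: LlamaGrammarOptions = {
    grammar: 'root ::= "yes" | "no"',
    printGrammar: false
};

const grammar = new LlamaGrammar(grammarOptions);
```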

src/llamaEvaluator/LlamaModel.ts

Lines changed: 70 additions & 68 deletions
@@ -2,6 +2,75 @@ import {removeNullFields} from "../utils/removeNullFields.js";
 import {llamaCppNode, LLAMAModel} from "./LlamaBins.js";


+export type LlamaModelOptions = {
+    /** path to the model on the filesystem */
+    modelPath: string,
+
+    /** If null, a random seed will be used */
+    seed?: number | null,
+
+    /** text context size */
+    contextSize?: number,
+
+    /** prompt processing batch size */
+    batchSize?: number,
+
+    /** number of layers to store in VRAM */
+    gpuLayers?: number,
+
+    /** if true, reduce VRAM usage at the cost of performance */
+    lowVram?: boolean,
+
+    /**
+     * Temperature is a hyperparameter that controls the randomness of the generated text.
+     * It affects the probability distribution of the model's output tokens.
+     * A higher temperature (e.g., 1.5) makes the output more random and creative,
+     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
+     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
+     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
+     *
+     * Set to `0` to disable.
+     */
+    temperature?: number,
+
+    /**
+     * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
+     * An integer number between `1` and the size of the vocabulary.
+     * Set to `0` to disable (which uses the full vocabulary).
+     *
+     * Only relevant when `temperature` is set to a value greater than 0.
+     * */
+    topK?: number,
+
+    /**
+     * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
+     * and samples the next token only from this set.
+     * A float number between `0` and `1`.
+     * Set to `1` to disable.
+     *
+     * Only relevant when `temperature` is set to a value greater than `0`.
+     * */
+    topP?: number,
+
+    /** use fp16 for KV cache */
+    f16Kv?: boolean,
+
+    /** the llama_eval() call computes all logits, not just the last one */
+    logitsAll?: boolean,
+
+    /** only load the vocabulary, no weights */
+    vocabOnly?: boolean,
+
+    /** use mmap if possible */
+    useMmap?: boolean,
+
+    /** force system to keep model in RAM */
+    useMlock?: boolean,
+
+    /** embedding mode only */
+    embedding?: boolean
+};
+
 export class LlamaModel {
     /** @internal */
     public readonly _model: LLAMAModel;
@@ -46,74 +115,7 @@ export class LlamaModel {
     public constructor({
         modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers,
         lowVram, temperature = 0, topK = 40, topP = 0.95, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding
-    }: {
-        /** path to the model on the filesystem */
-        modelPath: string,
-
-        /** If null, a random seed will be used */
-        seed?: number | null,
-
-        /** text context size */
-        contextSize?: number,
-
-        /** prompt processing batch size */
-        batchSize?: number,
-
-        /** number of layers to store in VRAM */
-        gpuLayers?: number,
-
-        /** if true, reduce VRAM usage at the cost of performance */
-        lowVram?: boolean,
-
-        /**
-         * Temperature is a hyperparameter that controls the randomness of the generated text.
-         * It affects the probability distribution of the model's output tokens.
-         * A higher temperature (e.g., 1.5) makes the output more random and creative,
-         * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
-         * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
-         * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
-         *
-         * Set to `0` to disable.
-         */
-        temperature?: number,
-
-        /**
-         * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
-         * An integer number between `1` and the size of the vocabulary.
-         * Set to `0` to disable (which uses the full vocabulary).
-         *
-         * Only relevant when `temperature` is set to a value greater than 0.
-         * */
-        topK?: number,
-
-        /**
-         * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
-         * and samples the next token only from this set.
-         * A float number between `0` and `1`.
-         * Set to `1` to disable.
-         *
-         * Only relevant when `temperature` is set to a value greater than `0`.
-         * */
-        topP?: number,
-
-        /** use fp16 for KV cache */
-        f16Kv?: boolean,
-
-        /** the llama_eval() call computes all logits, not just the last one */
-        logitsAll?: boolean,
-
-        /** only load the vocabulary, no weights */
-        vocabOnly?: boolean,
-
-        /** use mmap if possible */
-        useMmap?: boolean,
-
-        /** force system to keep model in RAM */
-        useMlock?: boolean,
-
-        /** embedding mode only */
-        embedding?: boolean
-    }) {
+    }: LlamaModelOptions) {
         this._model = new LLAMAModel(modelPath, removeNullFields({
             seed: seed != null ? Math.max(-1, seed) : undefined,
             contextSize,
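
A usage sketch for the exported model options type, assuming the `node-llama-cpp` package name; the path and sampling values are illustrative, and the defaults shown in the constructor above are unchanged by this commit:

```ts
import {LlamaModel, type LlamaModelOptions} from "node-llama-cpp"; // package name assumed

// Configuration factored out of the constructor call and checked against the exported type.
const modelOptions: LlamaModelOptions = {
    modelPath: "path/to/model", // placeholder path
    contextSize: 1024 * 4,      // same as the constructor default above
    temperature: 0.8,           // illustrative sampling values
    topK: 40,
    topP: 0.95
};

const model = new LlamaModel(modelOptions);
```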
