-import {LlamaContext} from "./LlamaContext.js";
-import {LLAMAContext, llamaCppNode, LLAMAModel} from "./LlamaBins.js";
+import {llamaCppNode, LLAMAModel} from "./LlamaBins.js";


 export class LlamaModel {
-    private readonly _model: LLAMAModel;
-    private readonly _prependBos: boolean;
+    /** @internal */
+    public readonly _model: LLAMAModel;

-    public constructor({modelPath, prependBos = true}: { modelPath: string, prependBos?: boolean }) {
-        this._model = new LLAMAModel(modelPath);
-        this._prependBos = prependBos;
-    }
-
-    public createContext() {
-        return new LlamaContext({
-            ctx: new LLAMAContext(this._model),
-            prependBos: this._prependBos
-        });
+    /**
+     * options source:
+     * https://github.com/ggerganov/llama.cpp/blob/b5ffb2849d23afe73647f68eec7b68187af09be6/llama.h#L102 (struct llama_context_params)
+     * @param {object} options
+     * @param {string} options.modelPath - path to the model on the filesystem
+     * @param {number | null} [options.seed] - If null, a random seed will be used
+     * @param {number} [options.contextSize] - text context size
+     * @param {number} [options.batchSize] - prompt processing batch size
+     * @param {number} [options.gpuCores] - number of layers to store in VRAM
+     * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
+     * @param {boolean} [options.f16Kv] - use fp16 for KV cache
+     * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
+     * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
+     * @param {boolean} [options.useMmap] - use mmap if possible
+     * @param {boolean} [options.useMlock] - force system to keep model in RAM
+     * @param {boolean} [options.embedding] - embedding mode only
+     */
+    public constructor({
+        modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuCores,
+        lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding
+    }: {
+        modelPath: string, seed?: number | null, contextSize?: number, batchSize?: number, gpuCores?: number,
+        lowVram?: boolean, f16Kv?: boolean, logitsAll?: boolean, vocabOnly?: boolean, useMmap?: boolean, useMlock?: boolean,
+        embedding?: boolean
+    }) {
+        this._model = new LLAMAModel(modelPath, removeNullFields({
+            seed: seed != null ? Math.max(-1, seed) : undefined,
+            contextSize,
+            batchSize,
+            gpuCores,
+            lowVram,
+            f16Kv,
+            logitsAll,
+            vocabOnly,
+            useMmap,
+            useMlock,
+            embedding
+        }));
     }

     public static get systemInfo() {
         return llamaCppNode.systemInfo();
     }
 }
+
+function removeNullFields<T extends object>(obj: T): T {
+    const newObj: T = Object.assign({}, obj);
+
+    for (const key in obj) {
+        if (newObj[key] == null)
+            delete newObj[key];
+    }
+
+    return newObj;
+}
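For reference, a minimal usage sketch of the new constructor options, assuming LlamaModel is exported from the package entry point; the model path and option values below are placeholders. Because removeNullFields strips every option that was not passed, unspecified settings fall through to the llama.cpp defaults rather than being sent as null.

import {LlamaModel} from "node-llama-cpp";

// Hypothetical model path; only modelPath is required, all other options are optional.
const model = new LlamaModel({
    modelPath: "./models/model.bin",
    contextSize: 4096, // text context size
    gpuCores: 32,      // number of layers to store in VRAM
    useMlock: true     // force system to keep model in RAM
});

// Static getter added earlier in this file; reports the llama.cpp build/system info string.
console.log(LlamaModel.systemInfo);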