1 | 1 | // @ts-check |
2 | 2 | /** |
3 | 3 | * @module ai |
4 | | - * |
5 | | - * Provides high level classes for common AI tasks. |
6 | | - * |
7 | | - * If you download a model like `mistral-7b-openorca.Q4_0.gguf` from Hugging |
8 | | - * Face, you can construct an LLM instance in JavaScript with a prompt. Prompt |
9 | | - * syntax isn't as rigid as programming syntax, so you'll usually want to read |
10 | | - * what the model's author says about prompting; for example, this guide may be worth reading: |
11 | | - * |
12 | | - * https://docs.mistral.ai/guides/prompting_capabilities |
13 | | - * |
14 | | - * Example usage: |
15 | | - * |
16 | | - * ```js |
17 | | - * import { LLM } from 'socket:ai' |
18 | | - * |
19 | | - * const llm = new LLM({ |
20 | | - * path: 'model.gguf', |
21 | | - * prompt: '...' // insert your prompt here. |
22 | | - * }) |
23 | | - * |
24 | | - * llm.on('end', () => { |
25 | | - * // end of the token stream. |
26 | | - * }) |
27 | | - * |
28 | | - * llm.on('data', data => { |
29 | | - * // a new token has arrived in the token stream. |
30 | | - * }) |
31 | | - * ``` |
32 | 4 | */ |
33 | | -import { EventEmitter } from './events.js' |
34 | | -import { rand64 } from './crypto.js' |
35 | | -import process from './process.js' |
36 | | -import ipc from './ipc.js' |
37 | | -import gc from './gc.js' |
38 | | - |
39 | | -import * as exports from './ai.js' |
40 | | - |
41 | | -/** |
42 | | - * A class to interact with large language models (using llama.cpp) |
43 | | - */ |
44 | | -export class LLM extends EventEmitter { |
45 | | - /** |
46 | | - * Constructs an LLM instance. Each parameter is designed to configure and control |
47 | | - * the behavior of the underlying large language model provided by llama.cpp. |
48 | | - * @param {Object} options - Configuration options for the LLM instance. |
49 | | - * @param {string} options.path - The file path to the model in .gguf format. This model file contains |
50 | | - * the weights and configuration necessary for initializing the language model. |
51 | | - * @param {string} options.prompt - The initial input text to the model, setting the context or query |
52 | | - * for generating responses. The model uses this as a starting point for text generation. |
53 | | - * @param {string} [options.id] - An optional unique identifier for this specific instance of the model, |
54 | | - * useful for tracking or referencing the model in multi-model setups. |
55 | | - * @param {number} [options.n_ctx=1024] - Specifies the maximum number of tokens that the model can consider |
56 | | - * for a single query. This is crucial for managing memory and computational |
57 | | - * efficiency. Exceeding the model's configuration may lead to errors or truncated outputs. |
58 | | - * @param {number} [options.n_threads=8] - The number of threads allocated for the model's computation, |
59 | | - * affecting performance and speed of response generation. |
60 | | - * @param {number} [options.temp=1.1] - Sampling temperature controls the randomness of predictions. |
61 | | - * Higher values increase diversity, potentially at the cost of coherence. |
62 | | - * @param {number} [options.max_tokens=512] - The upper limit on the number of tokens that the model can generate |
63 | | - * in response to a single prompt. This prevents runaway generations. |
64 | | - * @param {number} [options.n_gpu_layers=32] - The number of GPU layers dedicated to the model processing. |
65 | | - * More layers can increase accuracy and complexity of the outputs. |
66 | | - * @param {number} [options.n_keep=0] - The number of tokens from the initial prompt to retain when the |
67 | | - * context window fills up and older tokens are evicted. |
68 | | - * @param {number} [options.n_batch=0] - The size of processing batches. Larger batch sizes can reduce |
69 | | - * the time per token generation by parallelizing computations. |
70 | | - * @param {number} [options.n_predict=0] - Specifies how many forward predictions the model should make |
71 | | - * from the current state. This can pre-generate responses or calculate probabilities. |
72 | | - * @param {number} [options.grp_attn_n=0] - Group attention parameter 'N' modifies how attention mechanisms |
73 | | - * within the model are grouped and interact, affecting the model’s focus and accuracy. |
74 | | - * @param {number} [options.grp_attn_w=0] - Group attention parameter 'W' adjusts the width of each attention group, |
75 | | - * influencing the breadth of context considered by each attention group. |
76 | | - * @param {number} [options.seed=0] - A seed for the random number generator used in the model. Setting this ensures |
77 | | - * consistent results in model outputs, important for reproducibility in experiments. |
78 | | - * @param {number} [options.top_k=0] - Limits the model's output choices to the top 'k' most probable next words, |
79 | | - * reducing the risk of less likely, potentially nonsensical outputs. |
80 | | - * @param {number} [options.tok_p=0.0] - Top-p (nucleus) sampling threshold, filtering the token selection pool |
81 | | - * to only those whose cumulative probability exceeds this value, enhancing output relevance. |
82 | | - * @param {number} [options.min_p=0.0] - Sets a minimum probability filter for token generation, ensuring |
83 | | - * that generated tokens have at least this likelihood of being relevant or coherent. |
84 | | - * @param {number} [options.tfs_z=0.0] - Tail-free sampling parameter 'z', which prunes low-probability |
85 | | - * tokens from the sampling distribution; a value of 1.0 disables tail-free sampling. |
86 | | - * @throws {Error} Throws an error if the model path is not provided, as the model cannot initialize without it. |
87 | | - */ |
88 | | - |
89 | | - constructor (options = null) { |
90 | | - super() |
91 | | - |
92 | | - options = { ...options } |
93 | | - if (!options.path) { |
94 | | - throw new Error('expected a path to a valid model (.gguf)') |
95 | | - } |
96 | | - |
97 | | - this.path = options.path |
98 | | - this.prompt = options.prompt |
99 | | - this.id = options.id || rand64() |
100 | | - |
101 | | - const opts = { |
102 | | - id: this.id, |
103 | | - path: this.path, |
104 | | - prompt: this.prompt, |
105 | | - // @ts-ignore |
106 | | - antiprompt: options.antiprompt, |
107 | | - // @ts-ignore |
108 | | - conversation: options.conversation === true, |
109 | | - // @ts-ignore |
110 | | - chatml: options.chatml === true, |
111 | | - // @ts-ignore |
112 | | - instruct: options.instruct === true, |
113 | | - n_ctx: options.n_ctx || 1024, // context window size in tokens, defaulting to 1024 |
114 | | - n_threads: options.n_threads || 8, |
115 | | - temp: options.temp || 1.1, // sampling temperature as a number, defaulting to 1.1 |
116 | | - max_tokens: options.max_tokens || 512, |
117 | | - n_gpu_layers: options.n_gpu_layers || 32, |
118 | | - n_keep: options.n_keep || 0, |
119 | | - n_batch: options.n_batch || 0, |
120 | | - n_predict: options.n_predict || 0, |
121 | | - grp_attn_n: options.grp_attn_n || 0, |
122 | | - grp_attn_w: options.grp_attn_w || 0, |
123 | | - seed: options.seed || 0, |
124 | | - top_k: options.top_k || 0, |
125 | | - tok_p: options.tok_p || 0.0, |
126 | | - min_p: options.min_p || 0.0, |
127 | | - tfs_z: options.tfs_z || 0.0 |
128 | | - } |
129 | | - |
130 | | - globalThis.addEventListener('data', event => { |
131 | | - // @ts-ignore |
132 | | - const detail = event.detail |
133 | | - const { err, data, source } = detail.params |
134 | | - |
135 | | - if (err && BigInt(err.id) === this.id) { |
136 | | - return this.emit('error', err) |
137 | | - } |
138 | | - |
139 | | - if (!data || BigInt(data.id) !== this.id) return |
140 | | - |
141 | | - if (source === 'ai.llm.log') { |
142 | | - this.emit('log', data.message) |
143 | | - return |
144 | | - } |
145 | | - |
146 | | - if (source === 'ai.llm.chat') { |
147 | | - if (data.complete) { |
148 | | - return this.emit('end') |
149 | | - } |
150 | | - |
151 | | - this.emit('data', decodeURIComponent(data.token)) |
152 | | - } |
153 | | - }) |
154 | | - |
155 | | - ipc.request('ai.llm.create', opts) |
156 | | - .then((result) => { |
157 | | - if (result.err) { |
158 | | - this.emit('error', result.err) |
159 | | - } |
160 | | - }, (err) => { |
161 | | - this.emit('error', err) |
162 | | - }) |
163 | | - } |
164 | | - |
165 | | - /** |
166 | | - * Tell the LLM to stop after the next token. |
167 | | - * @returns {Promise<void>} A promise that resolves when the LLM stops. |
168 | | - */ |
169 | | - async stop () { |
170 | | - return await ipc.request('ai.llm.stop', { id: this.id }) |
171 | | - } |
172 | | - |
173 | | - /** |
174 | | - * @ignore |
175 | | - */ |
176 | | - [gc.finalizer] (options) { |
177 | | - return { |
178 | | - args: [this.id, options], |
179 | | - async handle (id) { |
180 | | - if (process.env.DEBUG) { |
181 | | - console.warn('Closing LLM on garbage collection') |
182 | | - } |
183 | | - |
184 | | - await ipc.request('ai.llm.destroy', { id }, options) |
185 | | - } |
186 | | - } |
187 | | - } |
188 | | - |
189 | | - /** |
190 | | - * Send a message to the chat. |
191 | | - * @param {string} message - The message to send to the chat. |
192 | | - * @returns {Promise<any>} A promise that resolves with the response from the chat. |
193 | | - */ |
194 | | - async chat (message) { |
195 | | - return await ipc.request('ai.llm.chat', { id: this.id, message }) |
196 | | - } |
197 | | -} |
198 | | - |
199 | | -export default exports |
| 5 | +import llm from './ai/llm.js' |
| 6 | +import chat from './ai/chat.js' |
| 7 | +export { llm, chat } |
| 8 | +export default { llm, chat } |
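
After this change, `ai.js` becomes a thin barrel that re-exports the new `./ai/llm.js` and `./ai/chat.js` submodules. Below is a minimal sketch of how a consumer might use the reorganized module, assuming the `llm` submodule still exposes an `LLM` class with the same constructor options and `'data'`/`'end'` events as the class removed above; that class name and shape are not shown in this diff, so treat them as an assumption.

```js
// Sketch only: assumes './ai/llm.js' exposes an `LLM` class with the same
// constructor options and 'data'/'end' events as the class removed above.
import ai from 'socket:ai'

const llm = new ai.llm.LLM({
  path: 'model.gguf', // path to a .gguf model file
  prompt: '...'       // insert your prompt here
})

let output = ''

llm.on('data', (token) => {
  output += token // a new token has arrived in the token stream
})

llm.on('end', () => {
  console.log(output) // end of the token stream
})
```

If the submodules follow this shape, the barrel keeps existing `socket:ai` imports working while letting the LLM and chat implementations evolve in their own files.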