Commit 4fd0b11

refactor(): improve concurrency, upgrade llamacpp, improve 'socket:ai'
1 parent 8b470f9 commit 4fd0b11

41 files changed: +7329 -4966 lines changed

api/ai.js

Lines changed: 4 additions & 195 deletions
@@ -1,199 +1,8 @@
 // @ts-check
 /**
  * @module ai
- *
- * Provides high level classes for common AI tasks.
- *
- * If you download a model like `mistral-7b-openorca.Q4_0.gguf` from Hugging
- * Face, you can construct in JavaScript with a prompt. Prompt syntax isn't
- * concrete like programming syntax, so you'll usually want to know what the
- * author has to say about prompting, for example this might be worth reading...
- *
- * https://docs.mistral.ai/guides/prompting_capabilities
- *
- * Example usage:
- *
- * ```js
- * import { LLM } from 'socket:ai'
- *
- * const llm = new LLM({
- *   path: 'model.gguf',
- *   prompt: '...' // insert your prompt here.
- * })
- *
- * llm.on('end', () => {
- *   // end of the token stream.
- * })
- *
- * llm.on('data', data => {
- *   // a new token has arrived in the token stream.
- * })
- * ```
  */
-import { EventEmitter } from './events.js'
-import { rand64 } from './crypto.js'
-import process from './process.js'
-import ipc from './ipc.js'
-import gc from './gc.js'
-
-import * as exports from './ai.js'
-
-/**
- * A class to interact with large language models (using llama.cpp)
- */
-export class LLM extends EventEmitter {
-  /**
-   * Constructs an LLM instance. Each parameter is designed to configure and control
-   * the behavior of the underlying large language model provided by llama.cpp.
-   * @param {Object} options - Configuration options for the LLM instance.
-   * @param {string} options.path - The file path to the model in .gguf format. This model file contains
-   *   the weights and configuration necessary for initializing the language model.
-   * @param {string} options.prompt - The initial input text to the model, setting the context or query
-   *   for generating responses. The model uses this as a starting point for text generation.
-   * @param {string} [options.id] - An optional unique identifier for this specific instance of the model,
-   *   useful for tracking or referencing the model in multi-model setups.
-   * @param {number} [options.n_ctx=1024] - Specifies the maximum number of tokens that the model can consider
-   *   for a single query. This is crucial for managing memory and computational
-   *   efficiency. Exceeding the model's configuration may lead to errors or truncated outputs.
-   * @param {number} [options.n_threads=8] - The number of threads allocated for the model's computation,
-   *   affecting performance and speed of response generation.
-   * @param {number} [options.temp=1.1] - Sampling temperature controls the randomness of predictions.
-   *   Higher values increase diversity, potentially at the cost of coherence.
-   * @param {number} [options.max_tokens=512] - The upper limit on the number of tokens that the model can generate
-   *   in response to a single prompt. This prevents runaway generations.
-   * @param {number} [options.n_gpu_layers=32] - The number of GPU layers dedicated to the model processing.
-   *   More layers can increase accuracy and complexity of the outputs.
-   * @param {number} [options.n_keep=0] - Determines how many of the top generated responses are retained after
-   *   the initial generation phase. Useful for models that generate multiple outputs.
-   * @param {number} [options.n_batch=0] - The size of processing batches. Larger batch sizes can reduce
-   *   the time per token generation by parallelizing computations.
-   * @param {number} [options.n_predict=0] - Specifies how many forward predictions the model should make
-   *   from the current state. This can pre-generate responses or calculate probabilities.
-   * @param {number} [options.grp_attn_n=0] - Group attention parameter 'N' modifies how attention mechanisms
-   *   within the model are grouped and interact, affecting the model’s focus and accuracy.
-   * @param {number} [options.grp_attn_w=0] - Group attention parameter 'W' adjusts the width of each attention group,
-   *   influencing the breadth of context considered by each attention group.
-   * @param {number} [options.seed=0] - A seed for the random number generator used in the model. Setting this ensures
-   *   consistent results in model outputs, important for reproducibility in experiments.
-   * @param {number} [options.top_k=0] - Limits the model's output choices to the top 'k' most probable next words,
-   *   reducing the risk of less likely, potentially nonsensical outputs.
-   * @param {number} [options.tok_p=0.0] - Top-p (nucleus) sampling threshold, filtering the token selection pool
-   *   to only those whose cumulative probability exceeds this value, enhancing output relevance.
-   * @param {number} [options.min_p=0.0] - Sets a minimum probability filter for token generation, ensuring
-   *   that generated tokens have at least this likelihood of being relevant or coherent.
-   * @param {number} [options.tfs_z=0.0] - Temperature factor scale for zero-shot learning scenarios, adjusting how
-   *   the model weights novel or unseen prompts during generation.
-   * @throws {Error} Throws an error if the model path is not provided, as the model cannot initialize without it.
-   */
-
-  constructor (options = null) {
-    super()
-
-    options = { ...options }
-    if (!options.path) {
-      throw new Error('expected a path to a valid model (.gguf)')
-    }
-
-    this.path = options.path
-    this.prompt = options.prompt
-    this.id = options.id || rand64()
-
-    const opts = {
-      id: this.id,
-      path: this.path,
-      prompt: this.prompt,
-      // @ts-ignore
-      antiprompt: options.antiprompt,
-      // @ts-ignore
-      conversation: options.conversation === true,
-      // @ts-ignore
-      chatml: options.chatml === true,
-      // @ts-ignore
-      instruct: options.instruct === true,
-      n_ctx: options.n_ctx || 1024, // simplified, assuming default value of 1024 if not specified
-      n_threads: options.n_threads || 8,
-      temp: options.temp || 1.1, // assuming `temp` should be a number, not a string
-      max_tokens: options.max_tokens || 512,
-      n_gpu_layers: options.n_gpu_layers || 32,
-      n_keep: options.n_keep || 0,
-      n_batch: options.n_batch || 0,
-      n_predict: options.n_predict || 0,
-      grp_attn_n: options.grp_attn_n || 0,
-      grp_attn_w: options.grp_attn_w || 0,
-      seed: options.seed || 0,
-      top_k: options.top_k || 0,
-      tok_p: options.tok_p || 0.0,
-      min_p: options.min_p || 0.0,
-      tfs_z: options.tfs_z || 0.0
-    }
-
-    globalThis.addEventListener('data', event => {
-      // @ts-ignore
-      const detail = event.detail
-      const { err, data, source } = detail.params
-
-      if (err && BigInt(err.id) === this.id) {
-        return this.emit('error', err)
-      }
-
-      if (!data || BigInt(data.id) !== this.id) return
-
-      if (source === 'ai.llm.log') {
-        this.emit('log', data.message)
-        return
-      }
-
-      if (source === 'ai.llm.chat') {
-        if (data.complete) {
-          return this.emit('end')
-        }
-
-        this.emit('data', decodeURIComponent(data.token))
-      }
-    })
-
-    ipc.request('ai.llm.create', opts)
-      .then((result) => {
-        if (result.err) {
-          this.emit('error', result.err)
-        }
-      }, (err) => {
-        this.emit('error', err)
-      })
-  }
-
-  /**
-   * Tell the LLM to stop after the next token.
-   * @returns {Promise<void>} A promise that resolves when the LLM stops.
-   */
-  async stop () {
-    return await ipc.request('ai.llm.stop', { id: this.id })
-  }
-
-  /**
-   * @ignore
-   */
-  [gc.finalizer] (options) {
-    return {
-      args: [this.id, options],
-      async handle (id) {
-        if (process.env.DEBUG) {
-          console.warn('Closing LLM on garbage collection')
-        }
-
-        await ipc.request('ai.llm.destroy', { id }, options)
-      }
-    }
-  }
-
-  /**
-   * Send a message to the chat.
-   * @param {string} message - The message to send to the chat.
-   * @returns {Promise<any>} A promise that resolves with the response from the chat.
-   */
-  async chat (message) {
-    return await ipc.request('ai.llm.chat', { id: this.id, message })
-  }
-}
-
-export default exports
+import llm from './ai/llm.js'
+import chat from './ai/chat.js'
+export { llm, chat }
+export default { llm, chat }
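
After this change the public surface of `api/ai.js` is just the two re-exported submodules. A minimal sketch of the resulting import surface, assuming the `socket:ai` specifier from the removed JSDoc example; the members of `llm` and `chat` are defined in the new `api/ai/llm.js` and `api/ai/chat.js`, which are not shown in this diff:

```js
// Sketch of the import surface implied by the new api/ai.js above.
// What `llm` and `chat` actually contain lives in api/ai/llm.js and
// api/ai/chat.js, which are outside this excerpt.
import { llm, chat } from 'socket:ai' // named exports
import ai from 'socket:ai'            // default export: { llm, chat }

console.log(ai.llm === llm)  // true: both refer to the same binding
console.log(ai.chat === chat) // true
```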
