Commit fdf0c07

move old files to legacy folder
1 parent 120d05b commit fdf0c07

19 files changed: +1526 −43 lines

Makefile

Lines changed: 0 additions & 13 deletions
@@ -1455,21 +1455,8 @@ llama-server: \
 examples/server/server.cpp \
 examples/server/utils.hpp \
 examples/server/httplib.h \
-examples/server/colorthemes.css.hpp \
-examples/server/style.css.hpp \
-examples/server/theme-beeninorder.css.hpp \
-examples/server/theme-ketivah.css.hpp \
-examples/server/theme-mangotango.css.hpp \
-examples/server/theme-playground.css.hpp \
-examples/server/theme-polarnight.css.hpp \
-examples/server/theme-snowstorm.css.hpp \
 examples/server/index.html.hpp \
-examples/server/index-new.html.hpp \
-examples/server/index.js.hpp \
 examples/server/completion.js.hpp \
-examples/server/system-prompts.js.hpp \
-examples/server/prompt-formats.js.hpp \
-examples/server/json-schema-to-grammar.mjs.hpp \
 examples/server/loading.html.hpp \
 common/json.hpp \
 common/stb_image.h \
Lines changed: 209 additions & 0 deletions
@@ -0,0 +1,209 @@
const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};

let generation_settings = null;


// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Add any leftover data to the current chunk of data
      const text = leftover + decoder.decode(result.value);

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      // Split the text into lines
      let lines = text.split('\n');

      // If the text doesn't end with a line break, then the last line is incomplete
      // Store it in leftover to be added to the next chunk of data
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = ""; // Reset leftover if we have a line break at the end
      }

      // Parse all sse events and add them to result
      const regex = /^(\S+):\s(.*)$/gm;
      for (const line of lines) {
        const match = regex.exec(line);
        if (match) {
          result[match[1]] = match[2];
          if (result.data === '[DONE]') {
            cont = false;
            break;
          }

          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // yield
            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
          if (result.error) {
            try {
              result.error = JSON.parse(result.error);
              if (result.error.message.includes('slot unavailable')) {
                // Throw an error to be caught by upstream callers
                throw new Error('slot unavailable');
              } else {
                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
              }
            } catch(e) {
              console.error(`llama.cpp error ${result.error}`)
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }

  return content;
}

// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
      }
      if (chunk.data.generation_settings) {
        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
      }
      if (chunk.data.timings) {
        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}

// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//     llamaPromise(prompt).then((content) => {
//       document.write(content)
//     })
//
//     or
//
//     const content = await llamaPromise(prompt)
//     document.write(content)
//
export const llamaPromise = (prompt, params = {}, config = {}) => {
  return new Promise(async (resolve, reject) => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        content += chunk.data.content;
      }
      resolve(content);
    } catch (error) {
      reject(error);
    }
  });
};

/**
 * (deprecated)
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
}

// Get the model info from the server. This is useful for getting the context window and so on.
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const api_url = config.api_url?.replace(/\/+$/, '') || "";
    const props = await fetch(`${api_url}/props`).then(r => r.json());
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
}
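For reference, a minimal usage sketch of the module above (not part of this commit): it assumes the script is loaded as an ES module from a page served by llama-server, so that the relative /completion and /props endpoints resolve to the server; the prompt text and the 30-second timeout are hypothetical, and only exports defined above are used.

import { llama, llamaModelInfo } from '/completion.js';

// Fetch the server's default generation settings once (goes through the /props endpoint).
const settings = await llamaModelInfo();
console.log('default generation settings:', settings);

// Stream a completion and allow it to be cancelled from the outside via config.controller.
const controller = new AbortController();
setTimeout(() => controller.abort(), 30000); // hypothetical safety timeout

try {
  for await (const chunk of llama('Tell me a joke', { n_predict: 128 }, { controller })) {
    console.log(chunk.data.content);
  }
} catch (e) {
  // llama() rethrows AbortError after cleanup; anything else is unexpected here.
  if (e.name !== 'AbortError') throw e;
}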
File renamed without changes.
