Commit ba98442

Add support of fp16 shaders in canary (#116)
1 parent ace6160 commit ba98442

File tree

11 files changed: +115 additions, −67 deletions

examples/simple-chat/package.json

Lines changed: 4 additions & 2 deletions
@@ -8,9 +8,11 @@
     "build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash --public-url /web-llm"
   },
   "devDependencies": {
+    "buffer": "^5.7.1",
     "parcel": "^2.8.3",
-    "typescript": "^4.9.5",
-    "tslib": "^2.3.1"
+    "process": "^0.11.10",
+    "tslib": "^2.3.1",
+    "typescript": "^4.9.5"
   },
   "dependencies": {
     "@mlc-ai/web-llm": "file:../.."

examples/simple-chat/src/gh-config.js

Lines changed: 7 additions & 1 deletion
@@ -7,10 +7,16 @@ export default {
     {
       "model_url": "https://huggingface.co/mlc-ai/mlc-chat-vicuna-v1-7b-q4f32_0/resolve/main/",
       "local_id": "vicuna-v1-7b-q4f32_0"
+    },
+    {
+      "model_url": "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_0/resolve/main/",
+      "local_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_0",
+      "required_features": ["shader-f16"],
     }
   ],
   "model_lib_map": {
     "vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/vicuna-v1-7b-q4f32_0-webgpu.wasm",
-    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm"
+    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm",
+    "RedPajama-INCITE-Chat-3B-v1-q4f16_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm"
   }
 }
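For reference, the sketch below (not part of this commit) shows how an application could pass such a config, including the new `required_features` field, to `ChatModule.reload`. The URLs and the `reload(localId, chatOpts, appConfig)` call shape follow the files in this diff; the variable names and the assumption that the exported `AppConfig` mirrors the shape of gh-config.js are illustrative.

```ts
import { ChatModule, AppConfig } from "@mlc-ai/web-llm";

// Illustrative app config: the fp16 model declares the WebGPU feature it
// needs, so reload() can fail with a clear error when it is unavailable.
const appConfig: AppConfig = {
  model_list: [
    {
      model_url: "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_0/resolve/main/",
      local_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_0",
      required_features: ["shader-f16"],
    },
  ],
  model_lib_map: {
    "RedPajama-INCITE-Chat-3B-v1-q4f16_0":
      "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm",
  },
};

async function main() {
  const chat = new ChatModule();
  // Same call shape as simple_chat.ts uses below.
  await chat.reload("RedPajama-INCITE-Chat-3B-v1-q4f16_0", undefined, appConfig);
}
```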
Lines changed: 22 additions & 14 deletions
@@ -1,18 +1,26 @@
 // config used when serving from local mlc-llm/dist
 // use web-llm/script/serve_mlc_llm_dist.sh to start the artifact server
 export default {
-  "model_list": [
-    {
-      "model_url": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f32_0/params/",
-      "local_id": "RedPajama-INCITE-Chat-3B-v1-q4f32_0"
-    },
-    {
-      "model_url": "http://localhost:8000/vicuna-v1-7b-q4f32_0/params/",
-      "local_id": "vicuna-v1-7b-q4f32_0"
-    }
-  ],
-  "model_lib_map": {
-    "vicuna-v1-7b-q4f32_0": "http://localhost:8000/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm",
-    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f32_0/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm"
-  }
+  "model_list": [
+    {
+      "model_url": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f32_0/params/",
+      "local_id": "RedPajama-INCITE-Chat-3B-v1-q4f32_0"
+    },
+    {
+      "model_url": "http://localhost:8000/vicuna-v1-7b-q4f32_0/params/",
+      "local_id": "vicuna-v1-7b-q4f32_0"
+    },
+    // fp16 options are enabled through chrome canary flags
+    // chrome --enable-dawn-features=allow_unsafe_apis
+    {
+      "model_url": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f16_0/params/",
+      "local_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_0",
+      "required_features": ["shader-f16"]
+    }
+  ],
+  "model_lib_map": {
+    "vicuna-v1-7b-q4f32_0": "http://localhost:8000/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm",
+    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f32_0/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm",
+    "RedPajama-INCITE-Chat-3B-v1-q4f16_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f16_0/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm"
+  }
 }

examples/simple-chat/src/simple_chat.ts

Lines changed: 15 additions & 18 deletions
@@ -1,5 +1,5 @@
 import appConfig from "./app-config";
-import { ChatModule } from "@mlc-ai/web-llm";
+import { ChatModule, ModelRecord } from "@mlc-ai/web-llm";
 
 function getElementAndCheck(id: string): HTMLElement {
   const element = document.getElementById(id);
@@ -9,11 +9,6 @@ function getElementAndCheck(id: string): HTMLElement {
   return element;
 }
 
-interface ModelRecord {
-  model_url: string;
-  local_id: string;
-}
-
 interface AppConfig {
   model_list: Array<ModelRecord>;
   model_lib_map?: Record<string, string>;
@@ -159,7 +154,7 @@ class ChatUI {
   }
 
   private resetChatHistory() {
-    const clearTags = ["left", "right", "init"];
+    const clearTags = ["left", "right", "init", "error"];
     for (const tag of clearTags) {
       const matches = this.uiChat.getElementsByClassName(`msg ${tag}-msg`);
       for (const item of matches) {
@@ -173,13 +168,23 @@
 
   private async asyncInitChat() {
     if (this.chatLoaded) return;
-
+    this.requestInProgress = true;
     this.appendMessage("init", "");
     const initProgressCallback = (report) => {
       this.updateLastMessage("init", report.text);
     }
     this.chat.setInitProgressCallback(initProgressCallback);
-    await this.chat.reload(this.selectedModel, undefined, this.config);
+
+    try {
+      await this.chat.reload(this.selectedModel, undefined, this.config);
+    } catch (err) {
+      this.appendMessage("error", "Init error, " + err.toString());
+      console.log(err.stack);
+      this.unloadChat();
+      this.requestInProgress = false;
+      return;
+    }
+    this.requestInProgress = false;
     this.chatLoaded = true;
   }
 
@@ -192,16 +197,8 @@
    * Run generate
    */
   private async asyncGenerate() {
+    await this.asyncInitChat();
     this.requestInProgress = true;
-    try {
-      await this.asyncInitChat();
-    } catch (err) {
-      this.appendMessage("error", "Init error, " + err.toString());
-      console.log(err.stack);
-      this.unloadChat();
-      this.requestInProgress = false;
-      return;
-    }
    const prompt = this.uiChatInput.value;
    if (prompt == "") {
      this.requestInProgress = false;
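With `ModelRecord` now exported from the package, a UI could also pre-filter its model picker by the adapter's capabilities instead of waiting for `reload` to throw. The helper below is only a sketch (it is not part of this commit) and assumes WebGPU type definitions are available:

```ts
import { ModelRecord } from "@mlc-ai/web-llm";

// Hypothetical helper: keep only models whose required_features
// (e.g. "shader-f16") are all reported by the detected WebGPU adapter.
async function listRunnableModels(models: ModelRecord[]): Promise<ModelRecord[]> {
  const adapter = await navigator.gpu?.requestAdapter();
  if (!adapter) return [];
  return models.filter((m) =>
    (m.required_features ?? []).every((f) => adapter.features.has(f))
  );
}
```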

package-lock.json

Lines changed: 3 additions & 2 deletions
Generated file; diff not rendered.

package.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "@mlc-ai/web-llm",
-  "version": "0.1.0",
+  "version": "0.1.2",
   "description": "Hardware accelerated language model chats on browsers",
   "main": "lib/index.js",
   "types": "lib/index.d.ts",

site/index.md

Lines changed: 4 additions & 2 deletions
@@ -27,15 +27,17 @@ Won’t it be even more amazing if we can simply open up a browser and directly
 
 ## Instructions
 
-WebGPU just shipped to Chrome. You can try out the latest Chrome 113. Chrome version ≤ 112 is not supported, and if you are using it, the demo will raise an error like `Find an error initializing the WebGPU device OperationError: Required limit (1073741824) is greater than the supported limit (268435456). - While validating maxBufferSize - While validating required limits.`
+WebGPU just shipped to Chrome. You can try out the latest Chrome 113. Chrome version ≤ 112 is not supported, and if you are using it,
+the demo will raise an error like `Find an error initializing the WebGPU device OperationError: Required limit (1073741824) is greater than the supported limit (268435456). - While validating maxBufferSize - While validating required limits.`
 We have tested it on Windows and Mac, you will need a GPU with about 6GB memory to run Vicuna-7B and about 3GB memory to run RedPajama-3B.
+Some of the models require fp16 support. To enable fp16 shaders, you will need to turn on the `allow_unsafe_apis` flag in Chrome Canary, as shown in the launch command below.
 
 If you have a Mac computer with Apple silicon, here are the instructions for you to run the chatbot demo on your browser locally:
 
 - Upgrade Chrome to version ≥ 113.
 - Launch Chrome. You are recommended to launch from terminal with the following command:
   ```
-  /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --enable-dawn-features=disable_robustness
+  /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --enable-dawn-features=allow_unsafe_apis,disable_robustness
   ```
 This command turns off the robustness check from Chrome that slows down chatbot reply to times. It is not necessary, but we strongly recommend you to start Chrome with this command.
 - Select the model you want to try out. Enter your inputs, click “Send” – we are ready to go! The chat bot will first fetch model parameters into local cache. The download may take a few minutes, only for the first run. The subsequent refreshes and runs will be faster.
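A quick way to confirm the flag took effect is to query the adapter's feature set from the devtools console; this snippet is illustrative only and not part of the repository:

```ts
// Logs whether the WebGPU "shader-f16" feature is exposed in this browser,
// e.g. after launching Chrome Canary with --enable-dawn-features=allow_unsafe_apis.
async function hasShaderF16(): Promise<boolean> {
  const adapter = await navigator.gpu?.requestAdapter();
  return adapter?.features.has("shader-f16") ?? false;
}

hasShaderF16().then((ok) => console.log("shader-f16 available:", ok));
```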

src/chat_module.ts

Lines changed: 44 additions & 11 deletions
@@ -18,7 +18,6 @@ export class ChatModule implements ChatInterface {
   private pipeline?: LLMChatPipeline;
   private initProgressCallback?: InitProgressCallback;
   private interruptSignal = false;
-  private artifactCache = new tvmjs.ArtifactCache();
 
   setInitProgressCallback(initProgressCallback: InitProgressCallback) {
     this.initProgressCallback = initProgressCallback;
@@ -31,22 +30,25 @@
       appConfig = prebuiltAppConfig;
     }
 
-    const findModelUrl = () => {
+    const findModelRecord = () => {
       const matchedItem = appConfig?.model_list.find(
         item => item.local_id == localId
       );
-      if (matchedItem !== undefined) return matchedItem.model_url;
+      if (matchedItem !== undefined) return matchedItem;
       throw Error("Cannot find model_url for " + localId);
     }
 
-    let modelUrl = findModelUrl();
+    const modelRecord = findModelRecord();
+    let modelUrl = modelRecord.model_url;
     if (!modelUrl.startsWith("http")) {
       modelUrl = new URL(modelUrl, document.URL).href;
     }
+    const configCache = new tvmjs.ArtifactCache("webllm/config");
+
     // load config
     const configUrl = new URL("mlc-chat-config.json", modelUrl).href;
     const config = await (
-      await this.artifactCache.fetchWithCache(configUrl)
+      await configCache.fetchWithCache(configUrl)
     ).json() as ChatConfig;
 
 
@@ -62,10 +64,22 @@
     }
 
     // load tvm wasm
+    const wasmCache = new tvmjs.ArtifactCache("webllm/wasm");
     const wasmUrl = findWasmUrl();
-    const wasmSource = await (
-      await this.artifactCache.fetchWithCache(wasmUrl)
-    ).arrayBuffer();
+    const fetchWasmSource = async () => {
+      if (wasmUrl.includes("localhost")) {
+        // do not cache wasm on local host as we might update code frequently
+        return await fetch(wasmUrl);
+      } else if (!wasmUrl.startsWith("http")) {
+        // do not cache wasm on the same server as it can also refresh
+        // rely on the normal caching strategy
+        return await fetch(new URL(wasmUrl, document.URL).href);
+      } else {
+        // use cache
+        return await wasmCache.fetchWithCache(wasmUrl);
+      }
+    };
+    const wasmSource = await(await fetchWasmSource()).arrayBuffer();
 
     const tvm = await tvmjs.instantiate(
       new Uint8Array(wasmSource),
@@ -88,9 +102,27 @@
     } else {
       gpuLabel += " - " + gpuDetectOutput.adapterInfo.vendor;
     }
+    if (modelRecord.required_features !== undefined) {
+      for (const feature of modelRecord.required_features) {
+        if (!gpuDetectOutput.device.features.has(feature)) {
+          if (feature == "shader-f16") {
+            throw Error(
+              "This model requires WebGPU extension shader-f16, " +
+              "which is not enabled in this browser. " +
+              "You can try Chrome Canary with flag --enable-dawn-features=allow_unsafe_apis"
+            );
+          }
+          throw Error(
+            "This model requires feature " + feature +
+            ", which is not yet supported by this browser. "
+          );
+        }
+      }
+    }
+
     tvm.initWebGPU(gpuDetectOutput.device);
     const tokenizer = await this.asyncLoadTokenizer(modelUrl, config);
-    await tvm.fetchNDArrayCache(modelUrl, tvm.webgpu());
+    await tvm.fetchNDArrayCache(modelUrl, tvm.webgpu(), "webllm/model");
 
     this.pipeline = new LLMChatPipeline(tvm, tokenizer, config);
     await this.pipeline?.asyncLoadWebGPUPiplines();
@@ -192,13 +224,14 @@
     baseUrl: string,
     config: ChatConfig
   ): Promise<Tokenizer> {
+    const modelCache = new tvmjs.ArtifactCache("webllm/model");
     if (config.tokenizer_files.includes("tokenizer.model")) {
       const url = new URL("tokenizer.model", baseUrl).href;
-      const model = await (await this.artifactCache.fetchWithCache(url)).arrayBuffer();
+      const model = await (await modelCache.fetchWithCache(url)).arrayBuffer();
       return Tokenizer.fromSentencePiece(model);
     } else if (config.tokenizer_files.includes("tokenizer.json")) {
       const url = new URL("tokenizer.json", baseUrl).href;
-      const model = await (await this.artifactCache.fetchWithCache(url)).arrayBuffer();
+      const model = await (await modelCache.fetchWithCache(url)).arrayBuffer();
       return Tokenizer.fromJSON(model);
     }
     throw Error("Cannot handle tokenizer files " + config.tokenizer_files)

src/config.ts

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ export interface ChatConfig {
 export interface ModelRecord {
   model_url: string;
   local_id: string;
+  required_features?: Array<string>;
 }
 /**
  * Extra configuration taht can be

src/index.ts

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 export {
-  AppConfig
+  ModelRecord, AppConfig
 } from "./config";
 
 