Commit 654ffc5
[Example] ggml: add CI for phi-3, yi-1.5-9b (#143)

* [Example] ggml: rename phi-3-mini to phi-3, test more phi-3 models
* [CI] llama: use b2963
* [Example] ggml: update chatml example for better CI testing
* [CI] llama: use latest plugin with both wasmedge 0.13 and 0.14

Signed-off-by: dm4 <[email protected]>
1 parent 8b3991c commit 654ffc5

File tree

8 files changed: +130 −42 lines changed

.github/workflows/llama.yml

Lines changed: 62 additions & 11 deletions

@@ -24,7 +24,8 @@ jobs:
   build:
     strategy:
       matrix:
-        runner: [ubuntu-20.04, macos-13, macos-m1]
+        runner: [ubuntu-20.04, macos-m1]
+        wasmedge: ["0.13.5", "0.14.0"]
         plugin: [wasi_nn-ggml]
         job:
           - name: "Tiny Llama"
@@ -224,6 +225,34 @@ jobs:
               default \
               '<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'

+          - name: Yi 1.5 9B 16K
+            run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/chatml
+              curl -LO https://huggingface.co/second-state/Yi-1.5-9B-Chat-16K-GGUF/resolve/main/Yi-1.5-9B-Chat-16K-Q5_K_M.gguf
+              cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --env reverse_prompt='<|im_end|>' \
+                --nn-preload default:GGML:AUTO:Yi-1.5-9B-Chat-16K-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-chatml.wasm \
+                default \
+                $'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
+
+          - name: Yi 1.5 9B
+            run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/chatml
+              curl -LO https://huggingface.co/second-state/Yi-1.5-9B-Chat-GGUF/resolve/main/Yi-1.5-9B-Chat-Q5_K_M.gguf
+              cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --env reverse_prompt='<|im_end|>' \
+                --nn-preload default:GGML:AUTO:Yi-1.5-9B-Chat-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-chatml.wasm \
+                default \
+                $'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
+
           - name: Grammar Example
             run: |
               test -f ~/.wasmedge/env && source ~/.wasmedge/env
@@ -259,26 +288,48 @@ jobs:
               default \
               $'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'

-          - name: Phi 3 Mini
+          - name: Phi 3 Mini 4k
             run: |
               test -f ~/.wasmedge/env && source ~/.wasmedge/env
-              cd wasmedge-ggml/test/phi-3-mini
+              cd wasmedge-ggml/test/phi-3
               curl -LO https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
               cargo build --target wasm32-wasi --release
               time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:Phi-3-mini-4k-instruct-q4.gguf \
-                target/wasm32-wasi/release/wasmedge-ggml-phi-3-mini.wasm \
+                target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm \
                 default \
                 $'<|user|>\nWhat is the capital of Japan?<|end|>\n<|assistant|>'

-          - name: Build llama-stream
+          - name: Phi 3 Mini 128k
             run: |
-              cd wasmedge-ggml/llama-stream
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/test/phi-3
+              curl -LO https://huggingface.co/second-state/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q5_K_M.gguf
               cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --nn-preload default:GGML:AUTO:Phi-3-mini-128k-instruct-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm \
+                default \
+                $'<|user|>\nWhat is the capital of Japan?<|end|>\n<|assistant|>'

-          - name: Build chatml
+          - name: Phi 3 Medium 4k
             run: |
-              cd wasmedge-ggml/chatml
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/test/phi-3
+              curl -LO https://huggingface.co/second-state/Phi-3-medium-4k-instruct-GGUF/resolve/main/Phi-3-medium-4k-instruct-Q5_K_M.gguf
+              cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --nn-preload default:GGML:AUTO:Phi-3-medium-4k-instruct-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm \
+                default \
+                $'<|user|>\nWhat is the capital of Japan?<|end|>\n<|assistant|>'
+
+          - name: Build llama-stream
+            run: |
+              cd wasmedge-ggml/llama-stream
               cargo build --target wasm32-wasi --release

           - name: Build llava-base64-stream
@@ -290,6 +341,7 @@ jobs:
           - runner: macos-m1
             ngl: 100
           - runner: ubuntu-20.04
+            wasmedge: "0.14.0"
             plugin: wasi_nn-ggml
             job:
               name: C4AI Command-R v01
@@ -305,7 +357,7 @@ jobs:
               default \
               '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>What is the capital of the United States?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'

-    name: ${{ matrix.runner }} - ${{ matrix.job.name }} - ${{ matrix.plugin }}
+    name: ${{ matrix.runner }} - ${{ matrix.job.name }} - ${{ matrix.wasmedge }} - ${{ matrix.plugin }}
     runs-on: ${{ matrix.runner }}
     steps:
       - uses: actions/checkout@v4
@@ -316,8 +368,7 @@ jobs:

       - name: Install WasmEdge + WASI-NN + GGML
         run: |
-          VERSION=0.13.5
-          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v $VERSION --plugins ${{ matrix.plugin }}
+          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v ${{ matrix.wasmedge }} --plugins ${{ matrix.plugin }}

       - name: Set environment variable
         run: echo "NGL=${{ matrix.ngl || 0 }}" >> $GITHUB_ENV

wasmedge-ggml/chatml/src/main.rs

Lines changed: 67 additions & 30 deletions

@@ -1,5 +1,4 @@
-use serde_json::Value;
-use std::collections::HashMap;
+use serde_json::{json, Value};
 use std::env;
 use std::io;
 use wasmedge_wasi_nn::{
@@ -19,6 +18,27 @@ fn read_input() -> String {
     }
 }

+fn get_options_from_env() -> Value {
+    let mut options = json!({});
+    if let Ok(val) = env::var("enable_log") {
+        options["enable-log"] = serde_json::from_str(val.as_str())
+            .expect("invalid value for enable-log option (true/false)")
+    }
+    if let Ok(val) = env::var("n_gpu_layers") {
+        options["n-gpu-layers"] =
+            serde_json::from_str(val.as_str()).expect("invalid ngl value (unsigned integer)")
+    }
+    if let Ok(val) = env::var("ctx_size") {
+        options["ctx-size"] =
+            serde_json::from_str(val.as_str()).expect("invalid ctx-size value (unsigned integer)")
+    }
+    if let Ok(val) = env::var("reverse_prompt") {
+        options["reverse-prompt"] = json!(val.as_str())
+    }
+
+    options
+}
+
 fn set_data_to_context(context: &mut GraphExecutionContext, data: Vec<u8>) -> Result<(), Error> {
     context.set_input(0, TensorType::U8, &[1], &data)
 }
@@ -56,11 +76,9 @@ fn main() {
     let args: Vec<String> = env::args().collect();
     let model_name: &str = &args[1];

-    // Set options for the graph. Check our README for more details.
-    let mut options = HashMap::new();
-    options.insert("enable-log", Value::from(false));
-    options.insert("n-gpu-layers", Value::from(0));
-    options.insert("ctx-size", Value::from(512));
+    // Set options for the graph. Check our README for more details:
+    // https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
+    let options = get_options_from_env();

     // Create graph and initialize context.
     let graph = GraphBuilder::new(GraphEncoding::Ggml, ExecutionTarget::AUTO)
@@ -82,6 +100,48 @@ fn main() {
     //     )
     //     .expect("Failed to set metadata");

+    // If there is a third argument, use it as the prompt and enter non-interactive mode.
+    // This is mainly for the CI workflow.
+    if args.len() >= 3 {
+        let prompt = &args[2];
+        // Set the prompt.
+        println!("Prompt:\n{}", prompt);
+        let tensor_data = prompt.as_bytes().to_vec();
+        context
+            .set_input(0, TensorType::U8, &[1], &tensor_data)
+            .expect("Failed to set input");
+        println!("Response:");
+
+        // Get the number of input tokens and llama.cpp versions.
+        let input_metadata = get_metadata_from_context(&context);
+        println!("[INFO] llama_commit: {}", input_metadata["llama_commit"]);
+        println!(
+            "[INFO] llama_build_number: {}",
+            input_metadata["llama_build_number"]
+        );
+        println!(
+            "[INFO] Number of input tokens: {}",
+            input_metadata["input_tokens"]
+        );
+
+        // Get the output.
+        context.compute().expect("Failed to compute");
+        let output = get_output_from_context(&context);
+        println!("{}", output.trim());
+
+        // Retrieve the output metadata.
+        let metadata = get_metadata_from_context(&context);
+        println!(
+            "[INFO] Number of input tokens: {}",
+            metadata["input_tokens"]
+        );
+        println!(
+            "[INFO] Number of output tokens: {}",
+            metadata["output_tokens"]
+        );
+        std::process::exit(0);
+    }
+
     let mut saved_prompt = String::new();
     let system_prompt = String::from("You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.");

@@ -101,18 +161,6 @@ fn main() {
         set_data_to_context(&mut context, saved_prompt.as_bytes().to_vec())
             .expect("Failed to set input");

-        // Get the number of input tokens and llama.cpp versions.
-        // let input_metadata = get_metadata_from_context(&context);
-        // println!("[INFO] llama_commit: {}", input_metadata["llama_commit"]);
-        // println!(
-        //     "[INFO] llama_build_number: {}",
-        //     input_metadata["llama_build_number"]
-        // );
-        // println!(
-        //     "[INFO] Number of input tokens: {}",
-        //     input_metadata["input_tokens"]
-        // );
-
         // Execute the inference.
         let mut reset_prompt = false;
         match context.compute() {
@@ -141,16 +189,5 @@ fn main() {
             output = output.trim().to_string();
             saved_prompt = format!("{}{}<|im_end|>\n", saved_prompt, output);
         }
-
-        // Retrieve the output metadata.
-        // let metadata = get_metadata_from_context(&context);
-        // println!(
-        //     "[INFO] Number of input tokens: {}",
-        //     metadata["input_tokens"]
-        // );
-        // println!(
-        //     "[INFO] Number of output tokens: {}",
-        //     metadata["output_tokens"]
-        // );
     }
 }
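One behavioral consequence of the new get_options_from_env: options that used to be hard-coded (enable-log=false, n-gpu-layers=0, ctx-size=512) are now set only when the corresponding environment variable is present. The first three are parsed with serde_json::from_str, so their values must be bare JSON literals (true/false, unsigned integers), while reverse_prompt is wrapped with json! and may be any string. A minimal sketch of an interactive run exercising all four variables; the model file name here is a placeholder:

# enable_log / n_gpu_layers / ctx_size must parse as JSON literals;
# reverse_prompt is passed through as a plain string.
# model.gguf is a placeholder for any ChatML-style GGUF model.
wasmedge --dir .:. \
  --env enable_log=false \
  --env n_gpu_layers=0 \
  --env ctx_size=512 \
  --env reverse_prompt='<|im_end|>' \
  --nn-preload default:GGML:AUTO:model.gguf \
  target/wasm32-wasi/release/wasmedge-ggml-chatml.wasm \
  default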
Binary file not shown. (−380 KB)
Binary file not shown. (−2.13 MB)

wasmedge-ggml/test/phi-3-mini/Cargo.toml renamed to wasmedge-ggml/test/phi-3/Cargo.toml

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [package]
-name = "wasmedge-ggml-phi-3-mini"
+name = "wasmedge-ggml-phi-3"
 version = "0.1.0"
 edition = "2021"
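Since Cargo derives the artifact name from the package name, this one-line rename is what moves the built module from wasmedge-ggml-phi-3-mini.wasm to wasmedge-ggml-phi-3.wasm, matching the paths the workflow steps above now load:

# Built from wasmedge-ggml/test/phi-3 after the rename:
cargo build --target wasm32-wasi --release
# produces target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm
# (previously wasmedge-ggml-phi-3-mini.wasm)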

File renamed without changes.
File renamed without changes.
Binary file not shown. (1.72 MB)
