Commit feee750

[Example] Add Qwen example (#137)
Signed-off-by: jokemanfire <[email protected]>
1 parent f0762dd commit feee750

File tree: 3 files changed, +168 −0 lines changed

- wasmedge-ggml/qwen/Cargo.toml
- wasmedge-ggml/qwen/README.md
- wasmedge-ggml/qwen/src/main.rs

wasmedge-ggml/qwen/Cargo.toml

Lines changed: 10 additions & 0 deletions

```toml
[package]
name = "wasmedge-ggml-qwen"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.1"
```

wasmedge-ggml/qwen/README.md

Lines changed: 35 additions & 0 deletions

# `通义千问` (Tongyi Qianwen / Qwen)

## Execute - Tongyi Qianwen

### Model Download Link

```console
wget https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GGUF/resolve/main/qwen1_5-0_5b-chat-q2_k.gguf
```

### Execution Command

Make sure the `qwen1_5-0_5b-chat-q2_k.gguf` file is in the current directory.
To enable GPU support, set the `n_gpu_layers` environment variable.
You can also enlarge the context window by setting `ctx_size`, e.g. `--env ctx_size=8192`; the default value is 1024. A variant of the command showing this option follows the transcript below.

```console
$ wasmedge --dir .:. \
  --env n_gpu_layers=10 \
  --nn-preload default:GGML:AUTO:qwen1_5-0_5b-chat-q2_k.gguf \
  wasmedge-ggml-qwen.wasm default

USER:
你好
ASSISTANT:
你好!有什么我能帮你的吗?
USER:
你是谁
ASSISTANT:
我是一个人工智能助手,我叫通义千问。有什么我可以帮助你的吗?
USER:
能帮助我写Rust代码吗?
ASSISTANT:
当然可以!我可以帮助你使用Rust语言编写代码,帮助你理解和编写出高质量的代码。我可以帮你编写函数和类,提供使用Python解释器编译和运行代码的建议,提供使用Node.js、Python或Java的代码示例,以及更多关于如何使用Python、Java或C++的代码示例。
```

In English, the dialogue reads:

USER: Hello
ASSISTANT: Hello! Is there anything I can help you with?
USER: Who are you?
ASSISTANT: I am an AI assistant; my name is 通义千问 (Tongyi Qianwen). Is there anything I can help you with?
USER: Can you help me write Rust code?
ASSISTANT: Of course! I can help you write code in Rust and help you understand and produce high-quality code. I can help you write functions and classes, offer suggestions on compiling and running code with the Python interpreter, provide code samples in Node.js, Python, or Java, and give more examples of how to use Python, Java, or C++.
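To run with the larger context window mentioned above, the same command can be invoked with the `ctx_size` environment variable added; this is just the command from the transcript with one extra `--env` flag:

```console
$ wasmedge --dir .:. \
  --env ctx_size=8192 \
  --env n_gpu_layers=10 \
  --nn-preload default:GGML:AUTO:qwen1_5-0_5b-chat-q2_k.gguf \
  wasmedge-ggml-qwen.wasm default
```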

wasmedge-ggml/qwen/src/main.rs

Lines changed: 123 additions & 0 deletions

```rust
use serde_json::json;
use serde_json::Value;
use std::env;
use std::io;
use wasmedge_wasi_nn::{
    self, BackendError, Error, ExecutionTarget, GraphBuilder, GraphEncoding, GraphExecutionContext,
    TensorType,
};

// Read one non-empty line from stdin, trimmed of surrounding whitespace.
fn read_input() -> String {
    loop {
        let mut answer = String::new();
        io::stdin()
            .read_line(&mut answer)
            .expect("Failed to read line");
        if !answer.is_empty() && answer != "\n" && answer != "\r\n" {
            return answer.trim().to_string();
        }
    }
}

// Build the graph options from the environment variables documented in the
// README (enable_log, n_gpu_layers, ctx_size), falling back to defaults.
fn get_options_from_env() -> Value {
    let mut options = json!({});
    if let Ok(val) = env::var("enable_log") {
        options["enable-log"] = serde_json::from_str(val.as_str())
            .expect("invalid value for enable-log option (true/false)")
    } else {
        options["enable-log"] = serde_json::from_str("false").unwrap()
    }
    if let Ok(val) = env::var("n_gpu_layers") {
        options["n-gpu-layers"] =
            serde_json::from_str(val.as_str()).expect("invalid ngl value (unsigned integer)")
    } else {
        options["n-gpu-layers"] = serde_json::from_str("0").unwrap()
    }
    // Honor the ctx_size override described in the README; default to 1024.
    if let Ok(val) = env::var("ctx_size") {
        options["ctx-size"] =
            serde_json::from_str(val.as_str()).expect("invalid ctx-size value (unsigned integer)")
    } else {
        options["ctx-size"] = serde_json::from_str("1024").unwrap()
    }

    options
}

fn set_data_to_context(context: &mut GraphExecutionContext, data: Vec<u8>) -> Result<(), Error> {
    context.set_input(0, TensorType::U8, &[1], &data)
}

fn get_data_from_context(context: &GraphExecutionContext, index: usize) -> String {
    // Reserve space for 4096 tokens, assuming an average token length of 6 bytes.
    const MAX_OUTPUT_BUFFER_SIZE: usize = 4096 * 6;
    let mut output_buffer = vec![0u8; MAX_OUTPUT_BUFFER_SIZE];
    let mut output_size = context
        .get_output(index, &mut output_buffer)
        .expect("Failed to get output");
    output_size = std::cmp::min(MAX_OUTPUT_BUFFER_SIZE, output_size);

    String::from_utf8((output_buffer[..output_size]).to_vec())
        .unwrap()
        .to_string()
}

fn get_output_from_context(context: &GraphExecutionContext) -> String {
    get_data_from_context(context, 0)
}

fn main() {
    let args: Vec<String> = env::args().collect();
    let model_name: &str = &args[1];

    // Set options for the graph. Check our README for more details:
    // https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
    let options = get_options_from_env();

    // Create graph and initialize context.
    let graph = GraphBuilder::new(GraphEncoding::Ggml, ExecutionTarget::AUTO)
        .config(serde_json::to_string(&options).expect("Failed to serialize options"))
        .build_from_cache(model_name)
        .expect("Failed to build graph");
    let mut context = graph
        .init_execution_context()
        .expect("Failed to init context");

    // The conversation history, kept in Qwen's ChatML prompt format.
    let mut saved_prompt =
        String::from("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n");

    loop {
        println!("USER:");
        let input = read_input();

        saved_prompt = format!("{}\n<|im_start|>user\n{}<|im_end|>\n", saved_prompt, input);
        let context_prompt = saved_prompt.clone() + "<|im_start|>assistant\n";

        // Set prompt to the input tensor.
        set_data_to_context(&mut context, context_prompt.as_bytes().to_vec())
            .expect("Failed to set input");

        // Execute the inference.
        let mut reset_prompt = false;
        match context.compute() {
            Ok(_) => (),
            Err(Error::BackendError(BackendError::ContextFull)) => {
                println!("\n[INFO] Context full, we'll reset the context and continue.");
                reset_prompt = true;
            }
            Err(Error::BackendError(BackendError::PromptTooLong)) => {
                println!("\n[INFO] Prompt too long, we'll reset the context and continue.");
                reset_prompt = true;
            }
            Err(err) => {
                println!("\n[ERROR] {}", err);
            }
        }

        // Retrieve the output.
        let mut output = get_output_from_context(&context);
        println!("ASSISTANT:\n{}", output.trim());

        // Update the saved prompt. Keep the assistant reply inside ChatML
        // delimiters so the history sent on the next turn stays well-formed.
        if reset_prompt {
            saved_prompt.clear();
        } else {
            output = output.trim().to_string();
            saved_prompt = format!(
                "{}<|im_start|>assistant\n{}<|im_end|>\n",
                saved_prompt, output
            );
        }
    }
}
```
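The README invokes a prebuilt `wasmedge-ggml-qwen.wasm` module but the commit does not show the build step. A minimal sketch, assuming a rustup-managed Rust toolchain with the WASI target installed (named `wasm32-wasi` on older toolchains, `wasm32-wasip1` on newer ones):

```console
# Install the WASI compilation target once (assumes rustup).
rustup target add wasm32-wasi

# Build the example; the module is emitted at
# target/wasm32-wasi/release/wasmedge-ggml-qwen.wasm
cargo build --target wasm32-wasi --release
```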
