@@ -33,10 +33,10 @@ Export to XNNPack, no quantization:
33
33
QWEN_CHECKPOINT=path/to/checkpoint.pth
34
34
35
35
python -m extension.llm.export.export_llm \
36
- --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml
36
+ --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml \
37
37
+base.model_class="qwen2_5" \
38
38
+base.checkpoint="${QWEN_CHECKPOINT:?}" \
39
- +base.params="examples/models/qwen2_5/1_5b_config.json" \
39
+ +base.params="examples/models/qwen2_5/config/1_5b_config.json" \
40
40
+export.output_name="qwen2_5-1_5b.pte"
41
41
```
42
42
@@ -45,14 +45,14 @@ Run using the executor runner:
45
45
# Currently a work in progress, just need to enable HuggingFace json tokenizer in C++.
46
46
# In the meantime, can run with an example Python runner with pybindings:
47
47
48
- python -m examples.models.llama.runner.native
49
- --model qwen2_5
50
- --pte <path-to-pte>
51
- -kv
52
- --tokenizer <path-to-tokenizer>/tokenizer.json
53
- --tokenizer_config <path-to_tokenizer>/tokenizer_config.json
54
- --prompt "Who is the founder of Meta?"
55
- --params examples/models/qwen2_5/1_5b_config.json
56
- --max_len 64
48
+ python -m examples.models.llama.runner.native \
49
+ --model qwen2_5 \
50
+ --pte <path-to-pte> \
51
+ -kv \
52
+ --tokenizer <path-to-tokenizer>/tokenizer.json \
53
+ --tokenizer_config <path-to-tokenizer>/tokenizer_config.json \
54
+ --prompt "Who is the founder of Meta?" \
55
+ --params examples/models/qwen2_5/config/1_5b_config.json \
56
+ --max_len 64 \
57
57
--temperature 0
58
58
```
0 commit comments