@@ -33,10 +33,10 @@ Export to XNNPack, no quantization:
3333QWEN_CHECKPOINT=path/to/checkpoint.pth
3434
3535python -m extension.llm.export.export_llm \
36- --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml
36+ --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml \
3737 +base.model_class="qwen2_5" \
3838 +base.checkpoint="${QWEN_CHECKPOINT:?}" \
39- +base.params="examples/models/qwen2_5/1_5b_config.json" \
39+ +base.params="examples/models/qwen2_5/config/1_5b_config.json" \
4040 +export.output_name="qwen2_5-1_5b.pte" \
4141```
4242
@@ -45,14 +45,14 @@ Run using the executor runner:
4545# Currently a work in progress, just need to enable HuggingFace json tokenizer in C++.
4646# In the meantime, can run with an example Python runner with pybindings:
4747
48- python -m examples.models.llama.runner.native
49- --model qwen2_5
50- --pte <path-to-pte>
51- -kv
52- --tokenizer <path-to-tokenizer>/tokenizer.json
53- --tokenizer_config <path-to_tokenizer>/tokenizer_config.json
54- --prompt "Who is the founder of Meta?"
55- --params examples/models/qwen2_5/1_5b_config.json
56- --max_len 64
48+ python -m examples.models.llama.runner.native \
49+ --model qwen2_5 \
50+ --pte <path-to-pte> \
51+ -kv \
52+ --tokenizer <path-to-tokenizer>/tokenizer.json \
53+ --tokenizer_config <path-to_tokenizer>/tokenizer_config.json \
54+ --prompt "Who is the founder of Meta?" \
55+ --params examples/models/qwen2_5/config/1_5b_config.json \
56+ --max_len 64 \
5757 --temperature 0
5858```
0 commit comments