File tree Expand file tree Collapse file tree 2 files changed +18
-0
lines changed Expand file tree Collapse file tree 2 files changed +18
-0
lines changed Original file line number Diff line number Diff line change 4343 "coreml_fp16" ,
4444 "mps" ,
4545 "llama3_coreml_ane" ,
46+ "llama3_mps_8da4w" ,
4647 ],
4748}
4849
Original file line number Diff line number Diff line change @@ -298,6 +298,23 @@ jobs:
298298 --coreml-compute-units cpu_and_ne \
299299 --output_name="${OUT_ET_MODEL_NAME}.pte"
300300 ls -lh "${OUT_ET_MODEL_NAME}.pte"
301+ elif [[ ${{ matrix.config }} == "llama3_mps_8da4w" ]]; then
302+ # Export Llama 3.2 to a .pte using the MPS backend with 8da4w quantization (group size 32)
303+ DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
304+ ${CONDA_RUN} python -m examples.models.llama.export_llama \
305+ --model "llama3_2" \
306+ --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
307+ --params "${DOWNLOADED_PATH}/params.json" \
308+ -kv \
309+ --use_sdpa_with_kv_cache \
310+ --disable_dynamic_shape \
311+ --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
312+ --mps \
313+ -qmode 8da4w \
314+ --group_size 32 \
315+ --embedding-quantize 4,32 \
316+ --output_name="${OUT_ET_MODEL_NAME}.pte"
317+ ls -lh "${OUT_ET_MODEL_NAME}.pte"
301318 else
302319 # By default, test with the Hugging Face model and the xnnpack recipe
303320 DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")
You can’t perform that action at this time.
0 commit comments