Skip to content

Commit 397e880

Browse files
Add performance benchmark config: MPS 8da4w
1 parent 89dc36c commit 397e880

File tree

2 files changed

+18
-0
lines changed

2 files changed

+18
-0
lines changed

.ci/scripts/gather_benchmark_configs.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
43 43
"coreml_fp16",
44 44
"mps",
45 45
"llama3_coreml_ane",
46+
"llama3_mps_8da4w",
46 47
],
47 48
}
48 49

.github/workflows/apple-perf.yml

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -298,6 +298,23 @@ jobs:
298 298
--coreml-compute-units cpu_and_ne \
299 299
--output_name="${OUT_ET_MODEL_NAME}.pte"
300 300
ls -lh "${OUT_ET_MODEL_NAME}.pte"
301+
elif [[ ${{ matrix.config }} == "llama3_mps_8da4w" ]]; then
302+
# MPS 8da4w
303+
DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
304+
${CONDA_RUN} python -m examples.models.llama.export_llama \
305+
--model "llama3_2" \
306+
--checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
307+
--params "${DOWNLOADED_PATH}/params.json" \
308+
-kv \
309+
--use_sdpa_with_kv_cache \
310+
--disable_dynamic_shape \
311+
--metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
312+
--mps \
313+
-qmode 8da4w \
314+
--group_size 32 \
315+
--embedding-quantize 4,32 \
316+
--output_name="${OUT_ET_MODEL_NAME}.pte"
317+
ls -lh "${OUT_ET_MODEL_NAME}.pte"
301 318
else
302 319
# By default, test with the Hugging Face model and the xnnpack recipe
303 320
DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")

0 commit comments

Comments
 (0)