Add performance benchmark config: MPS 8da4w

manuelcandales · manuelcandales · commit 397e880ee467 · 2025-02-12T17:00:23.000-05:00
diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py
@@ -43,6 +43,7 @@
         "coreml_fp16",
         "mps",
         "llama3_coreml_ane",
+        "llama3_mps_8da4w",
     ],
 }
 
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
@@ -298,6 +298,23 @@ jobs:
                 --coreml-compute-units cpu_and_ne \
                 --output_name="${OUT_ET_MODEL_NAME}.pte"
               ls -lh "${OUT_ET_MODEL_NAME}.pte"
+            elif [[ ${{ matrix.config }} == "llama3_mps_8da4w" ]]; then
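+              # MPS 8da4w: export llama3_2 from the original HF checkpoint for the MPS backend with 8da4w quantization (group size 32, embedding quantize 4,32)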
+              DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
+              ${CONDA_RUN} python -m examples.models.llama.export_llama \
+                --model "llama3_2" \
+                --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
+                --params "${DOWNLOADED_PATH}/params.json" \
+                -kv \
+                --use_sdpa_with_kv_cache \
+                --disable_dynamic_shape \
+                --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
+                --mps \
+                -qmode 8da4w \
+                --group_size 32 \
+                --embedding-quantize 4,32 \
+                --output_name="${OUT_ET_MODEL_NAME}.pte"
+              ls -lh "${OUT_ET_MODEL_NAME}.pte"
             else
               # By default, test with the Hugging Face model and the xnnpack recipe
               DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")

Original file line number	Diff line number	Diff line change
`@@ -43,6 +43,7 @@`
`43`	`43`	`"coreml_fp16",`
`44`	`44`	`"mps",`
`45`	`45`	`"llama3_coreml_ane",`
	`46`	`+ "llama3_mps_8da4w",`
`46`	`47`	`],`
`47`	`48`	`}`
`48`	`49`