@@ -209,6 +209,8 @@ jobs:
209209
210210 # Convert HF checkpoint to ET via etLLM path
211211 if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then
212+ # The benchmark app relies on the _llm suffix to determine whether the model is an LLM or not
213+ OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
212214 # Llama models on Hugging Face
213215 if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then
214216 # SpinQuant
@@ -311,6 +313,7 @@ jobs:
311313 ls -lh "${OUT_ET_MODEL_NAME}.pte"
312314 fi
313315 elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then
316+ OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
314317 if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
315318 DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json")
316319 ${CONDA_RUN} python -m examples.models.llama.export_llama \
@@ -321,7 +324,9 @@ jobs:
321324 -d fp32 \
322325 -X \
323326 --xnnpack-extended-ops \
324- -qmode 8da4w -G 32 -E 8,0 \
327+ -qmode 8da4w \
328+ -G 32 \
329+ -E 8,0 \
325330 --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
326331 --output_name="${OUT_ET_MODEL_NAME}.pte"
327332 ls -lh "${OUT_ET_MODEL_NAME}.pte"
@@ -367,12 +372,13 @@ jobs:
367372 ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}"
368373 popd
369374
375+ # The benchmark app relies on the _llm suffix to determine whether the model is an LLM or not
376+ OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
370377 mv model.pte ${OUT_ET_MODEL_NAME}.pte
371378 ls -lh "${OUT_ET_MODEL_NAME}.pte"
372379 fi
373380
374- # zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.*
375- zip -j model.zip ${OUT_ET_MODEL_NAME}.pte
381+ zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.*
376382 ls -lh model.zip
377383 mkdir -p "${ARTIFACTS_DIR_NAME}"
378384 mv model.zip "${ARTIFACTS_DIR_NAME}"
0 commit comments