7272 # Separate default values from the workflow dispatch. To ensure defaults are accessible
7373 # during scheduled runs and to provide flexibility for different defaults between
7474 # on-demand and periodic benchmarking.
75- CRON_DEFAULT_MODELS : ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
75+ CRON_DEFAULT_MODELS : ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
7676 CRON_DEFAULT_DEVICES : apple_iphone_15
7777 run : |
7878 set -eux
@@ -322,7 +322,7 @@ jobs:
322322 DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json")
323323 ${CONDA_RUN} python -m extension.llm.export.export_llm \
324324 base.model_class=qwen3_0_6b \
325- base.params=examples/models/qwen3/0_6b_config.json \
325+ base.params=examples/models/qwen3/config/0_6b_config.json \
326326 model.use_kv_cache=true \
327327 model.use_sdpa_with_kv_cache=true \
328328 model.dtype_override=fp32 \
@@ -346,10 +346,11 @@ jobs:
346346 echo "tokenizer.json is downloaded to $DOWNLOADED_PATH"
347347
348348 # Install optimum-executorch
349+ OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
349350 git clone https://github.com/huggingface/optimum-executorch
350351 pushd optimum-executorch
351352 # There is no release yet, for CI stability, always test from the same commit on main
352- git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
353+ git checkout $OPTIMUM_ET_COMMIT
353354 ${CONDA_RUN} python install_dev.py --skip_override_torch
354355 pip list
355356
@@ -358,21 +359,12 @@ jobs:
358359 "--task" "text-generation"
359360 "--recipe" "xnnpack"
360361 "--use_custom_sdpa"
362+ "--use_custom_kv_cache"
361363 "--qlinear"
362364 "--qembedding"
363365 "--output_dir" ".."
364366 )
365367
366- # Add conditional arguments based on model
367- case "${HF_MODEL_REPO}" in
368- *"google/gemma-3-1b-it"*)
369- echo "--use_custom_kv_cache can not be used for HybridCache"
370- ;;
371- *)
372- ARGS+=("--use_custom_kv_cache")
373- ;;
374- esac
375-
376368 ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}"
377369 popd
378370
0 commit comments