@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -583,25 +583,46 @@ jobs:
         git clone https://github.com/huggingface/optimum-executorch
         cd optimum-executorch
         # There is no release yet; for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 are not available in the released version of transformers
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"
 
         echo "::group::Export and Run ${{ matrix.hf_model_id }}"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="${MODEL_ID}_custom_sdpa_8da4w"
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer
 
         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
             prompt='Simply put, the theory of relativity states that',
             max_seq_len=64
         )
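
The net effect of the second hunk is a two-step flow: export a quantized .pte program ahead of time with optimum-cli, then load that artifact instead of exporting inside from_pretrained. For readers who want to reproduce the flow outside CI, below is a minimal local sketch of the same two steps for a single matrix entry. It assumes optimum-executorch is installed from the pinned commit above; the model choice (HuggingFaceTB/SmolLM2-135M) is just an illustrative pick from the matrix, not mandated by the workflow.

# Minimal local sketch of the new export-then-load flow; this mirrors the
# CI script above but is not itself part of the workflow.
import subprocess

from optimum.executorch import ExecuTorchModelForCausalLM
from transformers import AutoTokenizer

model_id = "HuggingFaceTB/SmolLM2-135M"       # illustrative matrix entry
output_dir = f"{model_id}_custom_sdpa_8da4w"  # same naming scheme as OUTPUT_DIR in CI

# Step 1: ahead-of-time export with the flags added in this commit
# (XNNPACK recipe, custom SDPA, and the linear quantization reflected
# in the "_8da4w" suffix of the output-dir name).
subprocess.run(
    [
        "optimum-cli", "export", "executorch",
        "--model", model_id,
        "--task", "text-generation",
        "--recipe", "xnnpack",
        "--use_custom_sdpa",
        "--output_dir", output_dir,
        "--qlinear",
    ],
    check=True,
)

# Step 2: load the pre-exported program instead of exporting at load time.
model = ExecuTorchModelForCausalLM.from_pretrained(output_dir)
generated_text = model.text_generation(
    tokenizer=AutoTokenizer.from_pretrained(model_id),
    prompt="Simply put, the theory of relativity states that",
    max_seq_len=64,
)
print(generated_text)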