Skip to content

Commit fac9438

Browse files
author
Guang Yang
committed
Update CI for HF Optimum models
1 parent adde519 commit fac9438

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

.github/workflows/trunk.yml

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -555,11 +555,11 @@ jobs:
555555
strategy:
556556
matrix:
557557
hf_model_id: [
558-
google/gemma-2-2b,
559-
Qwen/Qwen2.5-0.5B,
558+
google/gemma-3-1b-it,
559+
Qwen/Qwen3-0.6B,
560560
HuggingFaceTB/SmolLM2-135M,
561561
meta-llama/Llama-3.2-1B,
562-
allenai/OLMo-1B-hf
562+
allenai/OLMo-1B-hf,
563563
]
564564
fail-fast: false
565565
with:
@@ -583,25 +583,46 @@ jobs:
583583
git clone https://github.com/huggingface/optimum-executorch
584584
cd optimum-executorch
585585
# There is no release yet; for CI stability, always test from the same commit on main
586-
git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
586+
git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
587587
pip install .[tests]
588+
589+
if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
590+
# Fixes for gemma-3 are not available in the released version
591+
git clone https://github.com/huggingface/transformers.git
592+
pushd transformers
593+
git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
594+
pip install -e .
595+
popd
596+
fi
588597
pip list
589598
echo "::endgroup::"
590599
591600
echo "::group::Export and Run ${{ matrix.hf_model_id }}"
592601
# Pass matrix variable as environment variable
593602
export MODEL_ID="${{ matrix.hf_model_id }}"
603+
export OUTPUT_DIR="${MODEL_ID}_custom_sdpa_8da4w"
604+
605+
optimum-cli export executorch \
606+
--model ${MODEL_ID} \
607+
--task text-generation \
608+
--recipe xnnpack \
609+
--use_custom_sdpa \
610+
--output_dir ${OUTPUT_DIR} \
611+
--qlinear
612+
613+
ls -FlAGhp ${OUTPUT_DIR}
614+
594615
python -c "
595616
import os
596617
from optimum.executorch import ExecuTorchModelForCausalLM
597618
from transformers import AutoTokenizer
598619
599620
model_id = os.getenv('MODEL_ID')
600-
print(f'Loading model: {model_id}')
601-
model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
602-
tokenizer = AutoTokenizer.from_pretrained(model_id)
621+
pte_dir = os.getenv('OUTPUT_DIR')
622+
print(f'Loading model {model_id} from {pte_dir}.')
623+
model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
603624
generated_text = model.text_generation(
604-
tokenizer=tokenizer,
625+
tokenizer=AutoTokenizer.from_pretrained(model_id),
605626
prompt='Simply put, the theory of relativity states that',
606627
max_seq_len=64
607628
)

0 commit comments

Comments
 (0)