
Commit 62cf849

Update CI for HF Optimum models (#10820)
Authored by Guang Yang (guangy10)
### Summary

Updated the pinned `optimum-executorch` commit. Use `optimum-cli` to generate `.pte` files with xnnpack + custom_sdpa + 8da4w (8-bit dynamic-activation, 4-bit weight quantization). This gets the CI ready to profile those `.pte` files using `executor_runner`.

### Test plan

CI will export native HF models via `optimum-cli` and validate those models via the Python API `from_pretrained`.

Co-authored-by: Guang Yang <[email protected]>
1 parent: abaee69

1 file changed: .github/workflows/trunk.yml (68 additions, 10 deletions)
```diff
@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
```
```diff
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETDump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"
 
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 are not available in the released version
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"
 
-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
```
```diff
+
+        echo "::group::Inference using python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer
 
         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
             prompt='Simply put, the theory of relativity states that',
             max_seq_len=64
         )
         print(generated_text)
         "
+        popd
+        echo "::endgroup::"
```
```diff
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
+
         echo "::endgroup::"
```
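The TSV written by `inspector_cli` is the artifact the new `upload-artifact` setting is meant to publish. The same ETDump can also be inspected programmatically; a minimal sketch, assuming the `Inspector` API exposed by `executorch.devtools` in this build, with a placeholder path for `${OUTPUT_DIR}/etdump.etdp`:

```python
# Sketch: load the ETDump emitted by executor_runner and print the
# per-operator profile that the CI step saves as a TSV.
from executorch.devtools import Inspector

inspector = Inspector(etdump_path="gemma-3-1b-it_custom_sdpa_8da4w/etdump.etdp")
inspector.print_data_tabular()  # op-level timings, one row per profiled event
```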
