@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use the base env, not the one set up by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETDump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"
 
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet; for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 are not available in a released version yet
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"
 
-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer
 
         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
             prompt='Simply put, the theory of relativity states that',
             max_seq_len=64
         )
         print(generated_text)
         "
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
+
         echo "::endgroup::"
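Note on the ETDump step: besides the inspector_cli TSV dump, the same ETDump can be analyzed programmatically. A minimal sketch, assuming the executorch.devtools.Inspector API; without an ETRecord the operator names are less descriptive, but per-event timing still comes through.

    # Load the ETDump emitted by executor_runner and print an operator-level
    # latency table. Pass etrecord=... too if an ETRecord was generated.
    from executorch.devtools import Inspector

    inspector = Inspector(etdump_path="etdump.etdp")
    inspector.print_data_tabular()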