@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETDump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"

         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet; for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # The fixes for gemma-3 are not yet in a released transformers version
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"

-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer

         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-          tokenizer=tokenizer,
+          tokenizer=AutoTokenizer.from_pretrained(model_id),
           prompt='Simply put, the theory of relativity states that',
           max_seq_len=64
         )
         print(generated_text)
         "
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
+
         echo "::endgroup::"

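For local debugging, the export step of this matrix can be reproduced outside CI. The sketch below is illustrative only, not part of the workflow; it assumes optimum-executorch is installed, optimum-cli is on PATH, and you are logged in to Hugging Face for the gated checkpoints. It fans out over the same hf_model_id values and passes the same flags as the workflow:

    # Sketch: reproduce the CI export matrix locally (not part of the workflow).
    import os
    import subprocess

    # Mirrors the workflow's hf_model_id matrix.
    MODEL_IDS = [
        "google/gemma-3-1b-it",
        "Qwen/Qwen3-0.6B",
        "HuggingFaceTB/SmolLM2-135M",
        "meta-llama/Llama-3.2-1B",
        "allenai/OLMo-1B-hf",
    ]

    for model_id in MODEL_IDS:
        # Same naming scheme as OUTPUT_DIR in the workflow.
        output_dir = os.path.join(os.getcwd(), f"{model_id}_custom_sdpa_8da4w")
        subprocess.run(
            [
                "optimum-cli", "export", "executorch",
                "--model", model_id,
                "--task", "text-generation",
                "--recipe", "xnnpack",
                "--use_custom_sdpa",
                "--output_dir", output_dir,
                "--qlinear",
            ],
            check=True,
        )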
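The inline python -c inference block can likewise be kept as a standalone script. With MODEL_ID and OUTPUT_DIR set as in the workflow, it is equivalent to:

    # Standalone equivalent of the workflow's inline inference step.
    import os

    from optimum.executorch import ExecuTorchModelForCausalLM
    from transformers import AutoTokenizer

    model_id = os.environ["MODEL_ID"]   # e.g. "HuggingFaceTB/SmolLM2-135M"
    pte_dir = os.environ["OUTPUT_DIR"]  # directory containing model.pte

    # Load the already-exported program instead of re-exporting from the Hub.
    model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
    generated_text = model.text_generation(
        tokenizer=AutoTokenizer.from_pretrained(model_id),
        prompt="Simply put, the theory of relativity states that",
        max_seq_len=64,
    )
    print(generated_text)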
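Finally, the ETDump written by executor_runner does not have to go through inspector_cli; the devtools Inspector API exposes the same per-operator timings programmatically. A minimal sketch, assuming the executorch Python package is installed and OUTPUT_DIR matches the export above:

    # Sketch: inspect the ETDump produced by executor_runner programmatically.
    import os

    from executorch.devtools import Inspector

    etdump_path = os.path.join(os.environ["OUTPUT_DIR"], "etdump.etdp")

    # Without an ETRecord, only runtime event data (op names, timings) is shown.
    inspector = Inspector(etdump_path=etdump_path)

    # Roughly the same per-operator profile that the TSV artifact contains.
    inspector.print_data_tabular()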