@@ -374,7 +374,13 @@ jobs:
     secrets: inherit
     strategy:
       matrix:
-        hf_model_repo: [google/gemma-2-2b]
+        hf_model_id: [
+          google/gemma-2-2b,
+          Qwen/Qwen2.5-0.5B,
+          HuggingFaceTB/SmolLM2-135M,
+          meta-llama/Llama-3.2-1B,
+          allenai/OLMo-1B-hf
+        ]
       fail-fast: false
     with:
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -389,66 +395,39 @@ jobs:
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-        echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-        rm -rf cmake-out
-        cmake \
-          -DCMAKE_INSTALL_PREFIX=cmake-out \
-          -DCMAKE_BUILD_TYPE=Release \
-          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-          -DEXECUTORCH_BUILD_XNNPACK=ON \
-          -DPYTHON_EXECUTABLE=python \
-          -Bcmake-out .
-        cmake --build cmake-out -j9 --target install --config Release
-
-        echo "Build llama runner"
-        dir="examples/models/llama"
-        cmake \
-          -DCMAKE_INSTALL_PREFIX=cmake-out \
-          -DCMAKE_BUILD_TYPE=Release \
-          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-          -DEXECUTORCH_BUILD_XNNPACK=ON \
-          -DPYTHON_EXECUTABLE=python \
-          -Bcmake-out/${dir} \
-          ${dir}
-        cmake --build cmake-out/${dir} -j9 --config Release
         echo "::endgroup::"

-        echo "::group::Set up HuggingFace Dependencies"
-        if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
-          echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
-          exit 1
-        fi
+        echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        git clone https://github.com/huggingface/optimum-executorch
+        cd optimum-executorch
+        # There is no release yet; for CI stability, always test the same commit from main
+        git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
+        pip install .
         pip install accelerate sentencepiece
         pip list
         echo "::endgroup::"

-        echo "::group::Export to ExecuTorch"
-        TOKENIZER_FILE=tokenizer.model
-        TOKENIZER_BIN_FILE=tokenizer.bin
-        ET_MODEL_NAME=et_model
-        DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
-        if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
-          echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-          python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
-          ls ./tokenizer.bin
-        else
-          echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-          exit 1
-        fi
-
-        python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-        cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        # Pass the matrix variable to the inline script as an environment variable
+        export MODEL_ID="${{ matrix.hf_model_id }}"
+        python -c "
+        import os
+        from optimum.executorch import ExecuTorchModelForCausalLM
+        from transformers import AutoTokenizer
+
+        model_id = os.getenv('MODEL_ID')
+        print(f'Loading model: {model_id}')
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt='Simply put, the theory of relativity states that',
+            max_seq_len=64
+        )
+        print(generated_text)
+        "
         echo "::endgroup::"

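For local debugging, the inline "python -c" step above can be lifted into a standalone script. A minimal sketch, assuming the pinned optimum-executorch commit has been installed (pip install .) along with accelerate and sentencepiece, and that huggingface-cli login has been run for the gated checkpoints (gemma, llama); the file name local_repro.py is hypothetical:

# local_repro.py -- hypothetical name; a standalone version of the CI step above.
from optimum.executorch import ExecuTorchModelForCausalLM
from transformers import AutoTokenizer

# The same IDs the workflow fans out over via matrix.hf_model_id.
MODEL_IDS = [
    "google/gemma-2-2b",
    "Qwen/Qwen2.5-0.5B",
    "HuggingFaceTB/SmolLM2-135M",
    "meta-llama/Llama-3.2-1B",
    "allenai/OLMo-1B-hf",
]

for model_id in MODEL_IDS:
    print(f"Loading model: {model_id}")
    # Export the checkpoint to an ExecuTorch program with the XNNPACK
    # backend, then run generation against the exported artifact.
    model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    generated_text = model.text_generation(
        tokenizer=tokenizer,
        prompt="Simply put, the theory of relativity states that",
        max_seq_len=64,
    )
    print(generated_text)

Note that in CI each matrix job runs exactly one model ID in isolation; the loop here only mirrors that fan-out on a single machine.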
@@ -489,3 +468,13 @@ jobs:
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
+
+  unittest-release:
+    uses: ./.github/workflows/_unittest.yml
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      build-mode: Release
+      build-tool: cmake
+      docker-image: executorch-ubuntu-22.04-clang12