From 319477419f0fbbef70ec32c87d2117be10ad1dde Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Fri, 6 Jun 2025 12:37:43 -0700 Subject: [PATCH 1/7] Benchmark optimum-executorch --- .ci/scripts/gather_benchmark_configs.py | 12 ++- ...android-perf-private-device-experiment.yml | 6 +- .github/workflows/android-perf.yml | 92 ++++++++++++++++--- .../apple-perf-private-device-experiment.yml | 6 +- .github/workflows/apple-perf.yml | 87 ++++++++++++++++-- .github/workflows/trunk.yml | 42 +++++---- .../android-llm-device-farm-test-spec.yml.j2 | 17 +++- 7 files changed, 209 insertions(+), 53 deletions(-) diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py index 75cce6a67c2..ae7b78ecbb7 100755 --- a/.ci/scripts/gather_benchmark_configs.py +++ b/.ci/scripts/gather_benchmark_configs.py @@ -32,7 +32,8 @@ BENCHMARK_CONFIGS = { "xplat": [ "xnnpack_q8", - "hf_xnnpack_fp32", + "hf_xnnpack_custom_spda_kv_cache_8da4w", + "et_xnnpack_custom_spda_kv_cache_8da4w", "llama3_fb16", "llama3_spinquant", "llama3_qlora", @@ -129,8 +130,9 @@ def generate_compatible_configs(model_name: str, target_os=None) -> List[str]: """ configs = [] if is_valid_huggingface_model_id(model_name): + configs.append("hf_xnnpack_custom_spda_kv_cache_8da4w") if model_name.startswith("meta-llama/"): - # LLaMA models + # etLLM recipes for Llama repo_name = model_name.split("meta-llama/")[1] if "qlora" in repo_name.lower(): configs.append("llama3_qlora") @@ -138,6 +140,7 @@ def generate_compatible_configs(model_name: str, target_os=None) -> List[str]: configs.append("llama3_spinquant") else: configs.append("llama3_fb16") + configs.append("et_xnnpack_custom_spda_kv_cache_8da4w") configs.extend( [ config @@ -145,9 +148,8 @@ def generate_compatible_configs(model_name: str, target_os=None) -> List[str]: if config.startswith("llama") ] ) - else: - # Non-LLaMA models - configs.append("hf_xnnpack_fp32") + if model_name.startswith("Qwen/Qwen3"): + configs.append("et_xnnpack_custom_spda_kv_cache_8da4w") elif model_name in MODEL_NAME_TO_MODEL: # ExecuTorch in-tree non-GenAI models configs.append("xnnpack_q8") diff --git a/.github/workflows/android-perf-private-device-experiment.yml b/.github/workflows/android-perf-private-device-experiment.yml index c8a14c4ff05..087250cbd3e 100644 --- a/.github/workflows/android-perf-private-device-experiment.yml +++ b/.github/workflows/android-perf-private-device-experiment.yml @@ -18,7 +18,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8 + default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -34,7 +34,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8 + default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -57,6 +57,6 @@ jobs: id-token: write contents: read with: - models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }} + models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} devices: samsung_galaxy_s22_private benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index e2f85e05d3a..61528fba098 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -70,7 +70,7 @@ jobs: # Separate default values from the workflow dispatch. To ensure defaults are accessible # during scheduled runs and to provide flexibility for different defaults between # on-demand and periodic benchmarking. - CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' || 'llama' }} + CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'llama' }} CRON_DEFAULT_DEVICES: samsung_galaxy_s22 run: | set -eux @@ -201,8 +201,8 @@ jobs: HF_MODEL_REPO=${{ matrix.model }} OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.config }}" + # Convert HF checkpoint to ET via etLLM path if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then - # Llama models on Hugging Face if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then # SpinQuant # Download prequantized chceckpoint from Hugging Face @@ -272,6 +272,21 @@ jobs: --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \ --output_name="${OUT_ET_MODEL_NAME}.pte" ls -lh "${OUT_ET_MODEL_NAME}.pte" + elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then + DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") + python -m examples.models.llama.export_llama \ + --model llama3_2 \ + --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \ + --params "${DOWNLOADED_PATH}/params.json" \ + -kv \ + --use_sdpa_with_kv_cache \ + -d fp32 \ + -X \ + --xnnpack-extended-ops \ + -qmode 8da4w -G 32 -E 8,0 \ + --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \ + --output_name="${OUT_ET_MODEL_NAME}.pte" + ls -lh "${OUT_ET_MODEL_NAME}.pte" elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/ @@ -292,21 +307,72 @@ jobs: OUT_ET_MODEL_NAME="llama3_2_qnn" # Qualcomm hard-coded it in their script find . -name "${OUT_ET_MODEL_NAME}.pte" -not -path "./${OUT_ET_MODEL_NAME}.pte" -exec mv {} ./ \; ls -lh "${OUT_ET_MODEL_NAME}.pte" - else - # By default, test with the Hugging Face model and the xnnpack recipe - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model") - python -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME" - ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - else - echo "Unsupported model ${{ matrix.model }}" - exit 1 + elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then + if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then + DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") + python -m examples.models.llama.export_llama \ + --model qwen3-0_6b \ + --params examples/models/qwen3/0_6b_config.json \ + -kv \ + --use_sdpa_with_kv_cache \ + -d fp32 \ + -X \ + --xnnpack-extended-ops \ + -qmode 8da4w -G 32 -E 8,0 \ + --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \ + --output_name="${OUT_ET_MODEL_NAME}.pte" + ls -lh "${OUT_ET_MODEL_NAME}.pte" + fi + fi + + if [[ ${{ matrix.config }} == "hf_xnnpack_custom_spda_kv_cache_8da4w" ]]; then + DOWNLOADED_PATH=$( + bash .ci/scripts/download_hf_hub.sh \ + --model_id "${HF_MODEL_REPO}" \ + --files "tokenizer.json" + ) + echo "tokenizer.json is downloaded to $DOWNLOADED_PATH" + + # Install optimum-executorch + git clone https://github.com/huggingface/optimum-executorch + pushd optimum-executorch + # There is no release yet, for CI stability, always test from the same commit on main + git checkout 1c653dc49812fc431a22312c7295d97005d22e12 + python install_dev.py + pip list + + ARGS=( + "--model" "${HF_MODEL_REPO}" + "--task" "text-generation" + "--recipe" "xnnpack" + "--use_custom_sdpa" + "--qlinear" + "--qembedding" + "--output_dir" "." + ) + + # Add conditional arguments based on model + case "${HF_MODEL_REPO}" in + *"google/gemma-3-1b-it"*) + echo "--use_custom_kv_cache can not be used for HybridCache" + ;; + *) + ARGS+=("--use_custom_kv_cache") + ;; + esac + + optimum-cli export executorch "${ARGS[@]}" + + mv model.pte ${OUT_ET_MODEL_NAME}.pte + ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${DOWNLOADED_PATH}/tokenizer.model" + zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* ls -lh model.zip - mkdir -p "${ARTIFACTS_DIR_NAME}" - mv model.zip "${ARTIFACTS_DIR_NAME}" + mkdir -p ${ARTIFACTS_DIR_NAME} + mv model.zip ${ARTIFACTS_DIR_NAME} + ls -lh ${ARTIFACTS_DIR_NAME} elif [[ ${{ matrix.model }} == "llama" ]]; then # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh diff --git a/.github/workflows/apple-perf-private-device-experiment.yml b/.github/workflows/apple-perf-private-device-experiment.yml index 4bb6e8f15ec..23a88c0c2b7 100644 --- a/.github/workflows/apple-perf-private-device-experiment.yml +++ b/.github/workflows/apple-perf-private-device-experiment.yml @@ -18,7 +18,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8 + default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -34,7 +34,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8 + default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -57,6 +57,6 @@ jobs: id-token: write contents: read with: - models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }} + models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} devices: apple_iphone_15_private benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 846dc576f43..92079b9a1ff 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -70,7 +70,7 @@ jobs: # Separate default values from the workflow dispatch. To ensure defaults are accessible # during scheduled runs and to provide flexibility for different defaults between # on-demand and periodic benchmarking. - CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' || 'llama' }} + CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'llama' }} CRON_DEFAULT_DEVICES: apple_iphone_15 run: | set -eux @@ -207,6 +207,7 @@ jobs: HF_MODEL_REPO=${{ matrix.model }} OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.config }}" + # Convert HF checkpoint to ET via etLLM path if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then # Llama models on Hugging Face if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then @@ -278,6 +279,21 @@ jobs: --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \ --output_name="${OUT_ET_MODEL_NAME}.pte" ls -lh "${OUT_ET_MODEL_NAME}.pte" + elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then + DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") + ${CONDA_RUN} python -m examples.models.llama.export_llama \ + --model llama3_2 \ + --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \ + --params "${DOWNLOADED_PATH}/params.json" \ + -kv \ + --use_sdpa_with_kv_cache \ + -d fp32 \ + -X \ + --xnnpack-extended-ops \ + -qmode 8da4w -G 32 -E 8,0 \ + --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \ + --output_name="${OUT_ET_MODEL_NAME}.pte" + ls -lh "${OUT_ET_MODEL_NAME}.pte" elif [[ ${{ matrix.config }} == "llama3_coreml_ane" ]]; then # ANE DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") @@ -293,18 +309,69 @@ jobs: --coreml-compute-units cpu_and_ne \ --output_name="${OUT_ET_MODEL_NAME}.pte" ls -lh "${OUT_ET_MODEL_NAME}.pte" - else - # By default, test with the Hugging Face model and the xnnpack recipe - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model") - ${CONDA_RUN} python -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME" - ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - else - echo "Unsupported model ${{ matrix.model }}" - exit 1 + elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then + if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then + DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") + ${CONDA_RUN} python -m examples.models.llama.export_llama \ + --model qwen3-0_6b \ + --params examples/models/qwen3/0_6b_config.json \ + -kv \ + --use_sdpa_with_kv_cache \ + -d fp32 \ + -X \ + --xnnpack-extended-ops \ + -qmode 8da4w -G 32 -E 8,0 \ + --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \ + --output_name="${OUT_ET_MODEL_NAME}.pte" + ls -lh "${OUT_ET_MODEL_NAME}.pte" + fi + fi + + if [[ ${{ matrix.config }} == "hf_xnnpack_custom_spda_kv_cache_8da4w" ]]; then + DOWNLOADED_PATH=$( + bash .ci/scripts/download_hf_hub.sh \ + --model_id "${HF_MODEL_REPO}" \ + --files "tokenizer.json" + ) + echo "tokenizer.json is downloaded to $DOWNLOADED_PATH" + + # Install optimum-executorch + git clone https://github.com/huggingface/optimum-executorch + pushd optimum-executorch + # There is no release yet, for CI stability, always test from the same commit on main + git checkout 1c653dc49812fc431a22312c7295d97005d22e12 + ${CONDA_RUN} python install_dev.py + pip list + + ARGS=( + "--model" "${HF_MODEL_REPO}" + "--task" "text-generation" + "--recipe" "xnnpack" + "--use_custom_sdpa" + "--qlinear" + "--qembedding" + "--output_dir" ".." + ) + + # Add conditional arguments based on model + case "${HF_MODEL_REPO}" in + *"google/gemma-3-1b-it"*) + echo "--use_custom_kv_cache can not be used for HybridCache" + ;; + *) + ARGS+=("--use_custom_kv_cache") + ;; + esac + + ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}" + popd + + mv model.pte ${OUT_ET_MODEL_NAME}.pte + ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${DOWNLOADED_PATH}/tokenizer.model" + zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* ls -lh model.zip mkdir -p "${ARTIFACTS_DIR_NAME}" mv model.zip "${ARTIFACTS_DIR_NAME}" diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 3f3fc3918fb..03929092b9a 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -571,34 +571,40 @@ jobs: git clone https://github.com/huggingface/optimum-executorch pushd optimum-executorch # There is no release yet, for CI stability, always test from the same commit on main - git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8 + git checkout 1c653dc49812fc431a22312c7295d97005d22e12 pip install .[tests] + pip install transformers==4.52.4 popd - - if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then - # Fixes for gemma-3 is not available in the released version - git clone https://github.com/huggingface/transformers.git - pushd transformers - git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f - pip install -e . - popd - fi pip list echo "::endgroup::" echo "::group::Export to ExecuTorch" # Pass matrix variable as environment variable export MODEL_ID="${{ matrix.hf_model_id }}" - export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w" + export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w" pushd optimum-executorch - optimum-cli export executorch \ - --model ${MODEL_ID} \ - --task text-generation \ - --recipe xnnpack \ - --use_custom_sdpa \ - --output_dir ${OUTPUT_DIR} \ - --qlinear + ARGS=( + "--model" "${MODEL_ID}" + "--task" "text-generation" + "--recipe" "xnnpack" + "--use_custom_sdpa" + "--qlinear" + "--qembedding" + "--output_dir" "." + ) + + # Add conditional arguments based on model + case "${MODEL_ID}" in + *"google/gemma-3-1b-it"*) + echo "--use_custom_kv_cache can not be used for HybridCache" + ;; + *) + ARGS+=("--use_custom_kv_cache") + ;; + esac + + optimum-cli export executorch "${ARGS[@]}" ls -FlAGhp ${OUTPUT_DIR} popd diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 index af726b36c82..7f2ba17abae 100644 --- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 +++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 @@ -20,6 +20,7 @@ phases: - adb -s $DEVICEFARM_DEVICE_UDID push *.bin /sdcard > /dev/null && echo OK - adb -s $DEVICEFARM_DEVICE_UDID push *.model /sdcard > /dev/null && echo OK - adb -s $DEVICEFARM_DEVICE_UDID push *.pte /sdcard > /dev/null && echo OK + - adb -s $DEVICEFARM_DEVICE_UDID push *.json /sdcard > /dev/null && echo OK # Prepare the model and the tokenizer - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /sdcard/" @@ -27,9 +28,11 @@ phases: - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.bin /data/local/tmp/minibench/" - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.model /data/local/tmp/minibench/" - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.pte /data/local/tmp/minibench/" + - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.json /data/local/tmp/minibench/" - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.bin" - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.model" - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.pte" + - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.json" - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /data/local/tmp/minibench/" - adb -s $DEVICEFARM_DEVICE_UDID shell "run-as org.pytorch.minibench rm -rf files" @@ -105,6 +108,13 @@ phases: echo "*.model tokenizer files found in /data/local/tmp/minibench/" fi + JSON_FOUND="$(adb -s $DEVICEFARM_DEVICE_UDID shell find /data/local/tmp/minibench/ -name '*.json')" + if [ -z "$JSON_FOUND" ]; then + echo "No *.json tokenizer files found in /data/local/tmp/minibench/" + else + echo "*.json tokenizer files found in /data/local/tmp/minibench/" + fi + - echo "Collect device state before running" - | adb -s $DEVICEFARM_DEVICE_UDID shell 'cat /sys/devices/system/cpu/cpu*/cpufreq/stats/time_in_state /sys/devices/system/cpu/cpu*/cpufreq/stats/trans_table' > $DEVICEFARM_LOG_DIR/state_before.txt @@ -128,11 +138,16 @@ phases: --es "model_dir" "/data/local/tmp/minibench" \ --es "tokenizer_path" "/data/local/tmp/minibench/tokenizer.model" \ --ei "num_iter" 5 --ei "num_warm_up_iter" 2 + elif [ -n "$JSON_FOUND" ]; then + adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.BenchmarkActivity \ + --es "model_dir" "/data/local/tmp/minibench" \ + --es "tokenizer_path" "/data/local/tmp/minibench/tokenizer.json" \ + --ei "num_iter" 5 --ei "num_warm_up_iter" 2 else adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.BenchmarkActivity \ --es "model_dir" "/data/local/tmp/minibench" fi - + - echo "Collect device state after running" - | adb -s $DEVICEFARM_DEVICE_UDID shell 'cat /sys/devices/system/cpu/cpu*/cpufreq/stats/time_in_state /sys/devices/system/cpu/cpu*/cpufreq/stats/trans_table' > $DEVICEFARM_LOG_DIR/state_after.txt From 2061c52d2ead4f002b3452e360bb7dd669f539cf Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Fri, 6 Jun 2025 22:05:03 -0700 Subject: [PATCH 2/7] Fix android artifacts upload --- .github/workflows/android-perf.yml | 6 ++++-- .github/workflows/apple-perf.yml | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 61528fba098..ba97adb04e6 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -349,7 +349,7 @@ jobs: "--use_custom_sdpa" "--qlinear" "--qembedding" - "--output_dir" "." + "--output_dir" ".." ) # Add conditional arguments based on model @@ -363,12 +363,14 @@ jobs: esac optimum-cli export executorch "${ARGS[@]}" + popd mv model.pte ${OUT_ET_MODEL_NAME}.pte ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* + # zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* + zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ls -lh model.zip mkdir -p ${ARTIFACTS_DIR_NAME} mv model.zip ${ARTIFACTS_DIR_NAME} diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 92079b9a1ff..546d7495f96 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -371,7 +371,8 @@ jobs: ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* + # zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* + zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ls -lh model.zip mkdir -p "${ARTIFACTS_DIR_NAME}" mv model.zip "${ARTIFACTS_DIR_NAME}" From 695538633a947890b24344fcea74d49b15a02269 Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Mon, 9 Jun 2025 20:18:14 -0700 Subject: [PATCH 3/7] Fix loading non-llama LLM via ios benchmark app --- .github/workflows/android-perf.yml | 4 +++- .github/workflows/apple-perf.yml | 12 +++++++++--- .../apple/Benchmark/Tests/LLaMA/LLaMATests.mm | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index ba97adb04e6..6a77b442b2c 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -319,7 +319,9 @@ jobs: -d fp32 \ -X \ --xnnpack-extended-ops \ - -qmode 8da4w -G 32 -E 8,0 \ + -qmode 8da4w \ + -G 32 \ + -E 8,0 \ --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \ --output_name="${OUT_ET_MODEL_NAME}.pte" ls -lh "${OUT_ET_MODEL_NAME}.pte" diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 546d7495f96..e2f2cc2fcc3 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -209,6 +209,8 @@ jobs: # Convert HF checkpoint to ET via etLLM path if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then + # The benchmark app replies on the _llm suffix to determine whether the model is a LLM or not + OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm # Llama models on Hugging Face if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then # SpinQuant @@ -311,6 +313,7 @@ jobs: ls -lh "${OUT_ET_MODEL_NAME}.pte" fi elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then + OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") ${CONDA_RUN} python -m examples.models.llama.export_llama \ @@ -321,7 +324,9 @@ jobs: -d fp32 \ -X \ --xnnpack-extended-ops \ - -qmode 8da4w -G 32 -E 8,0 \ + -qmode 8da4w \ + -G 32 \ + -E 8,0 \ --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \ --output_name="${OUT_ET_MODEL_NAME}.pte" ls -lh "${OUT_ET_MODEL_NAME}.pte" @@ -367,12 +372,13 @@ jobs: ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}" popd + # The benchmark app replies on the _llm suffix to determine whether the model is a LLM or not + OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm mv model.pte ${OUT_ET_MODEL_NAME}.pte ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - # zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* - zip -j model.zip ${OUT_ET_MODEL_NAME}.pte + zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* ls -lh model.zip mkdir -p "${ARTIFACTS_DIR_NAME}" mv model.zip "${ARTIFACTS_DIR_NAME}" diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm index c56f054ae3b..66f2e025749 100644 --- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm +++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm @@ -60,7 +60,7 @@ @implementation LLaMATests + (NSDictionary *)predicates { return @{ @"model" : ^BOOL(NSString *filename){ - return [filename hasSuffix:@".pte"] && [filename.lowercaseString containsString:@"llama"]; + return [filename hasSuffix:@".pte"] && [filename.lowercaseString containsString:@"llm"]; }, @"tokenizer" : ^BOOL(NSString *filename) { return [filename isEqual:@"tokenizer.bin"] || [filename isEqual:@"tokenizer.model"] || [filename isEqual:@"tokenizer.json"]; From b797a542697d53bb50f400780527c1af85f2bd90 Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Mon, 9 Jun 2025 21:52:03 -0700 Subject: [PATCH 4/7] build benchmark app with SUPPORT_REGEX_LOOKAHEAD --- extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig index 0172f28b1bb..548e4503b00 100644 --- a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig +++ b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig @@ -21,3 +21,5 @@ HEADER_SEARCH_PATHS = $(inherited) \ LIBRARY_SEARCH_PATHS = $(inherited) \ $(TEMP_DIR)/cmake/lib + +OTHER_CFLAGS = $(inherited) -DSUPPORT_REGEX_LOOKAHEAD=1 From ca377cccf2ce73c48ac6bcc1f82c8a375f301770 Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Mon, 9 Jun 2025 22:56:09 -0700 Subject: [PATCH 5/7] offload private device by temp removing some models from count run --- .../workflows/android-perf-private-device-experiment.yml | 6 +++--- .github/workflows/apple-perf-private-device-experiment.yml | 6 +++--- extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig | 2 -- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/android-perf-private-device-experiment.yml b/.github/workflows/android-perf-private-device-experiment.yml index 087250cbd3e..a3e3a803e1b 100644 --- a/.github/workflows/android-perf-private-device-experiment.yml +++ b/.github/workflows/android-perf-private-device-experiment.yml @@ -18,7 +18,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf + default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -34,7 +34,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf + default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -57,6 +57,6 @@ jobs: id-token: write contents: read with: - models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} + models: ${{ inputs.models || 'google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} devices: samsung_galaxy_s22_private benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/apple-perf-private-device-experiment.yml b/.github/workflows/apple-perf-private-device-experiment.yml index 23a88c0c2b7..cbe66d6e135 100644 --- a/.github/workflows/apple-perf-private-device-experiment.yml +++ b/.github/workflows/apple-perf-private-device-experiment.yml @@ -18,7 +18,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf + default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -34,7 +34,7 @@ on: description: Models to be benchmarked required: false type: string - default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf + default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -57,6 +57,6 @@ jobs: id-token: write contents: read with: - models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} + models: ${{ inputs.models || 'google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} devices: apple_iphone_15_private benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig index 548e4503b00..0172f28b1bb 100644 --- a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig +++ b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig @@ -21,5 +21,3 @@ HEADER_SEARCH_PATHS = $(inherited) \ LIBRARY_SEARCH_PATHS = $(inherited) \ $(TEMP_DIR)/cmake/lib - -OTHER_CFLAGS = $(inherited) -DSUPPORT_REGEX_LOOKAHEAD=1 From e1340e38406429c26ee3032bb13455e3da02fefd Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Tue, 10 Jun 2025 18:27:48 -0700 Subject: [PATCH 6/7] Fix trunk jobs --- .../android-perf-private-device-experiment.yml | 2 +- .github/workflows/android-perf.yml | 3 +-- .../workflows/apple-perf-private-device-experiment.yml | 4 ++-- .github/workflows/trunk.yml | 10 +++++----- .../Benchmark/Benchmark.xcodeproj/project.pbxproj | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/android-perf-private-device-experiment.yml b/.github/workflows/android-perf-private-device-experiment.yml index a3e3a803e1b..1a707a29224 100644 --- a/.github/workflows/android-perf-private-device-experiment.yml +++ b/.github/workflows/android-perf-private-device-experiment.yml @@ -57,6 +57,6 @@ jobs: id-token: write contents: read with: - models: ${{ inputs.models || 'google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} + models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }} devices: samsung_galaxy_s22_private benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 6a77b442b2c..34744268ff5 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -371,8 +371,7 @@ jobs: ls -lh "${OUT_ET_MODEL_NAME}.pte" fi - # zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* - zip -j model.zip ${OUT_ET_MODEL_NAME}.pte + zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* ls -lh model.zip mkdir -p ${ARTIFACTS_DIR_NAME} mv model.zip ${ARTIFACTS_DIR_NAME} diff --git a/.github/workflows/apple-perf-private-device-experiment.yml b/.github/workflows/apple-perf-private-device-experiment.yml index cbe66d6e135..42c395519ac 100644 --- a/.github/workflows/apple-perf-private-device-experiment.yml +++ b/.github/workflows/apple-perf-private-device-experiment.yml @@ -34,7 +34,7 @@ on: description: Models to be benchmarked required: false type: string - default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf + default: Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf devices: description: Target devices to run benchmark required: false @@ -57,6 +57,6 @@ jobs: id-token: write contents: read with: - models: ${{ inputs.models || 'google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' }} + models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }} devices: apple_iphone_15_private benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 03929092b9a..153c0ce2687 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -591,7 +591,7 @@ jobs: "--use_custom_sdpa" "--qlinear" "--qembedding" - "--output_dir" "." + "--output_dir" "${OUTPUT_DIR}" ) # Add conditional arguments based on model @@ -707,18 +707,18 @@ jobs: timeout: 90 script: | set -eux - + # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - + # Build and install Executorch PYTHON_EXECUTABLE=python \ CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \ .ci/scripts/setup-linux.sh --build-tool "cmake" - + # Install test requirements pip install -r backends/nxp/requirements-tests.txt - + # Run pytest PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj index f6fe811b4ab..5eac01cb4ed 100644 --- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj +++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj @@ -397,7 +397,7 @@ ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"12.0\"\nfi\n\ncmake_build() {\n local src_dir=$1\n local target=$2\n shift 2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\" --target \"$target\"\n if [[ \"$target\" == \"install\" ]]; then\n cmake --install . --prefix \"$CMAKE_DIR\"\n fi\n}\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/pcre2\" \"install\" \\\n -DPCRE2_BUILD_PCRE2_8=ON \\\n -DPCRE2_BUILD_PCRE2_16=OFF \\\n -DPCRE2_BUILD_PCRE2_32=OFF \\\n -DPCRE2_BUILD_TESTS=OFF \\\n -DPCRE2_BUILD_PCRE2GREP=OFF \\\n -DPCRE2_BUILD_PCRE2TEST=OFF \\\n -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n -DPCRE2_BUILD_DOCS=OFF \\\n -DPCRE2_BUILD_LIBPCRE2_PDB=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n -DSPM_ENABLE_SHARED=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n"; + shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"12.0\"\nfi\n\ncmake_build() {\n local src_dir=$1\n local target=$2\n shift 2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\" --target \"$target\"\n if [[ \"$target\" == \"install\" ]]; then\n cmake --install . --prefix \"$CMAKE_DIR\"\n fi\n}\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/pcre2\" \"install\" \\\n -DPCRE2_BUILD_PCRE2_8=ON \\\n -DPCRE2_BUILD_PCRE2_16=OFF \\\n -DPCRE2_BUILD_PCRE2_32=OFF \\\n -DPCRE2_BUILD_TESTS=OFF \\\n -DPCRE2_BUILD_PCRE2GREP=OFF \\\n -DPCRE2_BUILD_PCRE2TEST=OFF \\\n -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n -DPCRE2_BUILD_DOCS=OFF \\\n -DPCRE2_BUILD_LIBPCRE2_PDB=OFF \\\n -DSUPPORT_REGEX_LOOKAHEAD=ON\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n -DSPM_ENABLE_SHARED=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n"; }; /* End PBXShellScriptBuildPhase section */ From b25c0d20114a2571e9fbfb51f763a05ae1086dac Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Wed, 11 Jun 2025 14:01:11 -0700 Subject: [PATCH 7/7] add missing regex_lookahead.cpp to the Benchmark.xcodeproj --- .../apple/Benchmark/Benchmark.xcodeproj/project.pbxproj | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj index 5eac01cb4ed..47a7af09dbd 100644 --- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj +++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj @@ -33,6 +33,7 @@ 30AA4B642DC0766800B1BE50 /* std_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */; }; 30AA4B652DC0766800B1BE50 /* pre_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5B2DC0766800B1BE50 /* pre_tokenizer.cpp */; }; 30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */; }; + 3C6ABD332DFA27DE0015DE55 /* regex_lookahead.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */; }; F22E9E1A2DF2CBB900EC5425 /* text_llm_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */; }; F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; }; F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; }; @@ -95,6 +96,7 @@ 30AA4B5D2DC0766800B1BE50 /* regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = regex.cpp; path = src/regex.cpp; sourceTree = ""; }; 30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = std_regex.cpp; path = src/std_regex.cpp; sourceTree = ""; }; 30AA4B5F2DC0766800B1BE50 /* token_decoder.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = token_decoder.cpp; path = src/token_decoder.cpp; sourceTree = ""; }; + 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = regex_lookahead.cpp; path = src/regex_lookahead.cpp; sourceTree = ""; }; F22E9E182DF2CBB900EC5425 /* text_llm_runner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = text_llm_runner.h; sourceTree = ""; }; F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = text_llm_runner.cpp; sourceTree = ""; }; F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = ""; }; @@ -181,6 +183,7 @@ 032A74022CAFBB7800932D36 /* tokenizers */ = { isa = PBXGroup; children = ( + 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */, 30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */, 30AA4B5A2DC0766800B1BE50 /* pcre2_regex.cpp */, 30AA4B5B2DC0766800B1BE50 /* pre_tokenizer.cpp */, @@ -434,6 +437,7 @@ 30AA4B652DC0766800B1BE50 /* pre_tokenizer.cpp in Sources */, 30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */, 032A73CA2CAFBA8600932D36 /* LLaMATests.mm in Sources */, + 3C6ABD332DFA27DE0015DE55 /* regex_lookahead.cpp in Sources */, 032A74262CAFC34800932D36 /* llama_tiktoken.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0;