diff --git a/.github/workflows/android-perf-private-device-experiment.yml b/.github/workflows/android-perf-private-device-experiment.yml deleted file mode 100644 index cf37538f620..00000000000 --- a/.github/workflows/android-perf-private-device-experiment.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: android-perf (private devices) - -on: - schedule: - - cron: 0 0,4,8,12,16,20 * * * - pull_request: - paths: - - .github/workflows/android-perf-private-device-experiment.yml - push: - branches: - - main - paths: - - .github/workflows/android-perf-private-device-experiment.yml - # Note: GitHub has an upper limit of 10 inputs - workflow_dispatch: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: samsung_galaxy_s22+private - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - workflow_call: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: samsung_galaxy_s22+private - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - -concurrency: - group: android-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true - -jobs: - android: - uses: ./.github/workflows/android-perf.yml - secrets: inherit - permissions: - id-token: write - contents: read - with: - models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }} - devices: samsung_galaxy_s22+private - benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml deleted file mode 100644 index 33937531a01..00000000000 --- a/.github/workflows/android-perf.yml +++ /dev/null @@ -1,562 +0,0 @@ -name: android-perf - -on: - schedule: - - cron: 0 0,8,16 * * * - pull_request: - paths: - - .github/workflows/android-perf.yml - - .ci/scripts/gather_benchmark_configs.py - - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 - push: - branches: - - main - paths: - - .github/workflows/android-perf.yml - - .ci/scripts/gather_benchmark_configs.py - - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 - # Note: GitHub has an upper limit of 10 inputs - workflow_dispatch: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: samsung_galaxy_s22+public - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - workflow_call: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: samsung_galaxy_s22+public - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type:
string - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true - -jobs: - set-parameters: - runs-on: ubuntu-22.04 - outputs: - benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }} - steps: - - uses: actions/checkout@v3 - with: - submodules: 'false' - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - name: Set parameters - id: set-parameters - shell: bash - env: - # Separate default values from the workflow dispatch to ensure defaults are accessible - # during scheduled runs and to provide flexibility for different defaults between - # on-demand and periodic benchmarking. - CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }} - CRON_DEFAULT_DEVICES: samsung_galaxy_s22+public - run: | - set -eux - - ARGS="--os android" - - MODELS="${{ inputs.models }}" - if [ -z "$MODELS" ]; then - MODELS="$CRON_DEFAULT_MODELS" - fi - ARGS="$ARGS --models $MODELS" - - DEVICES="${{ inputs.devices }}" - if [ -z "$DEVICES" ]; then - DEVICES="$CRON_DEFAULT_DEVICES" - fi - ARGS="$ARGS --devices $DEVICES" - - BENCHMARK_CONFIGS="${{ inputs.benchmark_configs }}" - if [ -n "$BENCHMARK_CONFIGS" ]; then - ARGS="$ARGS --configs $BENCHMARK_CONFIGS" - fi - - PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS - - prepare-test-specs: - runs-on: linux.2xlarge - needs: set-parameters - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - steps: - - uses: actions/checkout@v3 - - - name: Prepare the spec - id: prepare - shell: bash - env: - BENCHMARK_CONFIG: ${{ toJSON(matrix) }} - working-directory: extension/benchmark/android/benchmark - run: | - set -eux - - # The model will be exported in the next step to this S3 path - MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" - # We could write a script to properly use jinja here, but there is only one variable, - # so let's just sed it - sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 - - BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') - # The config for this benchmark run; we save it in the test spec so that it can be fetched - # later by the upload script - sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2 - - cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml - # Just print the test spec for debugging - cat android-llm-device-farm-test-spec.yml - - # Save the benchmark configs so that we can use them later in the dashboard - echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" - echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT - - - name: Upload the spec - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} - retention-days: 1 -
if-no-files-found: error - path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml - - - name: Update the benchmark configs - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json - - export-models: - name: export-models - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - needs: set-parameters - secrets: inherit - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - with: - runner: linux.2xlarge.memory - docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - timeout: 60 - upload-artifact: android-models - upload-artifact-to-s3: true - secrets-env: EXECUTORCH_HF_TOKEN - script: | - # The generic Linux job chooses to use base env, not the one set up by the image - echo "::group::Setting up dev environment" - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - if [[ ${{ matrix.config }} == *"qnn"* ]]; then - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - fi - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - - pip install -U "huggingface_hub[cli]" - huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN - pip install accelerate sentencepiece - pip list - - ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.config }} - echo "::endgroup::" - - echo "::group::Exporting ${{ matrix.config }} model: ${{ matrix.model }}" - BUILD_MODE="cmake" - - if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then - # HuggingFace model.
Assume the pattern is always like "<org>/<model>" - HF_MODEL_REPO=${{ matrix.model }} - OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.config }}" - - # Convert HF checkpoint to ET via etLLM path - if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then - if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then - # SpinQuant - # Download prequantized checkpoint from Hugging Face - DOWNLOADED_PATH=$( - bash .ci/scripts/download_hf_hub.sh \ - --model_id "${HF_MODEL_REPO}" \ - --files "tokenizer.model" "params.json" "consolidated.00.pth" - ) - # Export using ExecuTorch's model definition - python -m extension.llm.export.export_llm \ - base.model_class="llama3_2" \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - model.use_sdpa_with_kv_cache=true \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - base.preq_mode="preq_8da4w_out_8da8w" \ - base.preq_group_size=32 \ - export.max_seq_length=2048 \ - export.max_context_length=2048 \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" \ - model.use_kv_cache=true \ - model.dtype_override=fp32 \ - base.preq_embedding_quantize=\'8,0\' \ - quantization.use_spin_quant=native \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then - # QAT + LoRA - # Download prequantized checkpoint from Hugging Face - DOWNLOADED_PATH=$( - bash .ci/scripts/download_hf_hub.sh \ - --model_id "${HF_MODEL_REPO}" \ - --files "tokenizer.model" "params.json" "consolidated.00.pth" - ) - # Export using ExecuTorch's model definition - python -m extension.llm.export.export_llm \ - base.model_class="llama3_2" \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - quantization.use_qat=true \ - base.use_lora=16 \ - base.preq_mode="preq_8da4w_out_8da8w" \ - base.preq_group_size=32 \ - base.preq_embedding_quantize=\'8,0\' \ - model.use_sdpa_with_kv_cache=true \ - model.use_kv_cache=true \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - model.dtype_override=fp32 \ - export.max_seq_length=2048 \ - export.max_context_length=2048 \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then - # Original BF16 version, without any quantization - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") - python -m extension.llm.export.export_llm \ - base.model_class="llama3_2" \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - model.use_kv_cache=true \ - model.use_sdpa_with_kv_cache=true \ - backend.xnnpack.enabled=true \ - model.dtype_override=bf16 \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") - python -m extension.llm.export.export_llm \ - base.model_class=llama3_2 \ -
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - model.use_kv_cache=true \ - model.use_sdpa_with_kv_cache=true \ - model.dtype_override=fp32 \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - quantization.qmode=8da4w \ - quantization.group_size=32 \ - quantization.embedding_quantize=\'8,0\' \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then - export QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724 - export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/ - export PYTHONPATH=$(pwd)/.. - - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") - python -m examples.qualcomm.oss_scripts.llama3_2.llama -- \ - --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \ - --params "${DOWNLOADED_PATH}/params.json" \ - --tokenizer_model "${DOWNLOADED_PATH}/tokenizer.model" \ - --compile_only \ - --ptq 16a4w \ - -m SM8650 \ - --model_size 1B \ - --model_mode kv \ - --prompt "Once" - - OUT_ET_MODEL_NAME="llama3_2_qnn" # Qualcomm hard-coded it in their script - find . -name "${OUT_ET_MODEL_NAME}.pte" -not -path "./${OUT_ET_MODEL_NAME}.pte" -exec mv {} ./ \; - ls -lh "${OUT_ET_MODEL_NAME}.pte" - fi - elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then - if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") - python -m extension.llm.export.export_llm \ - base.model_class=qwen3_0_6b \ - base.params=examples/models/qwen3/config/0_6b_config.json \ - model.use_kv_cache=true \ - model.use_sdpa_with_kv_cache=true \ - model.dtype_override=fp32 \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - quantization.qmode=8da4w \ - quantization.group_size=32 \ - quantization.embedding_quantize=\'8,0\' \ - base.metadata='"{\"get_bos_id\":151644,\"get_eos_ids\":[151645]}"' \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - fi - fi - - if [[ ${{ matrix.config }} == "hf_xnnpack_custom_spda_kv_cache_8da4w" ]]; then - DOWNLOADED_PATH=$( - bash .ci/scripts/download_hf_hub.sh \ - --model_id "${HF_MODEL_REPO}" \ - --files "tokenizer.json" - ) - echo "tokenizer.json is downloaded to $DOWNLOADED_PATH" - - # Install optimum-executorch - OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) - git clone https://github.com/huggingface/optimum-executorch - pushd optimum-executorch - # There is no release yet; for CI stability, always test from the same commit on main - git checkout $OPTIMUM_ET_COMMIT - python install_dev.py --skip_override_torch - pip list - - ARGS=( - "--model" "${HF_MODEL_REPO}" - "--task" "text-generation" - "--recipe" "xnnpack" - "--use_custom_sdpa" - "--use_custom_kv_cache" - "--qlinear" "8da4w" - "--qembedding" "8w" - "--output_dir" ".."
- ) - - optimum-cli export executorch "${ARGS[@]}" - popd - - mv model.pte ${OUT_ET_MODEL_NAME}.pte - ls -lh "${OUT_ET_MODEL_NAME}.pte" - fi - - zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* - ls -lh model.zip - mkdir -p ${ARTIFACTS_DIR_NAME} - mv model.zip ${ARTIFACTS_DIR_NAME} - ls -lh ${ARTIFACTS_DIR_NAME} - elif [[ ${{ matrix.model }} == "llama" ]]; then - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - if [[ ${{ matrix.config }} == *"xnnpack"* ]]; then - DELEGATE_CONFIG="xnnpack+custom+qe" - elif [[ ${{ matrix.config }} == *"qnn"* ]]; then - DELEGATE_CONFIG="qnn" - else - echo "Unsupported delegate ${{ matrix.config }}" - exit 1 - fi - DTYPE="fp32" - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \ - -model "${{ matrix.model }}" \ - -build_tool "${BUILD_MODE}" \ - -dtype "${DTYPE}" \ - -mode "${DELEGATE_CONFIG}" \ - -upload "${ARTIFACTS_DIR_NAME}" - else - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \ - "${{ matrix.model }}" \ - "${BUILD_MODE}" \ - "${{ matrix.config }}" \ - "${ARTIFACTS_DIR_NAME}" - fi - echo "::endgroup::" - - build-benchmark-app: - name: build-benchmark-app - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - needs: set-parameters - with: - runner: linux.2xlarge - docker-image: ci-image:executorch-ubuntu-22.04-clang12-android - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - upload-artifact: android-apps - upload-artifact-to-s3: true - script: | - set -eux - - # Use sccache for NDK compiler as well - export CMAKE_CXX_COMPILER_LAUNCHER=sccache - export CMAKE_C_COMPILER_LAUNCHER=sccache - - # The generic Linux job chooses to use base env, not the one set up by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - - mkdir -p aar-out - PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh - mkdir -p extension/benchmark/android/benchmark/app/libs - cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs - pushd extension/benchmark/android/benchmark - ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest - popd - MINIBENCH_APP_DIR="${ARTIFACTS_DIR_NAME}/minibench" - mkdir -p "${MINIBENCH_APP_DIR}" - cp extension/benchmark/android/benchmark/app/build/outputs/apk/debug/*.apk "${MINIBENCH_APP_DIR}" - cp extension/benchmark/android/benchmark/app/build/outputs/apk/androidTest/debug/*.apk "${MINIBENCH_APP_DIR}" - - # Let's see how expensive this job is; we might want to tone it down by running it periodically - # CHANGE IF this job name 'benchmark-on-device' changes: extract_model_info() in executorch/.github/scripts/extract_benchmark_results.py - benchmark-on-device: - if: always() - permissions: - id-token: write - contents: read - uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main - needs: - - set-parameters - - prepare-test-specs - - build-benchmark-app - - export-models
strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - with: - # Due to scheduling, a job may be pushed beyond the default 60m threshold - timeout: 240 - device-type: android - runner: linux.2xlarge - test-infra-ref: '' - # This is the ARN of the ExecuTorch project on AWS - project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 - device-pool-arn: ${{ matrix.device_arn }} - android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk - android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk - test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/android-llm-device-farm-test-spec.yml - new-output-format-flag: true - - upload-benchmark-results: - needs: - - benchmark-on-device - if: always() - runs-on: linux.2xlarge - environment: upload-benchmark-results - permissions: - id-token: write - contents: read - steps: - - uses: actions/checkout@v3 - with: - submodules: false - - - name: Authenticate with AWS - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results - # The max duration enforced by the server side - role-duration-seconds: 18000 - aws-region: us-east-1 - - - name: Setup conda - uses: pytorch/test-infra/.github/actions/setup-miniconda@main - with: - python-version: '3.10' - - - name: Download the list of artifacts from S3 - env: - ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/ - shell: bash - run: | - set -eux - ${CONDA_RUN} python -mpip install awscli==1.32.18 - - mkdir -p artifacts - pushd artifacts - ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" . - popd - - ls -lah artifacts - - - name: Download the list of benchmark configs from S3 - env: - BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - shell: bash - run: | - set -eux - - mkdir -p benchmark-configs - pushd benchmark-configs - ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
- popd - - ls -lah benchmark-configs - - - name: Extract the benchmark results JSON - shell: bash - env: - DEVICE_TYPE: android - run: | - set -eux - - mkdir -p benchmark-results - - for ARTIFACTS_BY_JOB in artifacts/*.json; do - [ -f "${ARTIFACTS_BY_JOB}" ] || break - echo "${ARTIFACTS_BY_JOB}" - ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ - --artifacts "${ARTIFACTS_BY_JOB}" \ - --output-dir benchmark-results \ - --app "${DEVICE_TYPE}" \ - --benchmark-configs benchmark-configs - done - - for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do - cat "${BENCHMARK_RESULTS}" - echo - done - - - name: Upload the benchmark results (v3) - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-results/v3 - dry-run: false - schema-version: v3 - github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/apple-perf-private-device-experiment.yml b/.github/workflows/apple-perf-private-device-experiment.yml deleted file mode 100644 index 47e2c6c9340..00000000000 --- a/.github/workflows/apple-perf-private-device-experiment.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: apple-perf (private devices) - -on: - schedule: - - cron: 0 0,4,8,12,16,20 * * * - pull_request: - paths: - - .github/workflows/apple-perf-private-device-experiment.yml - push: - branches: - - main - paths: - - .github/workflows/apple-perf-private-device-experiment.yml - # Note: GitHub has an upper limit of 10 inputs - workflow_dispatch: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: apple_iphone_15+pro_private - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - workflow_call: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: apple_iphone_15+pro_private - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - -concurrency: - group: apple-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true - -jobs: - apple: - uses: ./.github/workflows/apple-perf.yml - secrets: inherit - permissions: - id-token: write - contents: read - with: - models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }} - devices: apple_iphone_15+pro_private - benchmark_configs: ${{ inputs.benchmark_configs }} diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml deleted file mode 100644 index 56fc67d1617..00000000000 --- a/.github/workflows/apple-perf.yml +++ /dev/null @@ -1,603 +0,0 @@ -name: apple-perf - -on: - schedule: - - cron: 0 1 * * * - pull_request: - paths: - - .github/workflows/apple-perf.yml - - .ci/scripts/gather_benchmark_configs.py - - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 - push: - branches: - - main - paths: - - .github/workflows/apple-perf.yml - - .ci/scripts/gather_benchmark_configs.py - -
extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 - # Note: GitHub has an upper limit of 10 inputs - workflow_dispatch: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: apple_iphone_15+public - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - workflow_call: - inputs: - models: - description: Models to be benchmarked - required: false - type: string - default: Qwen/Qwen3-0.6B - devices: - description: Target devices to run benchmark - required: false - type: string - default: apple_iphone_15+public - benchmark_configs: - description: The list of configs used by the benchmark - required: false - type: string - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true - -jobs: - set-parameters: - runs-on: ubuntu-22.04 - outputs: - benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }} - steps: - - uses: actions/checkout@v3 - with: - submodules: 'false' - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - name: Set parameters - id: set-parameters - shell: bash - env: - # Separate default values from the workflow dispatch to ensure defaults are accessible - # during scheduled runs and to provide flexibility for different defaults between - # on-demand and periodic benchmarking. - CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }} - CRON_DEFAULT_DEVICES: apple_iphone_15+public - run: | - set -eux - - ARGS="--os ios" - - MODELS="${{ inputs.models }}" - if [ -z "$MODELS" ]; then - MODELS="$CRON_DEFAULT_MODELS" - fi - ARGS="$ARGS --models $MODELS" - - DEVICES="${{ inputs.devices }}" - if [ -z "$DEVICES" ]; then - DEVICES="$CRON_DEFAULT_DEVICES" - fi - ARGS="$ARGS --devices $DEVICES" - - BENCHMARK_CONFIGS="${{ inputs.benchmark_configs }}" - if [ -n "$BENCHMARK_CONFIGS" ]; then - ARGS="$ARGS --configs $BENCHMARK_CONFIGS" - fi - - PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS - - echo "benchmark_configs is: ${{ steps.set-parameters.outputs.benchmark_configs }}" - - prepare-test-specs: - runs-on: linux.2xlarge - needs: set-parameters - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - steps: - - uses: actions/checkout@v3 - - - name: Prepare the spec - id: prepare - shell: bash - env: - BENCHMARK_CONFIG: ${{ toJSON(matrix) }} - working-directory: extension/benchmark/apple/Benchmark - run: | - set -eux - - # The model will be exported in the next step to this S3 path - MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip" - # We could write a script to properly use jinja here, but there is only one variable, - # so let's just sed it - sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2 - -
BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g') - # The config for this benchmark run; we save it in the test spec so that it can be fetched - # later by the upload script - sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2 - - cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml - # Just print the test spec for debugging - cat default-ios-device-farm-appium-test-spec.yml - - # Save the benchmark configs so that we can use them later in the dashboard - echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" - echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT - - - name: Upload the spec - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }} - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml - - - name: Update the benchmark configs - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json - - export-models: - name: export-models - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - needs: set-parameters - secrets: inherit - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - with: - # NB: Need to use our AWS MacOS runner to upload large models to S3 - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - timeout: 60 - upload-artifact: ios-models - upload-artifact-to-s3: true - secrets-env: EXECUTORCH_HF_TOKEN - script: | - set -eux - - echo "::group::Setting up CI environment" - .ci/scripts/setup-conda.sh - - BUILD_TOOL=cmake - # Setup MacOS dependencies as there is no Docker support on MacOS atm - GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - - if [[ ${{ matrix.config }} == *"coreml"* ]]; then - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - backends/apple/coreml/scripts/install_requirements.sh - fi - - # Install requirements for export_llama - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - - pip install -U "huggingface_hub[cli]" - huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN - ${CONDA_RUN} pip install accelerate sentencepiece - pip list - - ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.config }} - echo "::endgroup::" - - echo "::group::Exporting ${{ matrix.config }} model: ${{ matrix.model }}" - BUILD_MODE="cmake" - - if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then - # HuggingFace model.
Assume the pattern is always like "<org>/<model>" - HF_MODEL_REPO=${{ matrix.model }} - OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.config }}" - - # Convert HF checkpoint to ET via etLLM path - if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then - # The benchmark app relies on the _llm suffix to determine whether the model is an LLM or not - OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm - # Llama models on Hugging Face - if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then - # SpinQuant - # Download prequantized checkpoint from Hugging Face - DOWNLOADED_PATH=$( - bash .ci/scripts/download_hf_hub.sh \ - --model_id "${HF_MODEL_REPO}" \ - --files "tokenizer.model" "params.json" "consolidated.00.pth" - ) - # Export using ExecuTorch's model definition - ${CONDA_RUN} python -m extension.llm.export.export_llm \ - base.model_class="llama3_2" \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - model.use_sdpa_with_kv_cache=true \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - base.preq_mode="preq_8da4w_out_8da8w" \ - base.preq_group_size=32 \ - export.max_seq_length=2048 \ - export.max_context_length=2048 \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" \ - model.use_kv_cache=true \ - model.dtype_override=fp32 \ - base.preq_embedding_quantize=\'8,0\' \ - quantization.use_spin_quant=native \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then - # QAT + LoRA - # Download prequantized checkpoint from Hugging Face - DOWNLOADED_PATH=$( - bash .ci/scripts/download_hf_hub.sh \ - --model_id "${HF_MODEL_REPO}" \ - --files "tokenizer.model" "params.json" "consolidated.00.pth" - ) - # Export using ExecuTorch's model definition - ${CONDA_RUN} python -m extension.llm.export.export_llm \ - base.model_class="llama3_2" \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - quantization.use_qat=true \ - base.use_lora=16 \ - base.preq_mode="preq_8da4w_out_8da8w" \ - base.preq_group_size=32 \ - base.preq_embedding_quantize=\'8,0\' \ - model.use_sdpa_with_kv_cache=true \ - model.use_kv_cache=true \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - model.dtype_override=fp32 \ - export.max_seq_length=2048 \ - export.max_context_length=2048 \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then - # Original BF16 version, without any quantization - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") - ${CONDA_RUN} python -m extension.llm.export.export_llm \ - base.model_class="llama3_2" \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - model.use_kv_cache=true \ - model.use_sdpa_with_kv_cache=true \ - backend.xnnpack.enabled=true \ - model.dtype_override=bf16 \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then - DOWNLOADED_PATH=$(bash
.ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") - ${CONDA_RUN} python -m extension.llm.export.export_llm \ - base.model_class=llama3_2 \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - model.use_kv_cache=true \ - model.use_sdpa_with_kv_cache=true \ - model.dtype_override=fp32 \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - quantization.qmode=8da4w \ - quantization.group_size=32 \ - quantization.embedding_quantize=\'8,0\' \ - base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - elif [[ ${{ matrix.config }} == "llama3_coreml_ane" ]]; then - # ANE - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth") - ${CONDA_RUN} python -m extension.llm.export.export_llm \ - base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ - base.params="${DOWNLOADED_PATH}/params.json" \ - quantization.embedding_quantize=\'4,32\' \ - model.use_kv_cache=true \ - model.enable_dynamic_shape=false \ - backend.coreml.enabled=true \ - backend.coreml.ios=18 \ - backend.coreml.quantize=c4w \ - backend.coreml.compute_units=cpu_and_ne \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - fi - elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then - OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm - if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then - DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") - ${CONDA_RUN} python -m extension.llm.export.export_llm \ - base.model_class=qwen3_0_6b \ - base.params=examples/models/qwen3/config/0_6b_config.json \ - model.use_kv_cache=true \ - model.use_sdpa_with_kv_cache=true \ - model.dtype_override=fp32 \ - backend.xnnpack.enabled=true \ - backend.xnnpack.extended_ops=true \ - quantization.qmode=8da4w \ - quantization.group_size=32 \ - quantization.embedding_quantize=\'8,0\' \ - base.metadata='"{\"get_bos_id\":151644,\"get_eos_ids\":[151645]}"' \ - export.output_name="${OUT_ET_MODEL_NAME}.pte" - ls -lh "${OUT_ET_MODEL_NAME}.pte" - fi - fi - - if [[ ${{ matrix.config }} == "hf_xnnpack_custom_spda_kv_cache_8da4w" ]]; then - DOWNLOADED_PATH=$( - bash .ci/scripts/download_hf_hub.sh \ - --model_id "${HF_MODEL_REPO}" \ - --files "tokenizer.json" - ) - echo "tokenizer.json is downloaded to $DOWNLOADED_PATH" - - # Install optimum-executorch - OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) - git clone https://github.com/huggingface/optimum-executorch - pushd optimum-executorch - # There is no release yet; for CI stability, always test from the same commit on main - git checkout $OPTIMUM_ET_COMMIT - ${CONDA_RUN} python install_dev.py --skip_override_torch - pip list - - ARGS=( - "--model" "${HF_MODEL_REPO}" - "--task" "text-generation" - "--recipe" "xnnpack" - "--use_custom_sdpa" - "--use_custom_kv_cache" - "--qlinear" "8da4w" - "--qembedding" "8w" - "--output_dir" ".."
- ) - - ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}" - popd - - # The benchmark app relies on the _llm suffix to determine whether the model is an LLM or not - OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm - mv model.pte ${OUT_ET_MODEL_NAME}.pte - ls -lh "${OUT_ET_MODEL_NAME}.pte" - fi - - zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.* - ls -lh model.zip - mkdir -p "${ARTIFACTS_DIR_NAME}" - mv model.zip "${ARTIFACTS_DIR_NAME}" - elif [[ ${{ matrix.model }} == "llama" ]]; then - # Install requirements for export_llama - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - bash examples/models/llama/install_requirements.sh - - # Test llama2 - if [[ ${{ matrix.config }} == *"xnnpack"* ]]; then - DELEGATE_CONFIG="xnnpack+custom+qe" - elif [[ ${{ matrix.config }} == *"coreml"* ]]; then - DELEGATE_CONFIG="coreml" - elif [[ ${{ matrix.config }} == *"mps"* ]]; then - DELEGATE_CONFIG="mps" - fi - DTYPE="fp32" - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - bash .ci/scripts/test_llama.sh \ - -model "stories110M" \ - -build_tool "${BUILD_MODE}" \ - -dtype "${DTYPE}" \ - -mode "${DELEGATE_CONFIG}" \ - -upload "${ARTIFACTS_DIR_NAME}" - else - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - bash .ci/scripts/test_model.sh \ - "${{ matrix.model }}" \ - "${BUILD_MODE}" \ - "${{ matrix.config }}" \ - "${ARTIFACTS_DIR_NAME}" - fi - echo "::endgroup::" - - build-benchmark-app: - name: build-benchmark-app - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - needs: - - set-parameters - secrets: inherit - with: - runner: macos-14-xlarge - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - upload-artifact: ios-apps - secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD - timeout: 90 - script: | - set -eux - - echo "::group::Setting up CI environment" - .ci/scripts/setup-conda.sh - - BUILD_TOOL=cmake - # Setup MacOS dependencies as there is no Docker support on MacOS atm - GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded - - # Setup Apple certificate for iOS development - BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \ - BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \ - KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \ - .ci/scripts/setup-ios.sh - - # Install CoreML Backend Requirements - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - backends/apple/coreml/scripts/install_requirements.sh - echo "::endgroup::" - - echo "::group::Build ExecuTorch iOS frameworks" - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output scripts/build_apple_frameworks.sh - echo "::endgroup::" - - # NB: Although exported models can be copied to this directory and bundled together with the - # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the - # tokenizer available to the benchmark. This decouples the app and the model.
We just need to - # create the directory here to pass the build - mkdir -p extension/benchmark/apple/Benchmark/Models - ${CONDA_RUN} --no-capture-output \ - scripts/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME} - - upload-benchmark-app: - needs: build-benchmark-app - runs-on: linux.2xlarge - steps: - - name: Download the apps from GitHub - uses: actions/download-artifact@v4 - with: - # The name here needs to match the name of the upload-artifact parameter - name: ios-apps - path: ${{ runner.temp }}/artifacts/ - - - name: Verify the apps - shell: bash - working-directory: ${{ runner.temp }}/artifacts/ - run: | - ls -lah ./ - - - name: Upload the apps to S3 - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts - retention-days: 14 - if-no-files-found: ignore - path: ${{ runner.temp }}/artifacts/ - - # CHANGE IF this job name 'benchmark-on-device' changed: extract_model_info() in executorch/.github/scripts/extract_benchmark_results.py - benchmark-on-device: - if: always() - needs: - - set-parameters - - prepare-test-specs - - upload-benchmark-app - - export-models - permissions: - id-token: write - contents: read - uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main - strategy: - matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} - fail-fast: false - with: - # Due to scheduling a job may be pushed beyond the default 60m threshold - timeout: 120 - device-type: ios - # For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS - runner: linux.2xlarge - test-infra-ref: '' - # This is the ARN of ExecuTorch project on AWS - project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 - device-pool-arn: ${{ matrix.device_arn }} - # Uploaded to S3 from the previous job - ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa - ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip - test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/default-ios-device-farm-appium-test-spec.yml - new-output-format-flag: true - - upload-benchmark-results: - needs: - - benchmark-on-device - if: always() - runs-on: linux.2xlarge - environment: upload-benchmark-results - permissions: - id-token: write - contents: read - steps: - - uses: actions/checkout@v3 - with: - submodules: false - - - name: Authenticate with AWS - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results - # The max duration enforced by the server side - role-duration-seconds: 18000 - aws-region: us-east-1 - - - name: Setup conda - uses: pytorch/test-infra/.github/actions/setup-miniconda@main - with: - python-version: '3.10' - - - name: Download the list of artifacts from S3 - env: - ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/ - shell: bash - run: | - set -eux - ${CONDA_RUN} python -mpip install awscli==1.32.18 - - mkdir -p artifacts - pushd artifacts - ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" . 
- popd - - ls -lah artifacts - - - name: Download the list of benchmark configs from S3 - env: - BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ - shell: bash - run: | - set -eux - mkdir -p benchmark-configs - pushd benchmark-configs - ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" . - popd - ls -lah benchmark-configs - - - name: Extract the benchmark results JSON - shell: bash - env: - DEVICE_TYPE: ios - run: | - set -eux - - mkdir -p benchmark-results - - for ARTIFACTS_BY_JOB in artifacts/*.json; do - [ -f "${ARTIFACTS_BY_JOB}" ] || break - echo "${ARTIFACTS_BY_JOB}" - ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ - --artifacts "${ARTIFACTS_BY_JOB}" \ - --output-dir benchmark-results \ - --app "${DEVICE_TYPE}" \ - --benchmark-configs benchmark-configs - done - - for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do - cat "${BENCHMARK_RESULTS}" - echo - done - - - name: Upload the benchmark results (v3) - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-results/v3 - dry-run: false - schema-version: v3 - github-token: ${{ secrets.GITHUB_TOKEN }}