Skip to content
68 changes: 52 additions & 16 deletions .github/workflows/android-perf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ on:
description: The list of configs used the benchmark
required: false
type: string
test_spec:
description: The test spec to drive the test on AWS devices
required: false
type: string
workflow_call:
inputs:
models:
Expand All @@ -60,10 +56,6 @@ on:
description: The list of configs used the benchmark
required: false
type: string
test_spec:
description: The test spec to drive the test on AWS devices
required: false
type: string

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
Expand Down Expand Up @@ -125,9 +117,46 @@ jobs:
echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

  # Generates one specialized AWS Device Farm test spec per (model, delegate)
  # combination and uploads it to the per-run gha-artifacts S3 prefix, where the
  # downstream benchmark job (see `test-spec:` in the mobile job) picks it up.
  prepare-test-specs:
    runs-on: linux.2xlarge
    # Consumes the models/delegates matrices computed by set-parameters.
    needs: set-parameters
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
      # Keep generating specs for the remaining combinations even if one fails.
      fail-fast: false
    steps:
      - uses: actions/checkout@v3

      - name: Prepare the spec
        shell: bash
        working-directory: extension/benchmark/android/benchmark
        run: |
          set -eux

          # The model will be exported in the next step to this S3 path
          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
          # We could write a script to properly use jinja here, but there is only one variable,
          # so let's just sed it
          # NOTE(review): `sed -i` mutates the checked-out .j2 template in place
          # *before* the copy, so the .yml below is the substituted result. This
          # works because each matrix job runs in a fresh checkout, but sed-ing
          # the copy instead of the template would be the safer order — confirm.
          # The `,` sed delimiter is chosen because MODEL_PATH contains `/`.
          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml

          # Just print the test spec for debugging
          cat android-llm-device-farm-test-spec.yml

      - name: Upload the spec
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          # Per-combination prefix so each (model, delegate) pair gets its own
          # spec alongside its exported model.zip.
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
          retention-days: 1
          if-no-files-found: error
          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

export-models:
name: export-models
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's new in _v2.yml?

Copy link
Contributor Author

@huydhn huydhn Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It supports manywheel 2.28 (what PyTorch is moving to). For example, arm binaries are now using this new format #7080. During my testing, I saw an error on this job about the old vs. new wheel format. So, I just moved it to v2.

This has been delayed on the PyTorch side until the next release, so this is kind of not needed right now. Let me put it back.

On the other hand, this is ok too because manywheel 2.28 is backward compatible with the current older format.

needs: set-parameters
strategy:
matrix:
Expand Down Expand Up @@ -170,15 +199,24 @@ jobs:
echo "Unsupported delegate ${{ matrix.delegate }}"
exit 1
fi
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
-model "${{ matrix.model }}" \
-build_tool "${BUILD_MODE}" \
-dtype "${DTYPE}" \
-mode "${DELEGATE_CONFIG}" \
-upload "${ARTIFACTS_DIR_NAME}"
else
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
"${{ matrix.model }}" \
"${BUILD_MODE}" \
"${{ matrix.delegate }}" \
"${ARTIFACTS_DIR_NAME}"
fi
echo "::endgroup::"

build-benchmark-app:
name: build-benchmark-app
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
needs: set-parameters
with:
runner: linux.2xlarge
Expand Down Expand Up @@ -212,6 +250,7 @@ jobs:
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
needs:
- set-parameters
- prepare-test-specs
- build-benchmark-app
- export-models
strategy:
Expand All @@ -231,10 +270,7 @@ jobs:
device-pool-arn: ${{ matrix.device }}
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
# NB: Need to set the default spec here so that it works for periodic too
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
# Uploaded to S3 from the previous job
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml

upload-benchmark-results:
needs:
Expand Down
67 changes: 2 additions & 65 deletions .github/workflows/upload-android-test-specs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ on:
pull_request:
paths:
- .github/workflows/upload-android-test-specs.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
push:
branches:
- main
paths:
- .github/workflows/upload-android-test-specs.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2

concurrency:
# NB: This concurrency group needs to be different from the one used in android-perf, otherwise
Expand All @@ -19,23 +19,7 @@ concurrency:
cancel-in-progress: true

jobs:
upload-android-test-spec-for-validation:
runs-on: linux.2xlarge
steps:
- uses: actions/checkout@v3

- name: Upload the spec as a GitHub artifact for validation
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifacts
retention-days: 1
if-no-files-found: error
path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

validate-android-test-spec:
needs: upload-android-test-spec-for-validation
uses: ./.github/workflows/android-perf.yml
permissions:
id-token: write
Expand All @@ -45,50 +29,3 @@ jobs:
models: stories110M
devices: samsung_galaxy_s22
delegates: xnnpack
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/android-llm-device-farm-test-spec.yml

upload-android-test-spec:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the upload step is moved to android-perf.yml right after the test-spec is specialized and instantiated, w/o validation. Given that, do we still need to keep a validation step here? How would it catch and prevent bad test-spec being uploaded and used?

Copy link
Contributor Author

@huydhn huydhn Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this new setup, the specialized test spec will just be an artifact of the current job uploaded at https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml. If the spec is bad, the subsequent benchmark job would fail (and give a red signal on the PR).

The validation workflow here now acts just as a trigger to call android-perf workflow when the spec template changes.

Finally, the upload step here is being deleted (not moved to android-perf) because there would no longer be a single shared spec file at s3://ossci-android/executorch/android-llm-device-farm-test-spec.yml. There couldn't be, because each specialized spec now has a different model+delegate S3 link, for example https://github.com/pytorch/executorch/actions/runs/12053946075/job/33613172395#step:10:46

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this can be simplified and merged into android-perf.yml by triggering a run when the test spec template is modified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could do that, but the job would need to use the default value in CRON_DEFAULT_MODELS; it seems overkill to me to test them all just to validate the spec. But I think I could tweak it a bit to use a different value for pull_request and keep the full list for scheduled jobs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the example run https://github.com/pytorch/executorch/actions/runs/12150009281 without upload-android-test-specs.yml

Copy link
Contributor

@guangy10 guangy10 Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

job would need to use the default value in CRON_DEFAULT_MODELS, it seems overkill to me to test them all just to validate the spec.

I see. This is a fair point. Maybe we could add new defaults or change the defaults for test-spec validation in android-perf.yml?
I don't have a strong opinion about merging it into android-perf.yml; I was thinking in that direction because the upload-android-test-specs.yml job now essentially validates android-perf.yml, and the specialized test-spec generated on the fly is already part of it.

needs: validate-android-test-spec
runs-on: ubuntu-22.04
timeout-minutes: 15
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3

- uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: pip

- name: configure aws credentials
uses: aws-actions/[email protected]
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
aws-region: us-east-1

- name: Only push to S3 when running the workflow manually from main branch
if: ${{ github.ref == 'refs/heads/main' }}
shell: bash
run: |
set -eux
echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}"

- name: Upload the spec to S3 ossci-android bucket
shell: bash
working-directory: extension/benchmark/android/benchmark/
env:
SPEC_FILE: android-llm-device-farm-test-spec.yml
run: |
set -eux

pip install awscli==1.32.18

AWS_CMD="aws s3 cp --dryrun"
if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then
AWS_CMD="aws s3 cp"
fi

shasum -a 256 "${SPEC_FILE}"
${AWS_CMD} "${SPEC_FILE}" s3://ossci-android/executorch/ --acl public-read
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ phases:

pre_test:
commands:
# Download the model from S3
- curl -s --fail '{{ model_path }}' -o model.zip
- unzip model.zip && ls -la

# Copy the model to sdcard. This prints too much progress info when the files
# are large, so it's better to just silent them
- adb -s $DEVICEFARM_DEVICE_UDID push *.bin /sdcard > /dev/null && echo OK
- adb -s $DEVICEFARM_DEVICE_UDID push *.pte /sdcard > /dev/null && echo OK

# Prepare the model and the tokenizer
- adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /sdcard/"
- adb -s $DEVICEFARM_DEVICE_UDID shell "mkdir -p /data/local/tmp/minibench/"
Expand Down