Commit d4cea28 (2 parents: 05e9a40 + 63238ab)

Update base for rebase on "Reuse GELU implementation from PyTorch core"

kernels/optimized doesn't need to support embedded systems, so it can take a header-only dependency on PyTorch. Note that, because ATen Vec picks up Sleef internally and ignores it externally, this PR also gets to enable the optimized GELU in OSS.

Testing: CI, to make sure this doesn't break mobile build modes; happy to take advice on anything not currently covered that might break.

Differential Revision: [D66335522](https://our.internmc.facebook.com/intern/diff/D66335522/)

[ghstack-poisoned]
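
For reference, the GELU being reused is the exact (erf-based) Gaussian Error Linear Unit, GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))). Below is a minimal sketch of that formula against the public PyTorch Python API; it only illustrates the math, not the vectorized C++ kernel this commit wires up, and assumes torch is installed:

    import torch

    # Exact (erf-based) GELU, the variant torch.nn.functional.gelu computes
    # by default. The commit reuses PyTorch core's vectorized C++ version of
    # this formula; this Python reference is for illustration only.
    def gelu_reference(x: torch.Tensor) -> torch.Tensor:
        return 0.5 * x * (1.0 + torch.erf(x / (2.0 ** 0.5)))

    x = torch.randn(1024)
    assert torch.allclose(gelu_reference(x), torch.nn.functional.gelu(x), atol=1e-6)
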

File tree: 226 files changed (+23,861 / −1,594 lines). Large commits have some content hidden by default; only part of the diff appears below.


.github/scripts/extract_benchmark_results.py

Lines changed: 104 additions & 49 deletions
@@ -310,6 +310,7 @@ def transform(
     workflow_run_attempt: int,
     job_name: str,
     job_id: int,
+    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name
 
-    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
-    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
-    # However, the schema is going to be updated soon
-    return [
-        {
-            # GH-info to identify where the benchmark is run
-            "repo": repo,
-            "head_branch": head_branch,
-            "workflow_id": workflow_run_id,
-            "run_attempt": workflow_run_attempt,
-            "job_id": job_id,
-            # The model
-            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-            "dtype": (
-                r["benchmarkModel"]["quantization"]
-                if r["benchmarkModel"]["quantization"]
-                else "unknown"
-            ),
-            # The metric value
-            "metric": r["metric"],
-            "actual": r["actualValue"],
-            "target": r["targetValue"],
-            # The device
-            "device": r["deviceInfo"]["device"],
-            "arch": r["deviceInfo"].get("os", ""),
-            # Not used here, just set it to something unique here
-            "filename": workflow_name,
-            "test_name": app_type,
-            "runner": job_name,
-        }
-        for r in benchmark_results
-    ]
+    if schema_version == "v2":
+        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
+        return [
+            {
+                # GH-info to identify where the benchmark is run
+                "repo": repo,
+                "head_branch": head_branch,
+                "workflow_id": workflow_run_id,
+                "run_attempt": workflow_run_attempt,
+                "job_id": job_id,
+                # The model
+                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+                "dtype": (
+                    r["benchmarkModel"]["quantization"]
+                    if r["benchmarkModel"]["quantization"]
+                    else "unknown"
+                ),
+                # The metric value
+                "metric": r["metric"],
+                "actual": r["actualValue"],
+                "target": r["targetValue"],
+                # The device
+                "device": r["deviceInfo"]["device"],
+                "arch": r["deviceInfo"].get("os", ""),
+                # Not used here, just set it to something unique here
+                "filename": workflow_name,
+                "test_name": app_type,
+                "runner": job_name,
+            }
+            for r in benchmark_results
+        ]
+    elif schema_version == "v3":
+        quantization = (
+            r["benchmarkModel"]["quantization"]
+            if r["benchmarkModel"]["quantization"]
+            else "unknown"
+        )
+        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        return [
+            {
+                "benchmark": {
+                    "name": "ExecuTorch",
+                    "mode": "inference",
+                    "dtype": quantization,
+                    "extra_info": {
+                        "app_type": app_type,
+                    },
+                },
+                "model": {
+                    "name": r["benchmarkModel"]["name"],
+                    "type": "OSS model",
+                    "backend": r["benchmarkModel"].get("backend", ""),
+                    "extra_info": {
+                        "quantization": quantization,
+                    },
+                },
+                "metric": {
+                    "name": r["metric"],
+                    "benchmark_values": [r["actualValue"]],
+                    "target_value": r["targetValue"],
+                    "extra_info": {
+                        "method": r.get("method", ""),
+                    },
+                },
+                "runners": [
+                    {
+                        "name": r["deviceInfo"]["device"],
+                        "type": r["deviceInfo"]["os"],
+                        "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
+                        "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
+                    }
+                ],
+            }
+            for r in benchmark_results
+        ]
 
 
 def main() -> None:
     args = parse_args()
 
-    # Across all devices
-    all_benchmark_results = []
+    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
+    all_benchmark_results = {
+        "v2": [],
+        "v3": [],
+    }
 
     with open(args.artifacts) as f:
         for artifact in json.load(f):
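
To make the v3 shape above concrete, here is a hypothetical record matching the field names in that branch; every value is invented for illustration only:

    # Hypothetical v3 entry, following the field names in the diff above;
    # the model, metric, and device values here are all invented.
    example_v3_entry = {
        "benchmark": {
            "name": "ExecuTorch",
            "mode": "inference",
            "dtype": "8da4w",
            "extra_info": {"app_type": "ANDROID_APP"},
        },
        "model": {
            "name": "stories110M",
            "type": "OSS model",
            "backend": "xnnpack",
            "extra_info": {"quantization": "8da4w"},
        },
        "metric": {
            "name": "token_per_sec",
            "benchmark_values": [12.3],
            "target_value": 10.0,
            "extra_info": {"method": ""},
        },
        "runners": [
            {
                "name": "Samsung Galaxy S22 5G",
                "type": "Android 13",
                "avail_mem_in_gb": "",
                "total_mem_in_gb": "",
            }
        ],
    }
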
@@ -384,23 +431,31 @@ def main() -> None:
             )
 
             if benchmark_results:
-                benchmark_results = transform(
-                    app_type,
-                    benchmark_results,
-                    args.repo,
-                    args.head_branch,
-                    args.workflow_name,
-                    args.workflow_run_id,
-                    args.workflow_run_attempt,
-                    job_name,
-                    extract_job_id(args.artifacts),
-                )
-                all_benchmark_results.extend(benchmark_results)
+                for schema in all_benchmark_results.keys():
+                    results = transform(
+                        app_type,
+                        benchmark_results,
+                        args.repo,
+                        args.head_branch,
+                        args.workflow_name,
+                        args.workflow_run_id,
+                        args.workflow_run_attempt,
+                        job_name,
+                        extract_job_id(args.artifacts),
+                        schema,
+                    )
+                    all_benchmark_results[schema].extend(results)
+
+    for schema in all_benchmark_results.keys():
+        if not all_benchmark_results.get(schema):
+            continue
+
+        output_dir = os.path.join(args.output_dir, schema)
+        os.makedirs(output_dir, exist_ok=True)
 
-    if all_benchmark_results:
         output_file = os.path.basename(args.artifacts)
-        with open(f"{args.output_dir}/{output_file}", "w") as f:
-            json.dump(all_benchmark_results, f)
+        with open(f"{output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results[schema], f)
 
 
 if __name__ == "__main__":
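
Net effect of the main() change: instead of one JSON file directly under --output-dir, the extractor now writes one file per schema under per-schema subdirectories. A small hedged sketch to inspect the resulting layout, assuming the extractor ran with --output-dir benchmark-results (the directory name is whatever --output-dir was):

    import os

    # List the per-schema result files the updated extractor writes, e.g.
    #   benchmark-results/v2/<artifact>.json  (flat oss_ci_benchmark_v2 rows)
    #   benchmark-results/v3/<artifact>.json  (nested v3 records as above)
    for schema in ("v2", "v3"):
        schema_dir = os.path.join("benchmark-results", schema)
        if os.path.isdir(schema_dir):
            print(schema, sorted(os.listdir(schema_dir)))
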

.github/workflows/android-perf.yml

Lines changed: 80 additions & 24 deletions
@@ -3,6 +3,16 @@ name: android-perf
 on:
   schedule:
     - cron: 0 0 * * *
+  pull_request:
+    paths:
+      - .github/workflows/android-perf.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/android-perf.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
   # Note: GitHub has an upper limit of 10 inputs
   workflow_dispatch:
     inputs:
@@ -30,10 +40,6 @@ on:
       description: The list of configs used the benchmark
       required: false
       type: string
-    test_spec:
-      description: The test spec to drive the test on AWS devices
-      required: false
-      type: string
   workflow_call:
     inputs:
       models:
@@ -60,10 +66,6 @@ on:
       description: The list of configs used the benchmark
       required: false
       type: string
-    test_spec:
-      description: The test spec to drive the test on AWS devices
-      required: false
-      type: string
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -84,9 +86,9 @@ jobs:
          # Separate default values from the workflow dispatch. To ensure defaults are accessible
          # during scheduled runs and to provide flexibility for different defaults between
          # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit"
-          CRON_DEFAULT_DEVICES: "samsung_galaxy_s22"
-          CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,dl3,mv3,mv2,ic4,ic3,vit' || 'stories110M' }}
+          CRON_DEFAULT_DEVICES: samsung_galaxy_s22
+          CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,qnn' || 'xnnpack' }}
        run: |
          set -ex
          MODELS="${{ inputs.models }}"
@@ -125,6 +127,43 @@ jobs:
          echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
          echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
 
+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs: set-parameters
+    strategy:
+      matrix:
+        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
+        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        shell: bash
+        working-directory: extension/benchmark/android/benchmark
+        run: |
+          set -eux
+
+          # The model will be exported in the next step to this S3 path
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
+          # We could write a script to properly use jinja here, but there is only one variable,
+          # so let's just sed it
+          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
+          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
+
+          # Just print the test spec for debugging
+          cat android-llm-device-farm-test-spec.yml
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
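
The "Prepare the spec" step's inline comment concedes that a proper jinja render was skipped in favor of sed because the template has a single variable. For comparison, a hedged sketch of the jinja2 route (assumes the jinja2 package is installed; the model_path value shown is hypothetical):

    from jinja2 import Template

    # Render the one-variable device-farm spec template; equivalent in effect
    # to the sed substitution in the workflow step above.
    with open("android-llm-device-farm-test-spec.yml.j2") as f:
        template = Template(f.read())

    model_path = "https://gha-artifacts.s3.amazonaws.com/<repo>/<run_id>/artifacts/<model>_<delegate>/model.zip"  # hypothetical
    with open("android-llm-device-farm-test-spec.yml", "w") as f:
        f.write(template.render(model_path=model_path))
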
@@ -170,9 +209,18 @@ jobs:
            echo "Unsupported delegate ${{ matrix.delegate }}"
            exit 1
          fi
-          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
+            -model "${{ matrix.model }}" \
+            -build_tool "${BUILD_MODE}" \
+            -dtype "${DTYPE}" \
+            -mode "${DELEGATE_CONFIG}" \
+            -upload "${ARTIFACTS_DIR_NAME}"
         else
-          PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
+            "${{ matrix.model }}" \
+            "${BUILD_MODE}" \
+            "${{ matrix.delegate }}" \
+            "${ARTIFACTS_DIR_NAME}"
         fi
         echo "::endgroup::"
 
@@ -212,6 +260,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
     needs:
       - set-parameters
+      - prepare-test-specs
       - build-benchmark-app
       - export-models
     strategy:
@@ -231,10 +280,7 @@ jobs:
       device-pool-arn: ${{ matrix.device }}
       android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
       android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
-      # NB: Need to set the default spec here so that it works for periodic too
-      test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
-      # Uploaded to S3 from the previous job
-      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+      test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml
 
   upload-benchmark-results:
     needs:
@@ -298,15 +344,25 @@ jobs:
             --workflow-run-attempt ${{ github.run_attempt }}
           done
 
-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done
 
-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
