246 changes: 246 additions & 0 deletions .github/scripts/extract_benchmark_results.py
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import logging
import os
import re
import time
import zipfile
from argparse import Action, ArgumentParser, Namespace
from io import BytesIO
from logging import info, warning
from typing import Any, List, Optional
from urllib import error, request


logging.basicConfig(level=logging.INFO)


BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+)\.json")
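# The artifacts filename is expected to look like, for example,
# android-artifacts-12345.json, where 12345 is the Device Farm job id
# (illustrative value, inferred from the regex above)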


class ValidateArtifacts(Action):
def __call__(
self,
parser: ArgumentParser,
namespace: Namespace,
values: Any,
option_string: Optional[str] = None,
) -> None:
if os.path.isfile(values) and values.endswith(".json"):
setattr(namespace, self.dest, values)
return

parser.error(f"{values} is not a valid JSON file (*.json)")


class ValidateOutputDir(Action):
def __call__(
self,
parser: ArgumentParser,
namespace: Namespace,
values: Any,
option_string: Optional[str] = None,
) -> None:
if os.path.isdir(values):
setattr(namespace, self.dest, values)
return

parser.error(f"{values} is not a valid directory")


def parse_args() -> Any:
parser = ArgumentParser("extract benchmark results from AWS Device Farm artifacts")
parser.add_argument(
"--artifacts",
type=str,
required=True,
action=ValidateArtifacts,
help="the list of artifacts from AWS in JSON format",
)
parser.add_argument(
"--output-dir",
type=str,
required=True,
action=ValidateOutputDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="which GitHub repo this workflow run belongs to",
)
parser.add_argument(
"--head-branch",
type=str,
required=True,
help="the head branch that runs",
)
parser.add_argument(
"--workflow-name",
type=str,
required=True,
help="the name of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-id",
type=int,
required=True,
help="the id of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-attempt",
type=int,
required=True,
help="which retry of the workflow this is",
)

return parser.parse_args()


def extract_android_benchmark_results(
job_name: str, artifact_type: str, artifact_s3_url: str
) -> List:
"""
    The Android benchmark results are already stored in the CUSTOMER_ARTIFACT
    artifact, so we just need to download and extract them.

Return the list of benchmark results.
"""
if artifact_type != "CUSTOMER_ARTIFACT":
return []

try:
with request.urlopen(artifact_s3_url) as data:
with zipfile.ZipFile(BytesIO(data.read())) as customer_artifact:
for name in customer_artifact.namelist():
if BENCHMARK_RESULTS_FILENAME in name:
return json.loads(customer_artifact.read(name))

    except error.HTTPError:
        warning(f"Failed to download {artifact_type} from {artifact_s3_url}")

    # Either the download failed or no file matching BENCHMARK_RESULTS_FILENAME
    # was found in the artifact
    return []


def extract_job_id(artifacts_filename: str) -> int:
"""
Extract the job id from the artifacts filename
"""
m = ARTIFACTS_FILENAME_REGEX.match(os.path.basename(artifacts_filename))
if not m:
return 0
return int(m.group("job_id"))


def transform(
app_type: str,
benchmark_results: List,
repo: str,
head_branch: str,
workflow_name: str,
workflow_run_id: int,
workflow_run_attempt: int,
job_name: str,
job_id: int,
) -> List:
"""
    Transform the benchmark results into the format expected by the benchmark database
"""
# Overwrite the device name here with the job name as it has more information about
    # the device, e.g. Samsung Galaxy S22 5G instead of just Samsung
for r in benchmark_results:
r["deviceInfo"]["device"] = job_name

# TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
# and I'm trying to fit ET benchmark results into it, which is kind of awkward.
# However, the schema is going to be updated soon
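    # A minimal illustrative example of the mapping below, with made-up values:
    # an input record like
    #   {
    #     "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
    #     "metric": "token_per_sec",
    #     "actualValue": 10.5,
    #     "targetValue": 15.0,
    #     "deviceInfo": {"device": "Samsung Galaxy S22 5G", "os": "Android 13"},
    #   }
    # becomes a row with name="llama2 xnnpack", dtype="8da4w", metric="token_per_sec",
    # actual=10.5, target=15.0, device=<job_name>, and arch="Android 13"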
return [
{
# GH-info to identify where the benchmark is run
"repo": repo,
"head_branch": head_branch,
"workflow_id": workflow_run_id,
"run_attempt": workflow_run_attempt,
"job_id": job_id,
# The model
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
"dtype": (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
),
# The metric value
"metric": r["metric"],
"actual": r["actualValue"],
"target": r["targetValue"],
# The device
"device": r["deviceInfo"]["device"],
"arch": r["deviceInfo"].get("os", ""),
            # Not used here, just set it to something unique
"filename": workflow_name,
"test_name": app_type,
"runner": job_name,
}
for r in benchmark_results
]


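# An illustrative example of one entry in the artifacts JSON file passed via
# --artifacts (the field names match what main() reads below; the values are
# made up):
#
#   {
#     "app_type": "ANDROID_APP",
#     "job_name": "mobile-benchmark (samsung_galaxy_s22, xnnpack)",
#     "type": "CUSTOMER_ARTIFACT",
#     "s3_url": "https://example-bucket.s3.amazonaws.com/path/to/artifact.zip"
#   }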
def main() -> None:
args = parse_args()

# Across all devices
all_benchmark_results = []

with open(args.artifacts) as f:
for artifact in json.load(f):
app_type = artifact.get("app_type", "")
# We expect this to be set to either ANDROID_APP or IOS_APP
            if app_type not in ["ANDROID_APP", "IOS_APP"]:
info(
f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
)
continue

job_name = artifact["job_name"]
artifact_type = artifact["type"]
artifact_s3_url = artifact["s3_url"]

if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
)
if benchmark_results:
benchmark_results = transform(
app_type,
benchmark_results,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
)
all_benchmark_results.extend(benchmark_results)

if app_type == "IOS_APP":
# TODO (huydhn): Implement the logic for iOS next
pass

if all_benchmark_results:
output_file = os.path.basename(args.artifacts)
with open(f"{args.output_dir}/{output_file}", "w") as f:
json.dump(all_benchmark_results, f)


if __name__ == "__main__":
main()
75 changes: 75 additions & 0 deletions .github/workflows/android-perf.yml
@@ -234,3 +234,78 @@ jobs:
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
# Uploaded to S3 from the previous job
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

upload-benchmark-results:
needs:
- benchmark-on-device
if: always()
runs-on: linux.2xlarge
environment: upload-benchmark-results
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
submodules: false

- name: Authenticate with AWS
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The maximum duration enforced on the server side
role-duration-seconds: 18000
aws-region: us-east-1

- name: Setup conda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: '3.10'

- name: Download the list of artifacts from S3
env:
ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
shell: bash
run: |
set -eux
${CONDA_RUN} python -mpip install awscli==1.32.18

mkdir -p artifacts
pushd artifacts
${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
popd

ls -lah artifacts

- name: Extract the benchmark results JSON
shell: bash
run: |
set -eux

mkdir -p benchmark-results

for ARTIFACTS_BY_JOB in artifacts/*.json; do
[ -f "${ARTIFACTS_BY_JOB}" ] || break
echo "${ARTIFACTS_BY_JOB}"
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
              --repo "${{ github.repository }}" \
              --head-branch "${{ github.head_ref || github.ref_name }}" \
              --workflow-name "${{ github.workflow }}" \
              --workflow-run-id "${{ github.run_id }}" \
              --workflow-run-attempt "${{ github.run_attempt }}"
done

ls -lah benchmark-results

for BENCHMARK_RESULTS in benchmark-results/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done

- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: 'benchmark-results'
dry-run: false