diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py
new file mode 100755
index 00000000000..e2d430188d1
--- /dev/null
+++ b/.github/scripts/extract_benchmark_results.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import logging
+import os
+import re
+import time
+import zipfile
+from argparse import Action, ArgumentParser, Namespace
+from io import BytesIO
+from logging import info, warning
+from typing import Any, List, Optional
+from urllib import error, request
+
+
+logging.basicConfig(level=logging.INFO)
+
+
+BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
+ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+)\.json")
+
+
+class ValidateArtifacts(Action):
+    def __call__(
+        self,
+        parser: ArgumentParser,
+        namespace: Namespace,
+        values: Any,
+        option_string: Optional[str] = None,
+    ) -> None:
+        if os.path.isfile(values) and values.endswith(".json"):
+            setattr(namespace, self.dest, values)
+            return
+
+        parser.error(f"{values} is not a valid JSON file (*.json)")
+
+
+class ValidateOutputDir(Action):
+    def __call__(
+        self,
+        parser: ArgumentParser,
+        namespace: Namespace,
+        values: Any,
+        option_string: Optional[str] = None,
+    ) -> None:
+        if os.path.isdir(values):
+            setattr(namespace, self.dest, values)
+            return
+
+        parser.error(f"{values} is not a valid directory")
+
+
+def parse_args() -> Any:
+    parser = ArgumentParser("extract benchmark results from AWS Device Farm artifacts")
+    parser.add_argument(
+        "--artifacts",
+        type=str,
+        required=True,
+        action=ValidateArtifacts,
+        help="the list of artifacts from AWS in JSON format",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        required=True,
+        action=ValidateOutputDir,
+        help="the directory to keep the benchmark results",
+    )
+    parser.add_argument(
+        "--repo",
+        type=str,
+        required=True,
+        help="which GitHub repo this workflow run belongs to",
+    )
+    parser.add_argument(
+        "--head-branch",
+        type=str,
+        required=True,
+        help="the head branch of this workflow run",
+    )
+    parser.add_argument(
+        "--workflow-name",
+        type=str,
+        required=True,
+        help="the name of the benchmark workflow",
+    )
+    parser.add_argument(
+        "--workflow-run-id",
+        type=int,
+        required=True,
+        help="the id of the benchmark workflow run",
+    )
+    parser.add_argument(
+        "--workflow-run-attempt",
+        type=int,
+        required=True,
+        help="which retry of the workflow this is",
+    )
+
+    return parser.parse_args()
+
+
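+# Note (illustrative, not prescriptive): based on how main() consumes it below,
+# the file passed to --artifacts is expected to be named like
+# android-artifacts-<job_id>.json (see ARTIFACTS_FILENAME_REGEX) and to contain
+# a JSON list of objects with at least the app_type, job_name, type, and s3_url
+# keys. For example (all values made up):
+#
+#   [
+#     {
+#       "app_type": "ANDROID_APP",
+#       "job_name": "Samsung Galaxy S22 5G",
+#       "type": "CUSTOMER_ARTIFACT",
+#       "s3_url": "https://example.com/customer-artifact.zip"
+#     }
+#   ]
+
+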
+def extract_android_benchmark_results(
+    job_name: str, artifact_type: str, artifact_s3_url: str
+) -> List:
+    """
+    The Android benchmark results are already stored in the CUSTOMER_ARTIFACT
+    artifact, so we only need to download and parse it.
+
+    Return the list of benchmark results.
+    """
+    if artifact_type != "CUSTOMER_ARTIFACT":
+        return []
+
+    try:
+        with request.urlopen(artifact_s3_url) as data:
+            with zipfile.ZipFile(BytesIO(data.read())) as customer_artifact:
+                for name in customer_artifact.namelist():
+                    if BENCHMARK_RESULTS_FILENAME in name:
+                        return json.loads(customer_artifact.read(name))
+
+    except error.HTTPError:
+        warning(f"Failed to fetch {artifact_type} from {artifact_s3_url}")
+
+    return []
+
+
+def extract_job_id(artifacts_filename: str) -> int:
+    """
+    Extract the job id from the artifacts filename
+    """
+    m = ARTIFACTS_FILENAME_REGEX.match(os.path.basename(artifacts_filename))
+    if not m:
+        return 0
+    return int(m.group("job_id"))
+
+
+def transform(
+    app_type: str,
+    benchmark_results: List,
+    repo: str,
+    head_branch: str,
+    workflow_name: str,
+    workflow_run_id: int,
+    workflow_run_attempt: int,
+    job_name: str,
+    job_id: int,
+) -> List:
+    """
+    Transform the benchmark results into the format expected by the benchmark database
+    """
+    # Overwrite the device name here with the job name as it has more information
+    # about the device, e.g. Samsung Galaxy S22 5G instead of just Samsung
+    for r in benchmark_results:
+        r["deviceInfo"]["device"] = job_name
+
+    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
+    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
+    # However, the schema is going to be updated soon
+    return [
+        {
+            # GH-info to identify where the benchmark is run
+            "repo": repo,
+            "head_branch": head_branch,
+            "workflow_id": workflow_run_id,
+            "run_attempt": workflow_run_attempt,
+            "job_id": job_id,
+            # The model
+            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+            "dtype": (
+                r["benchmarkModel"]["quantization"]
+                if r["benchmarkModel"]["quantization"]
+                else "unknown"
+            ),
+            # The metric value
+            "metric": r["metric"],
+            "actual": r["actualValue"],
+            "target": r["targetValue"],
+            # The device
+            "device": r["deviceInfo"]["device"],
+            "arch": r["deviceInfo"].get("os", ""),
+            # Not used here, just set to something unique
+            "filename": workflow_name,
+            "test_name": app_type,
+            "runner": job_name,
+        }
+        for r in benchmark_results
+    ]
+
+
+def main() -> None:
+    args = parse_args()
+
+    # Across all devices
+    all_benchmark_results = []
+
+    with open(args.artifacts) as f:
+        for artifact in json.load(f):
+            app_type = artifact.get("app_type", "")
+            # We expect this to be set to either ANDROID_APP or IOS_APP
+            if app_type not in ["ANDROID_APP", "IOS_APP"]:
+                info(
+                    f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
+                )
+                continue
+
+            job_name = artifact["job_name"]
+            artifact_type = artifact["type"]
+            artifact_s3_url = artifact["s3_url"]
+
+            if app_type == "ANDROID_APP":
+                benchmark_results = extract_android_benchmark_results(
+                    job_name, artifact_type, artifact_s3_url
+                )
+                if benchmark_results:
+                    benchmark_results = transform(
+                        app_type,
+                        benchmark_results,
+                        args.repo,
+                        args.head_branch,
+                        args.workflow_name,
+                        args.workflow_run_id,
+                        args.workflow_run_attempt,
+                        job_name,
+                        extract_job_id(args.artifacts),
+                    )
+                    all_benchmark_results.extend(benchmark_results)
+
+            if app_type == "IOS_APP":
+                # TODO (huydhn): Implement the logic for iOS next
+                pass
+
+    if all_benchmark_results:
+        output_file = os.path.basename(args.artifacts)
+        with open(f"{args.output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results, f)
+
+
+if __name__ == "__main__":
+    main()
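A quick illustration of what transform() produces for one Android result. This sketch is not part of the patch; the sample input values, job metadata, and record values are hypothetical, and the module is loaded by file path because the script lives under .github/scripts rather than on the import path.

import importlib.util

spec = importlib.util.spec_from_file_location(
    "extract_benchmark_results", ".github/scripts/extract_benchmark_results.py"
)
extract = importlib.util.module_from_spec(spec)
spec.loader.exec_module(extract)

# A single hypothetical entry containing just the fields transform() reads
sample = [
    {
        "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
        "metric": "avg_inference_latency(ms)",
        "actualValue": 123.4,
        "targetValue": 0,
        "deviceInfo": {"device": "Samsung", "os": "Android 14"},
    }
]

records = extract.transform(
    app_type="ANDROID_APP",
    benchmark_results=sample,
    repo="pytorch/executorch",
    head_branch="main",
    workflow_name="android-perf",
    workflow_run_id=1,
    workflow_run_attempt=1,
    job_name="Samsung Galaxy S22 5G",
    job_id=12345,
)
print(records[0]["name"])    # llama2 xnnpack
print(records[0]["device"])  # Samsung Galaxy S22 5G (overwritten with the job name)
print(records[0]["dtype"])   # 8da4w
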
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 1a679ee8370..7d50a441024 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -234,3 +234,78 @@ jobs:
       test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
       # Uploaded to S3 from the previous job
       extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+
+  upload-benchmark-results:
+    needs:
+      - benchmark-on-device
+    if: always()
+    runs-on: linux.2xlarge
+    environment: upload-benchmark-results
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: false
+
+      - name: Authenticate with AWS
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
+          # The max duration enforced by the server side
+          role-duration-seconds: 18000
+          aws-region: us-east-1
+
+      - name: Setup conda
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        with:
+          python-version: '3.10'
+
+      - name: Download the list of artifacts from S3
+        env:
+          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
+        shell: bash
+        run: |
+          set -eux
+          ${CONDA_RUN} python -mpip install awscli==1.32.18
+
+          mkdir -p artifacts
+          pushd artifacts
+          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
+          popd
+
+          ls -lah artifacts
+
+      - name: Extract the benchmark results JSON
+        shell: bash
+        run: |
+          set -eux
+
+          mkdir -p benchmark-results
+
+          for ARTIFACTS_BY_JOB in artifacts/*.json; do
+            [ -f "${ARTIFACTS_BY_JOB}" ] || break
+            echo "${ARTIFACTS_BY_JOB}"
+            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
+              --artifacts "${ARTIFACTS_BY_JOB}" \
+              --output-dir benchmark-results \
+              --repo ${{ github.repository }} \
+              --head-branch ${{ github.head_ref || github.ref_name }} \
+              --workflow-name ${{ github.workflow }} \
+              --workflow-run-id ${{ github.run_id }} \
+              --workflow-run-attempt ${{ github.run_attempt }}
+          done
+
+          ls -lah benchmark-results
+
+          for BENCHMARK_RESULTS in benchmark-results/*.json; do
+            cat "${BENCHMARK_RESULTS}"
+            echo
+          done
+
+      - name: Upload the benchmark results
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: 'benchmark-results'
+          dry-run: false
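For reference, each file the extract step writes under benchmark-results/ (and that the upload-benchmark-results action then consumes) is a flat JSON list of the records built by transform(); the script reuses the basename of the per-job artifacts file it was given. A minimal sanity-check sketch, assuming a hypothetical file name:

import json

# Hypothetical output file name, following the android-artifacts-<job_id>.json
# pattern that ARTIFACTS_FILENAME_REGEX expects for the input file
with open("benchmark-results/android-artifacts-12345.json") as f:
    records = json.load(f)

for r in records:
    # Fields written by transform(): repo, head_branch, workflow_id, run_attempt,
    # job_id, name, dtype, metric, actual, target, device, arch, filename,
    # test_name, runner
    print(r["name"], r["metric"], r["actual"], r["device"])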