diff --git a/.github/actions/upload-benchmark-results/action.yml b/.github/actions/upload-benchmark-results/action.yml new file mode 100644 index 0000000000..375f496917 --- /dev/null +++ b/.github/actions/upload-benchmark-results/action.yml @@ -0,0 +1,36 @@ +name: Upload benchmark results + +inputs: + benchmark-results-dir: + description: 'The path to the directory with all the results in JSON format' + required: True + dry-run: + default: 'true' + +runs: + using: composite + steps: + - name: Install dependencies + shell: bash + run: | + set -eux + python3 -mpip install boto3==1.35.33 + + # TODO (huydhn): Once the generic benchmark database is ready, this will be + # uploaded to S3 instead + - name: Upload benchmark results to DynamoDB + shell: bash + env: + BENCHMARK_RESULTS_DIR: ${{ inputs.benchmark-results-dir }} + DRY_RUN: ${{ inputs.dry-run }} + run: | + set -eux + + if [[ "${DRY_RUN}" == "true" ]]; then + python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \ + --benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \ + --dry-run + else + python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \ + --benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" + fi diff --git a/.github/scripts/benchmark-results-dir-for-testing/android-artifacts-31017223108.json b/.github/scripts/benchmark-results-dir-for-testing/android-artifacts-31017223108.json new file mode 100644 index 0000000000..3285abbbea --- /dev/null +++ b/.github/scripts/benchmark-results-dir-for-testing/android-artifacts-31017223108.json @@ -0,0 +1 @@ +[{"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, 
"run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 286.98526, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 405.055521, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 306.53265, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 308.598385, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 522.4928639999999, "target": 0.0, "device": "Samsung Galaxy S22 
5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 243.51297, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 280.957917, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 1366.6017709999999, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 90.70632, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": 
"pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 289.104427, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 733.888385, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 174.28572, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}] \ No newline at end of file diff --git a/.github/scripts/benchmark-results-dir-for-testing/android-artifacts-31017223431.json b/.github/scripts/benchmark-results-dir-for-testing/android-artifacts-31017223431.json new file mode 100644 index 0000000000..3d3fe21c2d --- /dev/null +++ b/.github/scripts/benchmark-results-dir-for-testing/android-artifacts-31017223431.json @@ -0,0 +1 @@ +[{"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": 
"avg_inference_latency(ms)", "actual": 23.0566825, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 18.705938, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 42.8922188, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 14.722292, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", 
"test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 10.9387811, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 13.23677, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 63.4430574, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 16.301875, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", 
"head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}] \ No newline at end of file diff --git a/.github/scripts/upload_benchmark_results.py b/.github/scripts/upload_benchmark_results.py new file mode 100755 index 0000000000..0fea644b42 --- /dev/null +++ b/.github/scripts/upload_benchmark_results.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import hashlib +import json +import logging +import os +import time +from argparse import Action, ArgumentParser, Namespace +from decimal import Decimal + +from logging import info +from typing import Any, Callable, Dict, List, Optional + +import boto3 + +logging.basicConfig(level=logging.INFO) + + +class ValidateDir(Action): + def __call__( + self, + parser: ArgumentParser, + namespace: Namespace, + values: Any, + option_string: Optional[str] = None, + ) -> None: + if os.path.isdir(values): + setattr(namespace, self.dest, values) + return + + parser.error(f"{values} is not a valid directory") + + +def parse_args() -> Any: + from argparse import ArgumentParser + + parser = ArgumentParser("upload the benchmark results to OSS benchmark database") + parser.add_argument( + "--benchmark-results-dir", + type=str, + required=True, + action=ValidateDir, + help="the directory with all the benchmark results in JSON format", + ) + parser.add_argument( + "--dry-run", + action="store_true", + ) + parser.add_argument( + "--dynamodb-table", + type=str, + default="torchci-oss-ci-benchmark", + help="the name of the DynamoDB table to upload to", + ) + + return 
parser.parse_args() + + +# DynamoDB use Decimal, not float +class DecimalEncoder(json.JSONEncoder): + def default(self, o: Any) -> Any: + if isinstance(o, Decimal): + return str(o) + return super().default(o) + + +# TODO (huydhn): This can be replaced by S3 path once we move to S3 +def generate_partition_key(doc: Dict[str, Any]) -> str: + """ + Generate an unique partition key for the document on DynamoDB + """ + repo = doc["repo"] + workflow_id = doc["workflow_id"] + job_id = doc["job_id"] + test_name = doc["test_name"] + filename = doc["filename"] + + hash_content = hashlib.md5( + json.dumps(doc, cls=DecimalEncoder).encode("utf-8") + ).hexdigest() + return f"{repo}/{workflow_id}/{job_id}/{test_name}/{filename}/{hash_content}" + + +def upload_to_dynamodb( + dynamodb_table: str, + docs: List[Any], + generate_partition_key: Optional[Callable[[Dict[str, Any]], str]], + dry_run: bool = True, +) -> None: + """ + Copied from upload stats script + """ + info(f"Writing {len(docs)} documents to DynamoDB {dynamodb_table}") + if not dry_run: + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/dynamodb.html#batch-writing + with boto3.resource("dynamodb").Table(dynamodb_table).batch_writer() as batch: + for doc in docs: + doc["timestamp"] = int(round(time.time() * 1000)) + if generate_partition_key: + doc["dynamoKey"] = generate_partition_key(doc) + batch.put_item(Item=doc) + + +def main() -> None: + args = parse_args() + + for file in os.listdir(args.benchmark_results_dir): + if not file.endswith(".json"): + continue + + filepath = os.path.join(args.benchmark_results_dir, file) + info(f"Loading {filepath}") + + with open(filepath) as f: + upload_to_dynamodb( + dynamodb_table=args.dynamodb_table, + # NB: DynamoDB only accepts decimal number, not float + docs=json.load(f, parse_float=Decimal), + generate_partition_key=generate_partition_key, + dry_run=args.dry_run, + ) + + +if __name__ == "__main__": + main() diff --git 
a/.github/workflows/test_upload_benchmark_results.yml b/.github/workflows/test_upload_benchmark_results.yml new file mode 100644 index 0000000000..cc74de4af7 --- /dev/null +++ b/.github/workflows/test_upload_benchmark_results.yml @@ -0,0 +1,20 @@ +name: Test upload-benchmark-results + +on: + pull_request: + paths: + - .github/scripts/upload_benchmark_results.py + - .github/workflows/test_upload_benchmark_results.yml + - .github/actions/upload-benchmark-results/* + +jobs: + test: + runs-on: linux.2xlarge + steps: + - uses: actions/checkout@v3 + + - name: Test upload the benchmark results + uses: ./.github/actions/upload-benchmark-results + with: + benchmark-results-dir: .github/scripts/benchmark-results-dir-for-testing + dry-run: true