diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py
index 113ff2a420c..bfa6c063125 100755
--- a/.github/scripts/extract_benchmark_results.py
+++ b/.github/scripts/extract_benchmark_results.py
@@ -310,6 +310,7 @@ def transform(
     workflow_run_attempt: int,
     job_name: str,
     job_id: int,
+    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@ def transform(
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name
 
-    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
-    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
-    # However, the schema is going to be updated soon
-    return [
-        {
-            # GH-info to identify where the benchmark is run
-            "repo": repo,
-            "head_branch": head_branch,
-            "workflow_id": workflow_run_id,
-            "run_attempt": workflow_run_attempt,
-            "job_id": job_id,
-            # The model
-            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-            "dtype": (
-                r["benchmarkModel"]["quantization"]
-                if r["benchmarkModel"]["quantization"]
-                else "unknown"
-            ),
-            # The metric value
-            "metric": r["metric"],
-            "actual": r["actualValue"],
-            "target": r["targetValue"],
-            # The device
-            "device": r["deviceInfo"]["device"],
-            "arch": r["deviceInfo"].get("os", ""),
-            # Not used here, just set it to something unique here
-            "filename": workflow_name,
-            "test_name": app_type,
-            "runner": job_name,
-        }
-        for r in benchmark_results
-    ]
+    if schema_version == "v2":
+        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
+        return [
+            {
+                # GH-info to identify where the benchmark is run
+                "repo": repo,
+                "head_branch": head_branch,
+                "workflow_id": workflow_run_id,
+                "run_attempt": workflow_run_attempt,
+                "job_id": job_id,
+                # The model
+                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+                "dtype": (
+                    r["benchmarkModel"]["quantization"]
+                    if r["benchmarkModel"]["quantization"]
+                    else "unknown"
+                ),
+                # The metric value
+                "metric": r["metric"],
+                "actual": r["actualValue"],
+                "target": r["targetValue"],
+                # The device
+                "device": r["deviceInfo"]["device"],
+                "arch": r["deviceInfo"].get("os", ""),
+                # Not used here, just set it to something unique here
+                "filename": workflow_name,
+                "test_name": app_type,
+                "runner": job_name,
+            }
+            for r in benchmark_results
+        ]
+    elif schema_version == "v3":
+        quantization = (
+            r["benchmarkModel"]["quantization"]
+            if r["benchmarkModel"]["quantization"]
+            else "unknown"
+        )
+        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        return [
+            {
+                "benchmark": {
+                    "name": "ExecuTorch",
+                    "mode": "inference",
+                    "dtype": quantization,
+                    "extra_info": {
+                        "app_type": app_type,
+                    },
+                },
+                "model": {
+                    "name": r["benchmarkModel"]["name"],
+                    "type": "OSS model",
+                    "backend": r["benchmarkModel"].get("backend", ""),
+                    "extra_info": {
+                        "quantization": quantization,
+                    },
+                },
+                "metric": {
+                    "name": r["metric"],
+                    "benchmark_values": [r["actualValue"]],
+                    "target_value": r["targetValue"],
+                    "extra_info": {
+                        "method": r.get("method", ""),
+                    },
+                },
+                "runners": [
+                    {
+                        "name": r["deviceInfo"]["device"],
+                        "type": r["deviceInfo"]["os"],
+                        "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
+                        "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
+                    }
+                ],
+            }
+            for r in benchmark_results
+        ]
 
 
 def main() -> None:
     args = parse_args()
 
-    # Across all devices
-    all_benchmark_results = []
+    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
+    all_benchmark_results = {
+        "v2": [],
+        "v3": [],
+    }
 
     with open(args.artifacts) as f:
         for artifact in json.load(f):
@@ -384,23 +431,31 @@ def main() -> None:
             )
 
             if benchmark_results:
-                benchmark_results = transform(
-                    app_type,
-                    benchmark_results,
-                    args.repo,
-                    args.head_branch,
-                    args.workflow_name,
-                    args.workflow_run_id,
-                    args.workflow_run_attempt,
-                    job_name,
-                    extract_job_id(args.artifacts),
-                )
-                all_benchmark_results.extend(benchmark_results)
+                for schema in all_benchmark_results.keys():
+                    results = transform(
+                        app_type,
+                        benchmark_results,
+                        args.repo,
+                        args.head_branch,
+                        args.workflow_name,
+                        args.workflow_run_id,
+                        args.workflow_run_attempt,
+                        job_name,
+                        extract_job_id(args.artifacts),
+                        schema,
+                    )
+                    all_benchmark_results[schema].extend(results)
+
+    for schema in all_benchmark_results.keys():
+        if not all_benchmark_results.get(schema):
+            continue
+
+        output_dir = os.path.join(args.output_dir, schema)
+        os.mkdir(output_dir)
 
-    if all_benchmark_results:
         output_file = os.path.basename(args.artifacts)
-        with open(f"{args.output_dir}/{output_file}", "w") as f:
-            json.dump(all_benchmark_results, f)
+        with open(f"{output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results[schema], f)
 
 
 if __name__ == "__main__":
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 93ec4fe4e70..76e5f5a1b94 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -298,15 +298,25 @@ jobs:
               --workflow-run-attempt ${{ github.run_attempt }}
           done
 
-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done
 
-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index 7de308b1a63..f14e40b942a 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -372,15 +372,25 @@ jobs:
               --workflow-run-attempt ${{ github.run_attempt }}
           done
 
-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done
 
-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java
index 7236fe317b0..8c2d60252a0 100644
--- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java
@@ -187,7 +187,7 @@ public BenchmarkMetric(
   // the .pte model itself instead of parsing its name
   public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) {
     final Matcher m =
-        Pattern.compile("(?<name>\\w+)_(?<backend>\\w+)_(?<quantization>\\w+)").matcher(model);
+        Pattern.compile("(?<name>\\w+)_(?<backend>[\\w\\+]+)_(?<quantization>\\w+)").matcher(model);
     if (m.matches()) {
       return new BenchmarkMetric.BenchmarkModel(
           m.group("name"), m.group("backend"), m.group("quantization"));
diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java
index 22ee7b84804..66ab50550a4 100644
--- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java
+++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkMetric.java
@@ -63,7 +63,7 @@ public BenchmarkMetric(
   // the .pte model itself instead of parsing its name
   public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) {
     final Matcher m =
-        Pattern.compile("(?<name>\\w+)_(?<backend>\\w+)_(?<quantization>\\w+)").matcher(model);
+        Pattern.compile("(?<name>\\w+)_(?<backend>[\\w\\+]+)_(?<quantization>\\w+)").matcher(model);
     if (m.matches()) {
       return new BenchmarkMetric.BenchmarkModel(
           m.group("name"), m.group("backend"), m.group("quantization"));
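For reference, the snippet below is an illustrative sketch (not part of the diff) of the v3 record shape that the new schema_version == "v3" branch of transform() emits. The sample input values (model name, metric, app type, and device info) are invented for illustration; only the field names mirror the code in the diff.

# Standalone sketch: builds one fake benchmark result and prints the record in
# the v3 shape produced by the new transform() branch. All values are invented.
import json

fake_result = {
    "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8bit"},
    "metric": "token_per_sec",
    "actualValue": 12.3,
    "targetValue": 10.0,
    "deviceInfo": {"device": "Samsung Galaxy S22", "os": "Android 13"},
}
app_type = "ANDROID_APP"  # assumed value; in the script this comes from the job artifact

quantization = fake_result["benchmarkModel"]["quantization"] or "unknown"
v3_record = {
    "benchmark": {
        "name": "ExecuTorch",
        "mode": "inference",
        "dtype": quantization,
        "extra_info": {"app_type": app_type},
    },
    "model": {
        "name": fake_result["benchmarkModel"]["name"],
        "type": "OSS model",
        "backend": fake_result["benchmarkModel"].get("backend", ""),
        "extra_info": {"quantization": quantization},
    },
    "metric": {
        "name": fake_result["metric"],
        "benchmark_values": [fake_result["actualValue"]],
        "target_value": fake_result["targetValue"],
        "extra_info": {"method": fake_result.get("method", "")},
    },
    "runners": [
        {
            "name": fake_result["deviceInfo"]["device"],
            "type": fake_result["deviceInfo"]["os"],
            "avail_mem_in_gb": fake_result["deviceInfo"].get("availMem", ""),
            "total_mem_in_gb": fake_result["deviceInfo"].get("totalMem", ""),
        }
    ],
}
print(json.dumps(v3_record, indent=2))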