From b16cb63afd77457d7b46feb2c5deade3b4a5bf19 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 13 Mar 2025 12:13:18 -0700 Subject: [PATCH 1/4] type --- .github/scripts/extract_benchmark_results.py | 156 ++++++------------- 1 file changed, 51 insertions(+), 105 deletions(-) diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py index ba6142a4826..90896b25698 100755 --- a/.github/scripts/extract_benchmark_results.py +++ b/.github/scripts/extract_benchmark_results.py @@ -153,9 +153,10 @@ def extract_android_benchmark_results( # This is to handle the case where there is no benchmark results warning(f"Fail to load the benchmark results from {artifact_s3_url}") return [] + return [] -def initialize_ios_metadata(test_name: str) -> Dict[str, any]: +def initialize_ios_metadata(test_name: str) -> Dict[str, Any]: """ Extract the benchmark metadata from the test name, for example: test_forward_llama2_pte_iOS_17_2_1_iPhone15_4 @@ -364,14 +365,7 @@ def transform( app_type: str, benchmark_results: List, benchmark_config: Dict[str, str], - repo: str, - head_branch: str, - workflow_name: str, - workflow_run_id: int, - workflow_run_attempt: int, job_name: str, - job_id: int, - schema_version: str, ) -> List: """ Transform the benchmark results into the format writable into the benchmark database @@ -381,87 +375,51 @@ def transform( for r in benchmark_results: r["deviceInfo"]["device"] = job_name - if schema_version == "v2": - # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3 - return [ - { - # GH-info to identify where the benchmark is run - "repo": repo, - "head_branch": head_branch, - "workflow_id": workflow_run_id, - "run_attempt": workflow_run_attempt, - "job_id": job_id, - # The model - "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(), - "dtype": ( - r["benchmarkModel"]["quantization"] - if r["benchmarkModel"]["quantization"] - else "unknown" - ), - # The metric value - "metric": r["metric"], - "actual": r["actualValue"], - "target": r["targetValue"], - # The device - "device": r["deviceInfo"]["device"], - "arch": r["deviceInfo"].get("os", ""), - # Not used here, just set it to something unique here - "filename": workflow_name, - "test_name": app_type, - "runner": job_name, - } - for r in benchmark_results - ] - elif schema_version == "v3": - v3_benchmark_results = [] - # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database - return [ - { - "benchmark": { - "name": "ExecuTorch", - "mode": "inference", - "extra_info": { - "app_type": app_type, - # Just keep a copy of the benchmark config here - "benchmark_config": json.dumps(benchmark_config), - }, - }, - "model": { - "name": benchmark_config.get("model", r["benchmarkModel"]["name"]), - "type": "OSS model", - "backend": benchmark_config.get( - "config", r["benchmarkModel"].get("backend", "") - ), + # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database + return [ + { + "benchmark": { + "name": "ExecuTorch", + "mode": "inference", + "extra_info": { + "app_type": app_type, + # Just keep a copy of the benchmark config here + "benchmark_config": json.dumps(benchmark_config), }, - "metric": { - "name": r["metric"], - "benchmark_values": [r["actualValue"]], - "target_value": r["targetValue"], - "extra_info": { - "method": r.get("method", ""), - }, + }, + "model": { + "name": benchmark_config.get("model", r["benchmarkModel"]["name"]), + "type": "OSS 
model", + "backend": benchmark_config.get( + "config", r["benchmarkModel"].get("backend", "") + ), + }, + "metric": { + "name": r["metric"], + "benchmark_values": [r["actualValue"]], + "target_value": r["targetValue"], + "extra_info": { + "method": r.get("method", ""), }, - "runners": [ - { - "name": r["deviceInfo"]["device"], - "type": r["deviceInfo"]["os"], - "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""), - "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""), - } - ], - } - for r in benchmark_results - ] + }, + "runners": [ + { + "name": r["deviceInfo"]["device"], + "type": r["deviceInfo"]["os"], + "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""), + "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""), + } + ], + } + for r in benchmark_results + ] def main() -> None: args = parse_args() # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3 - all_benchmark_results = { - "v2": [], - "v3": [], - } + all_benchmark_results = [] benchmark_config = {} with open(args.artifacts) as f: @@ -482,7 +440,7 @@ def main() -> None: benchmark_config = read_benchmark_config( artifact_s3_url, args.benchmark_configs ) - + benchmark_results = [] if app_type == "ANDROID_APP": benchmark_results = extract_android_benchmark_results( job_name, artifact_type, artifact_s3_url @@ -494,32 +452,20 @@ def main() -> None: ) if benchmark_results: - for schema in all_benchmark_results.keys(): - results = transform( - app_type, - benchmark_results, - benchmark_config, - args.repo, - args.head_branch, - args.workflow_name, - args.workflow_run_id, - args.workflow_run_attempt, - job_name, - extract_job_id(args.artifacts), - schema, - ) - all_benchmark_results[schema].extend(results) - - for schema in all_benchmark_results.keys(): - if not all_benchmark_results.get(schema): - continue - - output_dir = os.path.join(args.output_dir, schema) - os.makedirs(output_dir, exist_ok=True) + results = transform( + app_type, + benchmark_results, + benchmark_config, + job_name + ) + all_benchmark_results.extend(results) + # add v3 in case we have higher version of schema + output_dir = os.path.join(args.output_dir, "v3") + os.makedirs(output_dir, exist_ok=True) output_file = os.path.basename(args.artifacts) with open(f"{output_dir}/{output_file}", "w") as f: - json.dump(all_benchmark_results[schema], f) + json.dump(all_benchmark_results, f) if __name__ == "__main__": From 2d6b862c88f6f2746ebc96424706dc4318fd9a2f Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 13 Mar 2025 12:13:49 -0700 Subject: [PATCH 2/4] type --- .github/scripts/extract_benchmark_results.py | 5 +---- .github/workflows/android-perf.yml | 16 +++------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py index 90896b25698..9baf1dc4354 100755 --- a/.github/scripts/extract_benchmark_results.py +++ b/.github/scripts/extract_benchmark_results.py @@ -453,10 +453,7 @@ def main() -> None: if benchmark_results: results = transform( - app_type, - benchmark_results, - benchmark_config, - job_name + app_type, benchmark_results, benchmark_config, job_name ) all_benchmark_results.extend(results) diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index f21ed849d03..62d63c07647 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -470,21 +470,11 @@ jobs: --benchmark-configs benchmark-configs done - for SCHEMA in v2 v3; do - for BENCHMARK_RESULTS in 
benchmark-results/"${SCHEMA}"/*.json; do - cat "${BENCHMARK_RESULTS}" - echo - done + for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do + cat "${BENCHMARK_RESULTS}" + echo done - # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration - - name: Upload the benchmark results (v2) - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-results/v2 - dry-run: false - schema-version: v2 - - name: Upload the benchmark results (v3) uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: From 6d515d74774b2389067e39a09764c302930860a4 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 13 Mar 2025 12:16:18 -0700 Subject: [PATCH 3/4] type --- .github/workflows/apple-perf.yml | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 83778d36c1b..039a090df31 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -529,21 +529,11 @@ jobs: --benchmark-configs benchmark-configs done - for SCHEMA in v2 v3; do - for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do - cat "${BENCHMARK_RESULTS}" - echo - done + for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do + cat "${BENCHMARK_RESULTS}" + echo done - # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration - - name: Upload the benchmark results (v2) - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-results/v2 - dry-run: false - schema-version: v2 - - name: Upload the benchmark results (v3) uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: From 889c2466c7dcbf6478c2063c98b6060e3826d7c0 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 13 Mar 2025 12:18:50 -0700 Subject: [PATCH 4/4] remove ids --- .github/scripts/extract_benchmark_results.py | 30 -------------------- .github/workflows/android-perf.yml | 5 ---- .github/workflows/apple-perf.yml | 5 ---- 3 files changed, 40 deletions(-) diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py index 9baf1dc4354..77c73eab0b4 100755 --- a/.github/scripts/extract_benchmark_results.py +++ b/.github/scripts/extract_benchmark_results.py @@ -86,36 +86,6 @@ def parse_args() -> Any: action=ValidateDir, help="the directory to keep the benchmark results", ) - parser.add_argument( - "--repo", - type=str, - required=True, - help="which GitHub repo this workflow run belongs to", - ) - parser.add_argument( - "--head-branch", - type=str, - required=True, - help="the head branch that runs", - ) - parser.add_argument( - "--workflow-name", - type=str, - required=True, - help="the name of the benchmark workflow", - ) - parser.add_argument( - "--workflow-run-id", - type=int, - required=True, - help="the id of the benchmark workflow", - ) - parser.add_argument( - "--workflow-run-attempt", - type=int, - required=True, - help="which retry of the workflow this is", - ) parser.add_argument( "--benchmark-configs", type=str, diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 62d63c07647..fbd2cae24e0 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -462,11 +462,6 @@ jobs: ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ --artifacts "${ARTIFACTS_BY_JOB}" \ --output-dir benchmark-results \ - --repo ${{ 
github.repository }} \ - --head-branch ${{ github.head_ref || github.ref_name }} \ - --workflow-name "${{ github.workflow }}" \ - --workflow-run-id ${{ github.run_id }} \ - --workflow-run-attempt ${{ github.run_attempt }} \ --benchmark-configs benchmark-configs done diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 039a090df31..1cf7e67f007 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -521,11 +521,6 @@ jobs: ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ --artifacts "${ARTIFACTS_BY_JOB}" \ --output-dir benchmark-results \ - --repo ${{ github.repository }} \ - --head-branch ${{ github.head_ref || github.ref_name }} \ - --workflow-name "${{ github.workflow }}" \ - --workflow-run-id ${{ github.run_id }} \ - --workflow-run-attempt ${{ github.run_attempt }} \ --benchmark-configs benchmark-configs done
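
Taken together, the series leaves `transform()` with a four-argument signature (`app_type`, `benchmark_results`, `benchmark_config`, `job_name`), emits only the v3 schema, and has `main()` write the records to `<output-dir>/v3/<basename of --artifacts>`. Below is a minimal sketch of the resulting call; the input record is hand-written to match the keys `transform()` reads, and every value in it (model name, metric, device, job name, config) is an illustrative assumption, not output from a real artifact:

    # Sketch only. Assumes .github/scripts is importable (e.g. added to
    # sys.path); the module path here is an assumption for illustration.
    from extract_benchmark_results import transform

    # Illustrative input shaped like one entry returned by
    # extract_android_benchmark_results(); all field values are made up.
    sample_results = [
        {
            "benchmarkModel": {"name": "llama2", "backend": "xnnpack"},
            "metric": "avg_inference_latency(ms)",
            "actualValue": 123.4,
            "targetValue": 100.0,
            "deviceInfo": {"device": "Samsung Galaxy S22", "os": "Android 13"},
        }
    ]

    records = transform(
        app_type="ANDROID_APP",
        benchmark_results=sample_results,
        benchmark_config={"model": "llama2", "config": "xnnpack_q8"},
        job_name="benchmark-on-device (samsung_galaxy_s22)",
    )

    # Each record carries only the v3 layout:
    #   records[0]["benchmark"]["name"]          -> "ExecuTorch"
    #   records[0]["model"]                      -> {"name": "llama2",
    #                                                "type": "OSS model",
    #                                                "backend": "xnnpack_q8"}
    #   records[0]["metric"]["benchmark_values"] -> [123.4]
    #   records[0]["runners"][0]["name"]         -> the job name, since
    #     transform() overwrites deviceInfo["device"] with job_name first.

On the CLI side, patch 4 means callers no longer pass `--repo`, `--head-branch`, `--workflow-name`, `--workflow-run-id`, or `--workflow-run-attempt`; as the updated workflows show, the script is invoked with just `--artifacts`, `--output-dir`, and `--benchmark-configs`.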