diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py
index 08cc5bd93b2..76f0e533389 100755
--- a/.github/scripts/extract_benchmark_results.py
+++ b/.github/scripts/extract_benchmark_results.py
@@ -5,6 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import glob
 import json
 import logging
 import os
@@ -22,6 +23,7 @@
 
 BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
 ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")
+BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")
 
 # iOS-related regexes and variables
 IOS_TEST_SPEC_REGEX = re.compile(
@@ -51,7 +53,7 @@ def __call__(
             parser.error(f"{values} is not a valid JSON file (*.json)")
 
 
-class ValidateOutputDir(Action):
+class ValidateDir(Action):
     def __call__(
         self,
         parser: ArgumentParser,
@@ -81,7 +83,7 @@ def parse_args() -> Any:
         "--output-dir",
         type=str,
         required=True,
-        action=ValidateOutputDir,
+        action=ValidateDir,
         help="the directory to keep the benchmark results",
     )
     parser.add_argument(
@@ -114,6 +116,13 @@ def parse_args() -> Any:
         required=True,
         help="which retry of the workflow this is",
     )
+    parser.add_argument(
+        "--benchmark-configs",
+        type=str,
+        required=True,
+        action=ValidateDir,
+        help="the directory to keep the benchmark configs",
+    )
 
     return parser.parse_args()
 
@@ -300,9 +309,60 @@ def extract_job_id(artifacts_filename: str) -> int:
     return int(m.group("job_id"))
 
 
+def read_all_benchmark_configs(benchmark_configs_dir: str) -> Dict[str, Dict[str, str]]:
+    """
+    Read all the benchmark configs that we can find
+    """
+    benchmark_configs = {}
+
+    for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
+        filename = os.path.basename(file)
+        with open(file) as f:
+            try:
+                benchmark_configs[filename] = json.load(f)
+            except json.JSONDecodeError as e:
+                warning(f"Failed to load benchmark config {file}: {e}")
+
+    return benchmark_configs
+
+
+def read_benchmark_config(
+    artifact_s3_url: str, benchmark_configs_dir: str
+) -> Dict[str, str]:
+    """
+    Get the correct benchmark config for this benchmark run
+    """
+    try:
+        with request.urlopen(artifact_s3_url) as data:
+            for line in data.read().decode("utf8").splitlines():
+                m = BENCHMARK_CONFIG_REGEX.match(line)
+                if not m:
+                    continue
+
+                benchmark_config = m.group("benchmark_config")
+                filename = os.path.join(
+                    benchmark_configs_dir, f"{benchmark_config}.json"
+                )
+
+                if not os.path.exists(filename):
+                    warning(f"There is no benchmark config {filename}")
+                    continue
+
+                with open(filename) as f:
+                    try:
+                        return json.load(f)
+                    except json.JSONDecodeError as e:
+                        warning(f"Failed to load benchmark config {filename}: {e}")
+    except error.HTTPError:
+        warning(f"Failed to read the test spec output at {artifact_s3_url}")
+
+    return {}
+
+
 def transform(
     app_type: str,
     benchmark_results: List,
+    benchmark_config: Dict[str, str],
     repo: str,
     head_branch: str,
     workflow_name: str,
@@ -352,29 +412,25 @@ def transform(
             for r in benchmark_results
         ]
     elif schema_version == "v3":
-        quantization = (
-            r["benchmarkModel"]["quantization"]
-            if r["benchmarkModel"]["quantization"]
-            else "unknown"
-        )
+        v3_benchmark_results = []
         # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
         return [
             {
                 "benchmark": {
                     "name": "ExecuTorch",
                     "mode": "inference",
-                    "dtype": quantization,
                     "extra_info": {
                         "app_type": app_type,
+                        # Just keep a copy of the benchmark config here
+                        "benchmark_config": json.dumps(benchmark_config),
                     },
                 },
"model": { - "name": r["benchmarkModel"]["name"], + "name": benchmark_config.get("model", r["benchmarkModel"]["name"]), "type": "OSS model", - "backend": r["benchmarkModel"].get("backend", ""), - "extra_info": { - "quantization": quantization, - }, + "backend": benchmark_config.get( + "config", r["benchmarkModel"].get("backend", "") + ), }, "metric": { "name": r["metric"], @@ -405,6 +461,7 @@ def main() -> None: "v2": [], "v3": [], } + benchmark_config = {} with open(args.artifacts) as f: for artifact in json.load(f): @@ -420,6 +477,11 @@ def main() -> None: artifact_type = artifact["type"] artifact_s3_url = artifact["s3_url"] + if artifact_type == "TESTSPEC_OUTPUT": + benchmark_config = read_benchmark_config( + artifact_s3_url, args.benchmark_configs + ) + if app_type == "ANDROID_APP": benchmark_results = extract_android_benchmark_results( job_name, artifact_type, artifact_s3_url @@ -435,6 +497,7 @@ def main() -> None: results = transform( app_type, benchmark_results, + benchmark_config, args.repo, args.head_branch, args.workflow_name, diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index 6cf583a5992..48625898ae9 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -99,6 +99,8 @@ jobs: - name: Prepare the spec shell: bash + env: + BENCHMARK_CONFIG: ${{ toJSON(matrix) }} working-directory: extension/benchmark/android/benchmark run: | set -eux @@ -108,11 +110,19 @@ jobs: # We could write a script to properly use jinja here, but there is only one variable, # so let's just sed it sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 - cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml + BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}" + # The config for this benchmark runs, we save it in the test spec so that it can be fetched + # later by the upload script + sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2 + + cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml # Just print the test spec for debugging cat android-llm-device-farm-test-spec.yml + # Save the benchmark configs so that we can use it later in the dashboard + echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json" + - name: Upload the spec uses: seemethere/upload-artifact-s3@v5 with: @@ -123,6 +133,16 @@ jobs: if-no-files-found: error path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml + - name: Update the benchmark configs + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/android/benchmark/${{ matrix.model }}_${{ matrix.config }}.json + export-models: name: export-models uses: pytorch/test-infra/.github/workflows/linux_job.yml@main @@ -397,6 +417,20 @@ jobs: ls -lah artifacts + - name: Download the list of benchmark configs from S3 + env: + BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/ + shell: bash + run: | + set -eux + + mkdir -p benchmark-configs + pushd benchmark-configs + ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" . 
+          popd
+
+          ls -lah benchmark-configs
+
       - name: Extract the benchmark results JSON
         shell: bash
         run: |
@@ -414,7 +448,8 @@
             --head-branch ${{ github.head_ref || github.ref_name }} \
             --workflow-name "${{ github.workflow }}" \
             --workflow-run-id ${{ github.run_id }} \
-            --workflow-run-attempt ${{ github.run_attempt }}
+            --workflow-run-attempt ${{ github.run_attempt }} \
+            --benchmark-configs benchmark-configs
           done
 
           for SCHEMA in v2 v3; do
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index 4accf649118..b35daebaf5b 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -101,20 +101,30 @@ jobs:
       - name: Prepare the spec
         shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
         working-directory: extension/benchmark/apple/Benchmark
         run: |
           set -eux
 
-          echo "DEBUG: ${{ matrix.model }}"
 
           # The model will be exported in the next step to this S3 path
           MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
           # We could write a script to properly use jinja here, but there is only one variable,
           # so let's just sed it
           sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
+          BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
+          # The config ID for this benchmark run. We save it in the test spec so that it can be
+          # fetched later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
           cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
           # Just print the test spec for debugging
           cat default-ios-device-farm-appium-test-spec.yml
 
+          # Save the benchmark config so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+
       - name: Upload the spec
         uses: seemethere/upload-artifact-s3@v5
         with:
@@ -125,6 +135,16 @@
           if-no-files-found: error
           path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
 
+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/apple/Benchmark/${{ matrix.model }}_${{ matrix.config }}.json
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -481,6 +501,18 @@
 
           ls -lah artifacts
 
+      - name: Download the list of benchmark configs from S3
+        env:
+          BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+        shell: bash
+        run: |
+          set -eux
+          mkdir -p benchmark-configs
+          pushd benchmark-configs
+          ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
+          popd
+          ls -lah benchmark-configs
+
       - name: Extract the benchmark results JSON
         shell: bash
         run: |
@@ -498,7 +530,8 @@
             --head-branch ${{ github.head_ref || github.ref_name }} \
             --workflow-name "${{ github.workflow }}" \
             --workflow-run-id ${{ github.run_id }} \
-            --workflow-run-attempt ${{ github.run_attempt }}
+            --workflow-run-attempt ${{ github.run_attempt }} \
+            --benchmark-configs benchmark-configs
           done
 
           for SCHEMA in v2 v3; do
diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
index d3d6ea04f19..0d41274a311 100644
--- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
+++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -8,6 +8,9 @@ phases:
 
   pre_test:
     commands:
+      # Print this so that the upload script can read and process the benchmark config
+      - echo "The benchmark config is {{ benchmark_config_id }}"
+
       # Download the model from S3
       - curl -s --fail '{{ model_path }}' -o model.zip
       - unzip model.zip && ls -la
diff --git a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
index 68f8399f16b..05816685638 100644
--- a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
+++ b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
@@ -10,6 +10,9 @@ phases:
   # The pre-test phase includes commands that setup your test environment.
   pre_test:
     commands:
+      # Print this so that the upload script can read and process the benchmark config
+      - echo "The benchmark config is {{ benchmark_config_id }}"
+
       # Download the model from S3
       - curl -s --fail '{{ model_path }}' -o model.zip
       - unzip model.zip && ls -la
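
For reference, a minimal sketch (not part of the diff) of how these pieces fit together once the change lands: the "Prepare the spec" step writes the matrix entry to <config_id>.json and uploads it to S3, the Device Farm test spec echoes "The benchmark config is <config_id>", the workflow later syncs the JSON files into a local benchmark-configs directory, and read_benchmark_config() in extract_benchmark_results.py matches the echoed line with BENCHMARK_CONFIG_REGEX to load the config that transform() embeds into the v3 records. The config id llama2_xnnpack and the matrix fields below are hypothetical examples, not values taken from this diff.

# Sketch only: simulates the handshake between the test spec output and
# read_benchmark_config(). Names and values here are made up for illustration.
import json
import os
import re

BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

# What the "Prepare the spec" step would write and the workflow would sync locally
os.makedirs("benchmark-configs", exist_ok=True)
with open("benchmark-configs/llama2_xnnpack.json", "w") as f:
    json.dump({"model": "llama2", "config": "xnnpack", "device": "samsung_galaxy_s22"}, f)

# A line from the Device Farm test spec output (see the echo added to the .yml.j2 files)
line = "The benchmark config is llama2_xnnpack"

m = BENCHMARK_CONFIG_REGEX.match(line)
if m:
    with open(os.path.join("benchmark-configs", f"{m.group('benchmark_config')}.json")) as f:
        benchmark_config = json.load(f)
    # transform() prefers these values over what the benchmark app itself reports
    print(benchmark_config.get("model"), benchmark_config.get("config"))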