Commit bb0de70: Pass benchmark config via test spec
Parent: 26da97d

3 files changed: 61 additions, 18 deletions

.github/scripts/extract_benchmark_results.py

Lines changed: 48 additions & 12 deletions
@@ -23,6 +23,9 @@

 BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
 ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")
+BENCHMARK_CONFIG_REGEX = re.compile(
+    r"# The benchmark config is (?P<benchmark_config>.+)"
+)

 # iOS-related regexes and variables
 IOS_TEST_SPEC_REGEX = re.compile(
@@ -308,7 +311,7 @@ def extract_job_id(artifacts_filename: str) -> int:
     return int(m.group("job_id"))


-def read_benchmark_configs(benchmark_configs: str) -> Dict[str, Dict[str, str]]:
+def read_all_benchmark_configs() -> Dict[str, Dict[str, str]]:
     """
     Read all the benchmark configs that we can find
     """
@@ -325,16 +328,44 @@ def read_benchmark_configs(benchmark_configs: str) -> Dict[str, Dict[str, str]]:
     return benchmark_configs


-def get_benchmark_configs(benchmark_configs: Dict[str, Dict[str, str]]) -> str:
+def read_benchmark_config(
+    artifact_s3_url: str, benchmark_configs_dir: str
+) -> Dict[str, str]:
     """
     Get the correct benchmark config for this benchmark run
     """
+    try:
+        with request.urlopen(artifact_s3_url) as data:
+            for line in data.read().decode("utf8").splitlines():
+                m = BENCHMARK_CONFIG_REGEX.match(line)
+                if not m:
+                    continue
+
+                benchmark_config = m.group("benchmark_config")
+                filename = os.path.join(
+                    benchmark_configs_dir, f"{benchmark_config}.json"
+                )
+
+                if not os.path.exists(filename):
+                    warning(f"There is no benchmark config {filename}")
+                    continue
+
+                with open(filename) as f:
+                    try:
+                        return json.load(f)
+                    except json.JSONDecodeError as e:
+                        warning(f"Fail to load benchmark config {filename}: {e}")
+
+    except error.HTTPError:
+        warning(f"Fail to read the test spec output at {artifact_s3_url}")
+
+    return {}


 def transform(
     app_type: str,
     benchmark_results: List,
-    benchmark_configs: Dict[str, Dict[str, str]],
+    benchmark_config: Dict[str, str],
     repo: str,
     head_branch: str,
     workflow_name: str,
@@ -384,25 +415,25 @@ def transform(
             for r in benchmark_results
         ]
     elif schema_version == "v3":
+        v3_benchmark_results = []
         # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
         return [
             {
                 "benchmark": {
                     "name": "ExecuTorch",
                     "mode": "inference",
-                    "dtype": quantization,
                     "extra_info": {
                         "app_type": app_type,
-                        "benchmark_configs":
+                        # Just keep a copy of the benchmark config here
+                        "benchmark_config": json.dumps(benchmark_config),
                     },
                 },
                 "model": {
-                    "name": r["benchmarkModel"]["name"],
+                    "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
                     "type": "OSS model",
-                    "backend": r["benchmarkModel"].get("backend", ""),
-                    "extra_info": {
-                        "quantization": quantization,
-                    },
+                    "backend": benchmark_config.get(
+                        "config", r["benchmarkModel"].get("backend", "")
+                    ),
                 },
                 "metric": {
                     "name": r["metric"],
@@ -433,7 +464,7 @@ def main() -> None:
         "v2": [],
         "v3": [],
     }
-    benchmark_configs = read_benchmark_configs(args.benchmark_configs)
+    benchmark_config = {}

     with open(args.artifacts) as f:
         for artifact in json.load(f):
@@ -449,6 +480,11 @@ def main() -> None:
             artifact_type = artifact["type"]
             artifact_s3_url = artifact["s3_url"]

+            if artifact_type == "TESTSPEC_OUTPUT":
+                benchmark_config = read_benchmark_config(
+                    artifact_s3_url, args.benchmark_configs
+                )
+
             if app_type == "ANDROID_APP":
                 benchmark_results = extract_android_benchmark_results(
                     job_name, artifact_type, artifact_s3_url
@@ -464,7 +500,7 @@ def main() -> None:
                 results = transform(
                     app_type,
                     benchmark_results,
-                    benchmark_configs,
+                    benchmark_config,
                     args.repo,
                     args.head_branch,
                     args.workflow_name,
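For context, the lookup that the new read_benchmark_config helper performs can be sketched in a few lines: find the marker line in the test spec output, take the config ID it names, and load the matching JSON file from the downloaded benchmark-configs directory. The sketch below is illustrative only, not the upstream code; it substitutes a local string for the S3 test spec output, and the config ID "llama2_xnnpack_q8" and the "benchmark-configs" directory are made-up examples.

    # Minimal sketch of the config lookup, assuming a local string instead of the
    # S3-hosted test spec output. The ID and directory below are hypothetical.
    import json
    import os
    import re

    BENCHMARK_CONFIG_REGEX = re.compile(r"# The benchmark config is (?P<benchmark_config>.+)")


    def lookup_benchmark_config(test_spec_output: str, benchmark_configs_dir: str) -> dict:
        """Return the benchmark config named by the test spec output, or {} if none is found."""
        for line in test_spec_output.splitlines():
            m = BENCHMARK_CONFIG_REGEX.match(line)
            if not m:
                continue
            # The marker names the config ID; the matching JSON lives in the configs dir
            filename = os.path.join(benchmark_configs_dir, f"{m.group('benchmark_config')}.json")
            if os.path.exists(filename):
                with open(filename) as f:
                    return json.load(f)
        return {}


    if __name__ == "__main__":
        sample_output = "# The benchmark config is llama2_xnnpack_q8\n- curl -s --fail ..."
        print(lookup_benchmark_config(sample_output, "benchmark-configs"))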

.github/workflows/android-perf.yml

Lines changed: 11 additions & 6 deletions
@@ -100,7 +100,7 @@ jobs:
       - name: Prepare the spec
         shell: bash
         env:
-          BENCHMARK_CONFIGS: ${{ toJSON(matrix) }}
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
         working-directory: extension/benchmark/android/benchmark
         run: |
           set -eux
@@ -110,13 +110,18 @@ jobs:
           # We could write a script to properly use jinja here, but there is only one variable,
           # so let's just sed it
           sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
-          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml

+          BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
+          # The config for this benchmark run; we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
+
+          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
           # Just print the test spec for debugging
           cat android-llm-device-farm-test-spec.yml

-          # Also dump the benchmark configs so that we can use it later in the dashboard
-          echo "${BENCHMARK_CONFIGS}" > ${{ matrix.model }}_${{ matrix.config }}.json
+          # Save the benchmark config so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"

       - name: Upload the spec
         uses: seemethere/upload-artifact-s3@v5
@@ -414,14 +419,14 @@ jobs:

      - name: Download the list of benchmark configs from S3
        env:
-          BENCHMARK_CONFIGS_S3_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
        shell: bash
        run: |
          set -eux

          mkdir -p benchmark-configs
          pushd benchmark-configs
-          ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_S3_DIR}" .
+          ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
          popd

          ls -lah benchmark-configs
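The producer side of this handshake is the "Prepare the spec" step: it composes an ID from the matrix as <model>_<config>, stamps it into the test spec template via sed, and writes the matrix JSON under that same ID for the upload script to fetch later. The following is a rough Python rendition of that shell step, not the workflow itself; the matrix values, template string, and model URL are placeholders.

    # Illustrative Python equivalent of the "Prepare the spec" shell step.
    # The matrix values and URLs below are hypothetical examples.
    import json


    def prepare_spec(template: str, model_path: str, matrix: dict) -> tuple[str, str]:
        """Render the .j2 test spec and pick the filename for the saved benchmark config."""
        benchmark_config_id = f"{matrix['model']}_{matrix['config']}"
        rendered = (
            template.replace("{{ model_path }}", model_path)
            .replace("{{ benchmark_config_id }}", benchmark_config_id)
        )
        return rendered, f"{benchmark_config_id}.json"


    if __name__ == "__main__":
        matrix = {"model": "llama2", "config": "xnnpack_q8"}  # made-up matrix entry
        template = (
            "# The benchmark config is {{ benchmark_config_id }}\n"
            "- curl -s --fail '{{ model_path }}' -o model.zip"
        )
        spec, config_filename = prepare_spec(template, "https://example.com/model.zip", matrix)
        # Mirrors: echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
        with open(config_filename, "w") as f:
            json.dump(matrix, f)
        print(spec)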

extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,8 @@ phases:

   pre_test:
     commands:
+      # The benchmark config is {{ benchmark_config_id }}
+
       # Download the model from S3
       - curl -s --fail '{{ model_path }}' -o model.zip
       - unzip model.zip && ls -la
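Once the workflow renders {{ benchmark_config_id }}, this comment line is exactly the marker that BENCHMARK_CONFIG_REGEX in extract_benchmark_results.py looks for. A quick sanity check, using an illustrative ID rather than a real matrix entry:

    # Sanity check that the rendered marker line matches the regex added above.
    # "llama2_xnnpack_q8" is a hypothetical config ID.
    import re

    BENCHMARK_CONFIG_REGEX = re.compile(r"# The benchmark config is (?P<benchmark_config>.+)")

    rendered_line = "# The benchmark config is {{ benchmark_config_id }}".replace(
        "{{ benchmark_config_id }}", "llama2_xnnpack_q8"
    )
    m = BENCHMARK_CONFIG_REGEX.match(rendered_line)
    assert m is not None and m.group("benchmark_config") == "llama2_xnnpack_q8"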
