
Commit 8c45679

Update base for Update on "Reuse GELU implementation from PyTorch core"
kernels/optimized doesn't need to support embedded systems, so it can just take a header-only dep on PyTorch. Note that, because we will pick up Sleef internally and ignore it externally thanks to ATen vec, this PR gets to enable optimized GELU in OSS.

Testing: CI to make sure this doesn't break mobile build modes; happy to take advice on anything not currently covered that might break.

Differential Revision: [D66335522](https://our.internmc.facebook.com/intern/diff/D66335522/)

[ghstack-poisoned]
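For context, GELU is commonly written in two standard formulations, the exact erf-based form and the tanh approximation, both of which PyTorch core exposes. A minimal, illustrative Python sketch of the two variants follows; this is reference math only, not the C++ kernel code touched by this commit, and it makes no claim about which variants the optimized kernel covers.

```python
import math

import torch


def gelu_exact(x: torch.Tensor) -> torch.Tensor:
    # Exact (erf-based) GELU: x * Phi(x), where Phi is the standard normal CDF.
    return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))


def gelu_tanh(x: torch.Tensor) -> torch.Tensor:
    # Tanh approximation, selected in PyTorch with approximate="tanh".
    return 0.5 * x * (
        1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x.pow(3)))
    )


x = torch.randn(16)
torch.testing.assert_close(gelu_exact(x), torch.nn.functional.gelu(x))
torch.testing.assert_close(gelu_tanh(x), torch.nn.functional.gelu(x, approximate="tanh"))
```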
2 parents 42c25b5 + d3e58b0

File tree

116 files changed (+2853, -742 lines)


.github/scripts/extract_benchmark_results.py

Lines changed: 76 additions & 13 deletions
@@ -5,6 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+import glob
 import json
 import logging
 import os
@@ -22,6 +23,7 @@

 BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
 ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")
+BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

 # iOS-related regexes and variables
 IOS_TEST_SPEC_REGEX = re.compile(
@@ -51,7 +53,7 @@ def __call__(
             parser.error(f"{values} is not a valid JSON file (*.json)")


-class ValidateOutputDir(Action):
+class ValidateDir(Action):
     def __call__(
         self,
         parser: ArgumentParser,
@@ -81,7 +83,7 @@ def parse_args() -> Any:
         "--output-dir",
         type=str,
         required=True,
-        action=ValidateOutputDir,
+        action=ValidateDir,
         help="the directory to keep the benchmark results",
     )
     parser.add_argument(
@@ -114,6 +116,13 @@ def parse_args() -> Any:
         required=True,
         help="which retry of the workflow this is",
     )
+    parser.add_argument(
+        "--benchmark-configs",
+        type=str,
+        required=True,
+        action=ValidateDir,
+        help="the directory to keep the benchmark configs",
+    )

     return parser.parse_args()

@@ -300,9 +309,60 @@ def extract_job_id(artifacts_filename: str) -> int:
     return int(m.group("job_id"))


+def read_all_benchmark_configs() -> Dict[str, Dict[str, str]]:
+    """
+    Read all the benchmark configs that we can find
+    """
+    benchmark_configs = {}
+
+    for file in glob.glob(f"{benchmark_configs}/*.json"):
+        filename = os.path.basename(file)
+        with open(file) as f:
+            try:
+                benchmark_configs[filename] = json.load(f)
+            except json.JSONDecodeError as e:
+                warning(f"Fail to load benchmark config {file}: {e}")
+
+    return benchmark_configs
+
+
+def read_benchmark_config(
+    artifact_s3_url: str, benchmark_configs_dir: str
+) -> Dict[str, str]:
+    """
+    Get the correct benchmark config for this benchmark run
+    """
+    try:
+        with request.urlopen(artifact_s3_url) as data:
+            for line in data.read().decode("utf8").splitlines():
+                m = BENCHMARK_CONFIG_REGEX.match(line)
+                if not m:
+                    continue
+
+                benchmark_config = m.group("benchmark_config")
+                filename = os.path.join(
+                    benchmark_configs_dir, f"{benchmark_config}.json"
+                )
+
+                if not os.path.exists(filename):
+                    warning(f"There is no benchmark config {filename}")
+                    continue
+
+                with open(filename) as f:
+                    try:
+                        return json.load(f)
+                    except json.JSONDecodeError as e:
+                        warning(f"Fail to load benchmark config {filename}: {e}")
+    except error.HTTPError:
+        warning(f"Fail to read the test spec output at {artifact_s3_url}")
+
+    return {}
+
+
 def transform(
     app_type: str,
     benchmark_results: List,
+    benchmark_config: Dict[str, str],
     repo: str,
     head_branch: str,
     workflow_name: str,
@@ -352,29 +412,25 @@ def transform(
             for r in benchmark_results
         ]
     elif schema_version == "v3":
-        quantization = (
-            r["benchmarkModel"]["quantization"]
-            if r["benchmarkModel"]["quantization"]
-            else "unknown"
-        )
+        v3_benchmark_results = []
         # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
         return [
             {
                 "benchmark": {
                     "name": "ExecuTorch",
                     "mode": "inference",
-                    "dtype": quantization,
                     "extra_info": {
                         "app_type": app_type,
+                        # Just keep a copy of the benchmark config here
+                        "benchmark_config": json.dumps(benchmark_config),
                     },
                 },
                 "model": {
-                    "name": r["benchmarkModel"]["name"],
+                    "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
                     "type": "OSS model",
-                    "backend": r["benchmarkModel"].get("backend", ""),
-                    "extra_info": {
-                        "quantization": quantization,
-                    },
+                    "backend": benchmark_config.get(
+                        "config", r["benchmarkModel"].get("backend", "")
+                    ),
                 },
                 "metric": {
                     "name": r["metric"],
@@ -405,6 +461,7 @@ def main() -> None:
         "v2": [],
         "v3": [],
     }
+    benchmark_config = {}

     with open(args.artifacts) as f:
         for artifact in json.load(f):
@@ -420,6 +477,11 @@
             artifact_type = artifact["type"]
             artifact_s3_url = artifact["s3_url"]

+            if artifact_type == "TESTSPEC_OUTPUT":
+                benchmark_config = read_benchmark_config(
+                    artifact_s3_url, args.benchmark_configs
+                )
+
             if app_type == "ANDROID_APP":
                 benchmark_results = extract_android_benchmark_results(
                     job_name, artifact_type, artifact_s3_url
@@ -435,6 +497,7 @@
                 results = transform(
                     app_type,
                     benchmark_results,
+                    benchmark_config,
                     args.repo,
                     args.head_branch,
                     args.workflow_name,
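Taken together, the new pieces in this script work as follows: the workflow uploads one JSON file per benchmark matrix entry, the device-farm test spec output prints a line naming which config it ran, and `read_benchmark_config` matches that line against the downloaded config directory so that `transform` can prefer the config's `model`/`config` fields over what the app reports. A self-contained sketch of that matching flow, using hypothetical sample data (the config id, fields, and file layout below are made up for illustration):

```python
import json
import os
import re
import tempfile

BENCHMARK_CONFIG_REGEX = re.compile(r"The benchmark config is (?P<benchmark_config>.+)")

# A line the device-farm test spec output might contain (hypothetical id).
test_spec_output_line = "The benchmark config is llama_xnnpack_q8"

with tempfile.TemporaryDirectory() as configs_dir:
    # Stand-in for the per-matrix-entry JSON uploaded by the workflow.
    with open(os.path.join(configs_dir, "llama_xnnpack_q8.json"), "w") as f:
        json.dump({"model": "llama", "config": "xnnpack_q8"}, f)

    benchmark_config = {}
    m = BENCHMARK_CONFIG_REGEX.match(test_spec_output_line)
    if m:
        filename = os.path.join(configs_dir, f"{m.group('benchmark_config')}.json")
        if os.path.exists(filename):
            with open(filename) as f:
                benchmark_config = json.load(f)

    # transform() falls back to the app-reported values only when these keys
    # are missing from the matched config.
    print(benchmark_config.get("model"), benchmark_config.get("config"))
```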

.github/workflows/android-perf.yml

Lines changed: 37 additions & 2 deletions
@@ -99,6 +99,8 @@ jobs:

       - name: Prepare the spec
         shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
         working-directory: extension/benchmark/android/benchmark
         run: |
           set -eux
@@ -108,11 +110,19 @@
           # We could write a script to properly use jinja here, but there is only one variable,
           # so let's just sed it
           sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
-          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml

+          BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
+          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
+
+          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
           # Just print the test spec for debugging
           cat android-llm-device-farm-test-spec.yml

+          # Save the benchmark configs so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+
       - name: Upload the spec
         uses: seemethere/upload-artifact-s3@v5
         with:
@@ -123,6 +133,16 @@
           if-no-files-found: error
           path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/${{ matrix.model }}_${{ matrix.config }}.json
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -397,6 +417,20 @@

           ls -lah artifacts

+      - name: Download the list of benchmark configs from S3
+        env:
+          BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+        shell: bash
+        run: |
+          set -eux
+
+          mkdir -p benchmark-configs
+          pushd benchmark-configs
+          ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
+          popd
+
+          ls -lah benchmark-configs
+
       - name: Extract the benchmark results JSON
         shell: bash
         run: |
@@ -414,7 +448,8 @@
             --head-branch ${{ github.head_ref || github.ref_name }} \
             --workflow-name "${{ github.workflow }}" \
             --workflow-run-id ${{ github.run_id }} \
-            --workflow-run-attempt ${{ github.run_attempt }}
+            --workflow-run-attempt ${{ github.run_attempt }} \
+            --benchmark-configs benchmark-configs
           done

           for SCHEMA in v2 v3; do
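The "Prepare the spec" step above both stamps `{{ benchmark_config_id }}` into the test spec template and dumps the whole matrix entry to `<model>_<config>.json`, which the "Update the benchmark configs" step then uploads to S3. A rough Python equivalent of what that shell step produces (the matrix keys and values here are hypothetical):

```python
import json

# Stand-in for ${{ toJSON(matrix) }}; the real keys come from the workflow matrix.
matrix = {"model": "llama", "config": "xnnpack_q8", "device": "samsung_galaxy_s22"}

# Equivalent of BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
benchmark_config_id = f"{matrix['model']}_{matrix['config']}"

# Equivalent of: echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
with open(f"{benchmark_config_id}.json", "w") as f:
    json.dump(matrix, f)

# The same id is sed'ed into the test spec, so its output can later announce
# which config was run and the extract script can find this JSON file again.
print(f"The benchmark config is {benchmark_config_id}")
```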

.github/workflows/apple-perf.yml

Lines changed: 35 additions & 2 deletions
@@ -101,20 +101,30 @@ jobs:

       - name: Prepare the spec
         shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
         working-directory: extension/benchmark/apple/Benchmark
         run: |
           set -eux

-          echo "DEBUG: ${{ matrix.model }}"
           # The model will be exported in the next step to this S3 path
           MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
           # We could write a script to properly use jinja here, but there is only one variable,
           # so let's just sed it
           sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
+          BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
+          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
           cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
           # Just print the test spec for debugging
           cat default-ios-device-farm-appium-test-spec.yml

+          # Save the benchmark configs so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+
       - name: Upload the spec
         uses: seemethere/upload-artifact-s3@v5
         with:
@@ -125,6 +135,16 @@
           if-no-files-found: error
           path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml

+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/apple/Benchmark/${{ matrix.model }}_${{ matrix.config }}.json
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -481,6 +501,18 @@

           ls -lah artifacts

+      - name: Download the list of benchmark configs from S3
+        env:
+          BENCHMARK_CONFIGS_DIR: s3://gha-artifacts/${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+        shell: bash
+        run: |
+          set -eux
+          mkdir -p benchmark-configs
+          pushd benchmark-configs
+          ${CONDA_RUN} aws s3 sync "${BENCHMARK_CONFIGS_DIR}" .
+          popd
+          ls -lah benchmark-configs
+
       - name: Extract the benchmark results JSON
         shell: bash
         run: |
@@ -498,7 +530,8 @@
             --head-branch ${{ github.head_ref || github.ref_name }} \
             --workflow-name "${{ github.workflow }}" \
             --workflow-run-id ${{ github.run_id }} \
-            --workflow-run-attempt ${{ github.run_attempt }}
+            --workflow-run-attempt ${{ github.run_attempt }} \
+            --benchmark-configs benchmark-configs
           done

           for SCHEMA in v2 v3; do

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
@@ -80,8 +80,8 @@ We use [`lintrunner`](https://pypi.org/project/lintrunner/) to help make sure th
 code follows our standards. Set it up with:

 ```
-pip install lintrunner==0.11.0
-pip install lintrunner-adapters==0.11.0
+pip install lintrunner==0.12.7
+pip install lintrunner-adapters==0.12.4
 lintrunner init
 ```

backends/apple/coreml/runtime/test/export_stateful_model.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def main() -> None:
         torch.randn((1, embedding_dim)),
         torch.tensor([0]),
     )
-    exported_model = export(model, example_inputs)
+    exported_model = export(model, example_inputs, strict=True)
     edge_program_manager = exir.to_edge(exported_model)
     compile_specs = CoreMLBackend.generate_compile_specs(
         compute_precision=ct.precision.FLOAT16,
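The export call sites in this commit now pass `strict=True` explicitly instead of relying on `torch.export.export`'s default. A minimal sketch of the updated pattern with a toy module (the module and shapes are illustrative, not taken from the file above):

```python
import torch
from torch.export import export


class TinyModel(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.gelu(x)


model = TinyModel().eval()
example_inputs = (torch.randn(2, 4),)

# Pin the export mode explicitly rather than depending on the default.
exported_model = export(model, example_inputs, strict=True)
print(exported_model)
```

The resulting `ExportedProgram` is what `exir.to_edge` consumes in the snippets above.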

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 3 additions & 4 deletions
@@ -16,7 +16,6 @@


 class TestCoreMLPartitioner(unittest.TestCase):
-
     # TODO(T182928844): Delegate dim order op to backend.
     edge_compile_config = executorch.exir.EdgeCompileConfig(_skip_dim_order=True)

@@ -34,7 +33,7 @@ def forward(self, a, x, b):
         model.eval()

         example_inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)

         edge_program_manager = executorch.exir.to_edge(
             exir_program_aten, compile_config=self.edge_compile_config
@@ -61,7 +60,7 @@ def test_vit_skip_conv(self):
         model.eval()

         example_inputs = (torch.randn(1, 3, 224, 224),)
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         edge_program_manager = executorch.exir.to_edge(
             exir_program_aten, compile_config=self.edge_compile_config
         )
@@ -106,7 +105,7 @@ def forward(self, q, k_val, input_pos):
         k_val = torch.randn((1, embedding_dim))
         input_pos = torch.tensor([0])
         example_inputs = (q, k_val, input_pos)
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)

         compile_specs = CoreMLBackend.generate_compile_specs(
             minimum_deployment_target=ct.target.iOS18

backends/apple/mps/test/test_mps_utils.py

Lines changed: 1 addition & 4 deletions
@@ -247,10 +247,7 @@ def lower_module_and_test_output(
         )

         executorch_program = to_edge(
-            export(
-                delegated_program,
-                sample_inputs,
-            ),
+            export(delegated_program, sample_inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
                 _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
