pytorch
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 7 additions & 7 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎backends/qualcomm/tests/tester.py‎
Lines changed: 4 additions & 1 deletion b/‎backends/qualcomm/tests/tester.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎backends/test/harness/error_statistics.py‎
Lines changed: 99 additions & 0 deletions b/‎backends/test/harness/error_statistics.py‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎backends/test/harness/stages/to_edge_transform_and_lower.py‎
Lines changed: 5 additions & 1 deletion b/‎backends/test/harness/stages/to_edge_transform_and_lower.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎backends/test/harness/tester.py‎
Lines changed: 31 additions & 13 deletions b/‎backends/test/harness/tester.py‎
Lines changed: 31 additions & 13 deletions
diff --git a/‎backends/test/harness/tests/test_error_statistics.py‎
Lines changed: 65 additions & 0 deletions b/‎backends/test/harness/tests/test_error_statistics.py‎
Lines changed: 65 additions & 0 deletions
@@ -801,6 +801,8 @@ jobs:
       id-token: write
       contents: read
     strategy:
+      matrix:
+        enable-etdump: ['', '--enable-etdump']
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -820,7 +822,7 @@ jobs:
         source .ci/scripts/setup-emscripten.sh
 
         # Test selective build
-        bash scripts/build_wasm_tests.sh
+        bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
 
         # Install Jest
         cd cmake-out-wasm/extension/wasm/test
@@ -892,12 +894,10 @@ jobs:
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh --build
 
         # Test models serially
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh mv2
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh mv3
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh edsr
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh resnet18
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh resnet50
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh dl3
+        models="mv2 mv3 edsr resnet18 resnet50 dl3"
+        for model in $models; do
+          python -m examples.vulkan.export --model_name=$model --test
+        done
 
 
 
 
@@ -52,7 +52,9 @@ def __init__(
             default_partitioner_cls=QnnPartitioner,
         )
 
-    def run(self, artifact: ExportedProgram, inputs=None) -> None:
+    def run(
+        self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False
+    ) -> None:
         ep = QnnPassManager().transform_for_export_pipeline(artifact)
         transform_passes = QnnPassManager().get_to_edge_transform_passes(ep)
 
@@ -61,6 +63,7 @@ def run(self, artifact: ExportedProgram, inputs=None) -> None:
             transform_passes=transform_passes,
             partitioner=self.partitioners,
             compile_config=self.edge_compile_conf,
+            generate_etrecord=generate_etrecord,
         )
 
 
 
@@ -0,0 +1,99 @@
+from dataclasses import dataclass
+
+import torch
+from torch.ao.ns.fx.utils import compute_sqnr
+
+
+@dataclass
+class TensorStatistics:
+    """Contains summary statistics for a tensor.
+
+    Instances are normally created with ``from_tensor``, which reduces the
+    tensor to plain Python scalars.
+    """
+
+    shape: torch.Size
+    """ The shape of the tensor. """
+
+    numel: int
+    """ The number of elements in the tensor. """
+
+    median: float
+    """ The median of the tensor (NaN when the tensor is empty). """
+
+    mean: float
+    """ The mean of the tensor (NaN when the tensor is empty). """
+
+    max: torch.types.Number
+    """ The maximum element of the tensor (NaN when the tensor is empty). """
+
+    min: torch.types.Number
+    """ The minimum element of the tensor (NaN when the tensor is empty). """
+
+    @classmethod
+    def from_tensor(cls, tensor: torch.Tensor) -> "TensorStatistics":
+        """Creates a TensorStatistics object from a tensor.
+
+        Args:
+            tensor: The tensor to summarize. Tensors that are neither
+                float32 nor float64 (e.g. integer, bool or half precision)
+                are promoted to float64 before the reductions run.
+
+        Returns:
+            A TensorStatistics whose scalar fields are Python numbers. For an
+            empty tensor, median, mean, max and min are all NaN.
+        """
+        flattened = torch.flatten(tensor)
+
+        # The reductions below raise on an empty tensor, so report the shape
+        # and mark every statistic as undefined instead of failing.
+        if flattened.numel() == 0:
+            nan = float("nan")
+            return cls(
+                shape=tensor.shape,
+                numel=0,
+                median=nan,
+                mean=nan,
+                max=nan,
+                min=nan,
+            )
+
+        # torch.quantile only accepts float32/float64 input and mean() rejects
+        # integer dtypes. Complex tensors are left untouched so they keep
+        # failing loudly rather than silently dropping the imaginary part.
+        if not flattened.is_complex() and flattened.dtype not in (
+            torch.float32,
+            torch.float64,
+        ):
+            flattened = flattened.to(torch.float64)
+
+        # NOTE(review): torch.quantile is believed to reject very large inputs
+        # (on the order of 16M elements) - confirm if huge outputs are expected.
+        return cls(
+            shape=tensor.shape,
+            numel=tensor.numel(),
+            median=torch.quantile(flattened, q=0.5).item(),
+            mean=flattened.mean().item(),
+            max=flattened.max().item(),
+            min=flattened.min().item(),
+        )
+
+
+@dataclass
+class ErrorStatistics:
+    """Contains statistics derived from the difference of two tensors.
+
+    All error fields are None when the two tensors have different shapes,
+    because no elementwise difference can be formed.
+    """
+
+    reference_stats: TensorStatistics
+    """ Statistics for the reference tensor. """
+
+    actual_stats: TensorStatistics
+    """ Statistics for the actual tensor. """
+
+    error_l2_norm: float | None
+    """ The L2 norm of the error between the actual and reference tensor. """
+
+    error_mae: float | None
+    """ The mean absolute error between the actual and reference tensor. """
+
+    error_max: float | None
+    """ The maximum absolute elementwise error between the actual and reference tensor. """
+
+    error_msd: float | None
+    """ The mean signed deviation between the actual and reference tensor. """
+
+    sqnr: float | None
+    """ The signal-to-quantization-noise ratio between the actual and reference tensor. """
+
+    @classmethod
+    def from_tensors(
+        cls, actual: torch.Tensor, reference: torch.Tensor
+    ) -> "ErrorStatistics":
+        """Creates an ErrorStatistics object from two tensors.
+
+        Args:
+            actual: The tensor under test.
+            reference: The tensor to compare against.
+
+        Returns:
+            An ErrorStatistics holding per-tensor statistics for both inputs.
+            The error fields are computed from ``actual - reference`` in
+            float64, or are all None when the shapes differ.
+        """
+        actual = actual.to(torch.float64)
+        reference = reference.to(torch.float64)
+
+        # Per-tensor statistics do not depend on the shapes matching, so
+        # compute them once for both return paths.
+        reference_stats = TensorStatistics.from_tensor(reference)
+        actual_stats = TensorStatistics.from_tensor(actual)
+
+        if actual.shape != reference.shape:
+            return cls(
+                reference_stats=reference_stats,
+                actual_stats=actual_stats,
+                error_l2_norm=None,
+                error_mae=None,
+                error_max=None,
+                error_msd=None,
+                sqnr=None,
+            )
+
+        error = actual - reference
+        flat_error = torch.flatten(error)
+        abs_error = torch.abs(flat_error)
+
+        # torch.max raises on an empty tensor; report NaN instead of failing.
+        if flat_error.numel() > 0:
+            error_max = torch.max(abs_error).item()
+        else:
+            error_max = float("nan")
+
+        return cls(
+            reference_stats=reference_stats,
+            actual_stats=actual_stats,
+            error_l2_norm=torch.linalg.norm(flat_error).item(),
+            error_mae=torch.mean(abs_error).item(),
+            error_max=error_max,
+            error_msd=torch.mean(flat_error).item(),
+            # Torch sqnr implementation requires float32 due to decorator logic
+            sqnr=compute_sqnr(actual.to(torch.float), reference.to(torch.float)).item(),
+        )
@@ -7,6 +7,7 @@
     to_edge_transform_and_lower,
 )
 from executorch.exir.backend.partitioner import Partitioner
+
 from torch.export import ExportedProgram
 
 
@@ -24,11 +25,14 @@ def __init__(
     def stage_type(self) -> StageType:
         return StageType.TO_EDGE_TRANSFORM_AND_LOWER
 
-    def run(self, artifact: ExportedProgram, inputs=None) -> None:
+    def run(
+        self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False
+    ) -> None:
         self.edge_dialect_program = to_edge_transform_and_lower(
             artifact,
             compile_config=self.edge_compile_conf,
             partitioner=self.partitioners,
+            generate_etrecord=generate_etrecord,
         )
 
     @property
 
@@ -4,6 +4,7 @@
 
 import torch
 
+from executorch.backends.test.harness.error_statistics import ErrorStatistics
 from executorch.backends.test.harness.stages import (
     Export,
     Partition,
@@ -182,10 +183,10 @@ def _post(self, stage):
         assert stage_type in self.stages
         self.stages[stage_type] = stage
 
-    def _run_stage(self, stage_instance, inputs=None):
+    def _run_stage(self, stage_instance, inputs=None, *args, **kwargs):
         assert isinstance(stage_instance, Stage)
         prev_stage_artifact = self._pre(stage_instance)
-        stage_instance.run(prev_stage_artifact, inputs=inputs)
+        stage_instance.run(prev_stage_artifact, inputs=inputs, *args, **kwargs)  # noqa
         self._post(stage_instance)
         return self
 
@@ -212,11 +213,14 @@ def to_edge(self, to_edge_stage: Optional[ToEdge] = None):
         return res
 
     def to_edge_transform_and_lower(
-        self, to_edge_and_transform_stage: Optional[ToEdgeTransformAndLower] = None
+        self,
+        to_edge_and_transform_stage: Optional[ToEdgeTransformAndLower] = None,
+        generate_etrecord: bool = False,
     ):
         return self._run_stage(
             to_edge_and_transform_stage
-            or self._get_default_stage(StageType.TO_EDGE_TRANSFORM_AND_LOWER)
+            or self._get_default_stage(StageType.TO_EDGE_TRANSFORM_AND_LOWER),
+            generate_etrecord=generate_etrecord,
         )
 
     def run_passes(self, run_passes_stage: Optional[RunPasses] = None):
@@ -302,20 +306,15 @@ def run_method_and_compare_outputs(
         atol=1e-03,
         rtol=1e-03,
         qtol=0,
+        statistics_callback: Callable[[ErrorStatistics], None] | None = None,
     ):
         number_of_runs = 1 if inputs is not None else num_runs
         reference_stage = self.stages[StageType.EXPORT]
 
         stage = stage or self.cur
 
-        print(f"Comparing Stage {stage} with Stage {reference_stage}")
-        for run_iteration in range(number_of_runs):
+        for _ in range(number_of_runs):
             inputs_to_run = inputs if inputs else next(self.generate_random_inputs())
-            input_shapes = [
-                generated_input.shape if hasattr(generated_input, "shape") else None
-                for generated_input in inputs_to_run
-            ]
-            print(f"Run {run_iteration} with input shapes: {input_shapes}")
 
             # Reference output (and quantization scale)
             (
@@ -328,13 +327,25 @@ def run_method_and_compare_outputs(
             # Output from running artifact at stage
             stage_output = self.stages[stage].run_artifact(inputs_to_run)
             self._compare_outputs(
-                reference_output, stage_output, quantization_scale, atol, rtol, qtol
+                reference_output,
+                stage_output,
+                quantization_scale,
+                atol,
+                rtol,
+                qtol,
+                statistics_callback,
             )
 
         return self
 
     @staticmethod
-    def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03):
+    def _assert_outputs_equal(
+        model_output,
+        ref_output,
+        atol=1e-03,
+        rtol=1e-03,
+        statistics_callback: Callable[[ErrorStatistics], None] | None = None,
+    ):
         """
         Helper testing function that asserts that the model output and the reference output
         are equal with some tolerance. Due to numerical differences between eager mode and
@@ -349,6 +360,11 @@ def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03):
         for i in range(len(model_output)):
             model = model_output[i]
             ref = ref_output[i]
+
+            error_stats = ErrorStatistics.from_tensors(model, ref)
+            if statistics_callback is not None:
+                statistics_callback(error_stats)
+
             assert (
                 ref.shape == model.shape
             ), f"Output {i} shape {model.shape} does not match reference output shape {ref.shape}"
@@ -386,6 +402,7 @@ def _compare_outputs(
         atol=1e-03,
         rtol=1e-03,
         qtol=0,
+        statistics_callback: Callable[[ErrorStatistics], None] | None = None,
     ):
         """
         Compares the original of the original nn module with the output of the generated artifact.
@@ -408,6 +425,7 @@ def _compare_outputs(
             reference_output,
             atol=atol,
             rtol=rtol,
+            statistics_callback=statistics_callback,
         )
 
     @staticmethod
 
@@ -0,0 +1,65 @@
+import unittest
+
+import torch
+from executorch.backends.test.harness.error_statistics import ErrorStatistics
+
+
+class ErrorStatisticsTests(unittest.TestCase):
+    """Unit tests for ``ErrorStatistics.from_tensors``."""
+
+    def _check_tensor_stats(self, stats, shape, numel, median, mean, maximum, minimum):
+        """Asserts every summary field of a per-tensor statistics object."""
+        self.assertEqual(stats.shape, torch.Size(shape))
+        self.assertEqual(stats.numel, numel)
+        self.assertEqual(stats.median, median)
+        self.assertEqual(stats.mean, mean)
+        self.assertEqual(stats.max, maximum)
+        self.assertEqual(stats.min, minimum)
+
+    def test_error_stats_simple(self):
+        """Same-shape tensors produce both tensor summaries and error metrics."""
+        actual = torch.tensor([1, 2, 3, 4])
+        reference = torch.tensor([2, 2, 2, 5])
+
+        result = ErrorStatistics.from_tensors(actual, reference)
+
+        # Check actual tensor statistics
+        self._check_tensor_stats(result.actual_stats, [4], 4, 2.5, 2.5, 4, 1)
+
+        # Check reference tensor statistics
+        self._check_tensor_stats(result.reference_stats, [4], 4, 2.0, 2.75, 5, 2)
+
+        # Check error statistics
+        self.assertAlmostEqual(result.error_l2_norm, 1.732, places=3)
+        self.assertEqual(result.error_mae, 0.75)
+        self.assertEqual(result.error_max, 1.0)
+        self.assertEqual(result.error_msd, -0.25)
+        self.assertAlmostEqual(result.sqnr, 10.0, places=3)
+
+    def test_error_stats_different_shapes(self):
+        """Mismatched shapes keep the tensor summaries but null out the errors."""
+        # Create tensors with different shapes
+        actual = torch.tensor([1, 2, 3, 4])
+        reference = torch.tensor([[2, 3], [4, 5]])
+
+        result = ErrorStatistics.from_tensors(actual, reference)
+
+        # Check actual tensor statistics
+        self._check_tensor_stats(result.actual_stats, [4], 4, 2.5, 2.5, 4, 1)
+
+        # Check reference tensor statistics
+        self._check_tensor_stats(result.reference_stats, [2, 2], 4, 3.5, 3.5, 5, 2)
+
+        # Check that all error values are None when shapes differ
+        for field_name in (
+            "error_l2_norm",
+            "error_mae",
+            "error_max",
+            "error_msd",
+            "sqnr",
+        ):
+            self.assertIsNone(getattr(result, field_name))
Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,9 @@ def __init__(`
`52`	`52`	`default_partitioner_cls=QnnPartitioner,`
`53`	`53`	`)`
`54`	`54`
`55`		`- def run(self, artifact: ExportedProgram, inputs=None) -> None:`
	`55`	`+ def run(`
	`56`	`+ self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False`
	`57`	`+ ) -> None:`
`56`	`58`	`ep = QnnPassManager().transform_for_export_pipeline(artifact)`
`57`	`59`	`transform_passes = QnnPassManager().get_to_edge_transform_passes(ep)`
`58`	`60`
`@@ -61,6 +63,7 @@ def run(self, artifact: ExportedProgram, inputs=None) -> None:`
`61`	`63`	`transform_passes=transform_passes,`
`62`	`64`	`partitioner=self.partitioners,`
`63`	`65`	`compile_config=self.edge_compile_conf,`
	`66`	`+ generate_etrecord=generate_etrecord,`
`64`	`67`	`)`
`65`	`68`
`66`	`69`