Skip to content

[Backend Tester] Add tensor error statistic reporting #12809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 77 commits into from
Aug 12, 2025
Merged
Show file tree
Hide file tree
Changes from 63 commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
f120e70
Update
GregoryComer Jul 18, 2025
0fb85e6
Update
GregoryComer Jul 18, 2025
4d8d844
Update
GregoryComer Jul 19, 2025
dc12b40
Update
GregoryComer Jul 21, 2025
ead0616
Update
GregoryComer Jul 22, 2025
0f13676
Update
GregoryComer Jul 22, 2025
b0b01f2
Update
GregoryComer Jul 22, 2025
8b9c9ef
Update
GregoryComer Jul 22, 2025
06bf03a
Update
GregoryComer Jul 22, 2025
2f8f49b
Update
GregoryComer Jul 22, 2025
8ca7766
Update
GregoryComer Jul 22, 2025
bffb95f
Update
GregoryComer Jul 22, 2025
d21492b
Update
GregoryComer Jul 22, 2025
e2c4ea5
Update
GregoryComer Jul 22, 2025
8230848
Update
GregoryComer Jul 22, 2025
2a1f564
Update
GregoryComer Jul 22, 2025
b35e7b1
Update
GregoryComer Jul 22, 2025
5c4c6ce
Update
GregoryComer Jul 22, 2025
9397803
Update
GregoryComer Jul 22, 2025
9dfeb5a
Update
GregoryComer Jul 22, 2025
ff5c4a5
Update
GregoryComer Jul 22, 2025
42a5de5
Update
GregoryComer Jul 22, 2025
402d8f5
Update
GregoryComer Jul 22, 2025
34d3ab3
Update
GregoryComer Jul 22, 2025
1105e04
Update
GregoryComer Jul 22, 2025
482bd21
Update
GregoryComer Jul 22, 2025
ea548b7
Update
GregoryComer Jul 23, 2025
4108f54
Update
GregoryComer Jul 23, 2025
7ef236b
Update
GregoryComer Jul 23, 2025
4a58c9d
Update
GregoryComer Jul 23, 2025
3b866b4
Update
GregoryComer Jul 23, 2025
5ba25cb
Update
GregoryComer Jul 23, 2025
19760fc
Update
GregoryComer Jul 23, 2025
81dfb07
Update
GregoryComer Jul 23, 2025
4d50265
Update
GregoryComer Jul 23, 2025
5f66043
Update
GregoryComer Jul 23, 2025
24e919d
Update
GregoryComer Jul 23, 2025
523cc20
Update
GregoryComer Jul 23, 2025
74c95fe
Update
GregoryComer Jul 23, 2025
5d437b1
Update
GregoryComer Jul 23, 2025
89757ce
Update
GregoryComer Jul 23, 2025
423f79a
Update
GregoryComer Jul 23, 2025
69f7f9c
Update
GregoryComer Jul 23, 2025
c0f6224
Update
GregoryComer Jul 23, 2025
e2ea2a3
Update
GregoryComer Jul 23, 2025
7a2fab5
Update
GregoryComer Jul 23, 2025
033c231
Update
GregoryComer Jul 23, 2025
a9ed762
Update
GregoryComer Jul 23, 2025
64b174a
Update
GregoryComer Jul 23, 2025
3976629
Update
GregoryComer Jul 23, 2025
27cd171
Update
GregoryComer Jul 23, 2025
7bdd3e5
Update
GregoryComer Jul 23, 2025
b1254cd
Update
GregoryComer Jul 23, 2025
f2e2289
Update
GregoryComer Jul 23, 2025
cdd15c1
Update
GregoryComer Jul 23, 2025
e2df06e
Update
GregoryComer Jul 23, 2025
4461bd8
Update
GregoryComer Jul 23, 2025
7e97fd0
Update
GregoryComer Jul 23, 2025
bcb697c
Update
GregoryComer Jul 23, 2025
11a5a02
Update
GregoryComer Jul 24, 2025
244b146
Update
GregoryComer Jul 24, 2025
de21ac2
Update
GregoryComer Jul 24, 2025
fd26fc7
Update
GregoryComer Jul 24, 2025
4ae840d
Update
GregoryComer Jul 24, 2025
710ea49
Update
GregoryComer Jul 24, 2025
32f54b0
Update
GregoryComer Jul 24, 2025
a27d18c
Update
GregoryComer Jul 24, 2025
2eb59fc
Update
GregoryComer Jul 24, 2025
5cc4941
Update
GregoryComer Jul 24, 2025
ef7af5c
Update
GregoryComer Jul 24, 2025
dd09555
Update
GregoryComer Aug 8, 2025
f1db3a0
Update
GregoryComer Aug 8, 2025
e0700b2
Update
GregoryComer Aug 8, 2025
f261355
Update
GregoryComer Aug 11, 2025
c3a24f9
Update
GregoryComer Aug 11, 2025
1697cbc
Update
GregoryComer Aug 11, 2025
7e1a002
Update
GregoryComer Aug 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions backends/qualcomm/tests/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,13 @@ python_library(
"//executorch/backends/qualcomm/debugger:utils",
],
)

# Buck target for the Qualcomm backend tester harness.
# Trailing commas added for consistency with the other rules in this file.
python_library(
    name = "tester",
    srcs = [
        "tester.py",
    ],
    deps = [
        ":test_qnn_delegate",
    ],
)
87 changes: 87 additions & 0 deletions backends/qualcomm/tests/tester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Any, List, Optional, Tuple

import executorch
import executorch.backends.test.harness.stages as BaseStages

import torch
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
from executorch.backends.qualcomm.utils.utils import (
generate_qnn_executorch_compiler_spec,
generate_htp_compiler_spec,
get_soc_to_chipset_map,
)
from executorch.backends.test.harness import Tester as TesterBase
from executorch.backends.test.harness.stages import StageType
from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
from executorch.exir.backend.partitioner import Partitioner
from torch.export import ExportedProgram


class Partition(BaseStages.Partition):
    """Partition stage that defaults to the Qualcomm QNN partitioner."""

    def __init__(self, partitioner: Optional[Partitioner] = None):
        # Fall back to QnnPartitioner when the caller does not provide one.
        chosen_partitioner = partitioner or QnnPartitioner
        super().__init__(partitioner=chosen_partitioner)


class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
    """To-edge/lowering stage specialized for the Qualcomm QNN backend.

    Builds QNN compiler specs for the given SoC (HTP backend, fp16) and runs
    the QNN pass pipeline before lowering.
    """

    def __init__(
        self,
        partitioners: Optional[List[Partitioner]] = None,
        edge_compile_config: Optional[EdgeCompileConfig] = None,
        soc_model: str = "SM8650"
    ):
        # Build HTP backend options in fp16 mode and resolve the chipset
        # for the requested SoC model (defaults to SM8650).
        backend_options = generate_htp_compiler_spec(use_fp16=True)
        self.chipset = get_soc_to_chipset_map()[soc_model]
        self.compiler_specs = generate_qnn_executorch_compiler_spec(
            soc_model=self.chipset,
            backend_options=backend_options,
        )

        # IR validity checking is disabled by default; QNN lowering produces
        # graphs that do not pass the default edge IR checks.
        super().__init__(
            partitioners=partitioners or [QnnPartitioner(self.compiler_specs)],
            edge_compile_config=edge_compile_config or EdgeCompileConfig(_check_ir_validity=False),
            default_partitioner_cls=QnnPartitioner,
        )

    def run(self, artifact: ExportedProgram, inputs=None) -> None:
        """Transform `artifact` with the QNN pass pipeline, then lower it.

        Stores the result in `self.edge_dialect_program`. `inputs` is accepted
        for stage-interface compatibility but unused here.
        """
        # Apply QNN's export-pipeline transforms, then gather the to-edge
        # transform passes for the transformed program.
        ep = QnnPassManager().transform_for_export_pipeline(artifact)
        transform_passes = QnnPassManager().get_to_edge_transform_passes(ep)

        # NOTE(review): self.partitioners / self.edge_compile_conf are
        # presumably set by the base class __init__ — confirm attribute names.
        self.edge_dialect_program = to_edge_transform_and_lower(
            ep,
            transform_passes=transform_passes,
            partitioner=self.partitioners,
            compile_config=self.edge_compile_conf,
        )
)


class QualcommTester(TesterBase):
    """Backend tester specialized for the Qualcomm QNN backend."""

    def __init__(
        self,
        module: torch.nn.Module,
        example_inputs: Tuple[torch.Tensor],
        dynamic_shapes: Optional[Tuple[Any]] = None,
    ):
        # Start from the default stage registry, then swap in the
        # Qualcomm-specific partition and lowering stages.
        stage_classes = dict(
            executorch.backends.test.harness.Tester.default_stage_classes()
        )
        stage_classes[StageType.PARTITION] = Partition
        stage_classes[StageType.TO_EDGE_TRANSFORM_AND_LOWER] = ToEdgeTransformAndLower

        super().__init__(
            module=module,
            stage_classes=stage_classes,
            example_inputs=example_inputs,
            dynamic_shapes=dynamic_shapes,
        )
91 changes: 91 additions & 0 deletions backends/test/harness/error_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from dataclasses import dataclass
from torch.ao.ns.fx.utils import compute_sqnr

import torch

@dataclass
class TensorStatistics:
    """Summary statistics describing a single tensor."""

    shape: torch.Size  # Shape of the tensor.
    numel: int  # Total number of elements.
    median: float  # Median over all elements.
    mean: float  # Mean over all elements.
    max: torch.types.Number  # Largest element.
    min: torch.types.Number  # Smallest element.

    @classmethod
    def from_tensor(cls, tensor: torch.Tensor) -> "TensorStatistics":
        """Compute summary statistics for the given tensor."""
        values = tensor.flatten()
        return cls(
            shape=tensor.shape,
            numel=tensor.numel(),
            median=values.median().item(),
            mean=values.mean().item(),
            max=values.max().item(),
            min=values.min().item(),
        )

@dataclass
class ErrorStatistics:
    """ Contains statistics derived from the difference of two tensors. """

    reference_stats: TensorStatistics
    """ Statistics for the reference tensor. """

    actual_stats: TensorStatistics
    """ Statistics for the actual tensor. """

    error_l2_norm: float | None
    """ The L2 norm of the error between the actual and reference tensor. """

    error_mae: float | None
    """ The mean absolute error between the actual and reference tensor. """

    error_max: float | None
    """ The maximum absolute elementwise error between the actual and reference tensor. """

    error_msd: float | None
    """ The mean signed deviation between the actual and reference tensor. """

    sqnr: float | None
    """ The signal-to-quantization-noise ratio between the actual and reference tensor. """

    @classmethod
    def from_tensors(
        cls, actual: torch.Tensor, reference: torch.Tensor
    ) -> "ErrorStatistics":
        """ Creates an ErrorStatistics object from two tensors.

        If the tensor shapes differ, per-tensor statistics are still reported
        but all error fields are None, since an elementwise comparison is not
        meaningful. (Shape mismatches can currently arise from in-place op
        functionalization; treating this as a hard error is tracked upstream.)
        """
        # Hoist the per-tensor stats: both branches below need them.
        reference_stats = TensorStatistics.from_tensor(reference)
        actual_stats = TensorStatistics.from_tensor(actual)

        if actual.shape != reference.shape:
            return cls(
                reference_stats=reference_stats,
                actual_stats=actual_stats,
                error_l2_norm=None,
                error_mae=None,
                error_max=None,
                error_msd=None,
                sqnr=None,
            )

        # Compute the error in float64 to reduce rounding during reduction.
        error = actual.to(torch.float64) - reference.to(torch.float64)
        flat_error = torch.flatten(error)

        return cls(
            reference_stats=reference_stats,
            actual_stats=actual_stats,
            error_l2_norm=torch.linalg.norm(flat_error).item(),
            error_mae=torch.mean(torch.abs(flat_error)).item(),
            error_max=torch.max(torch.abs(flat_error)).item(),
            error_msd=torch.mean(flat_error).item(),
            sqnr=compute_sqnr(actual, reference).item(),
        )
26 changes: 19 additions & 7 deletions backends/test/harness/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import torch

from executorch.backends.test.harness.error_statistics import ErrorStatistics
from executorch.backends.test.harness.stages import (
Export,
Partition,
Expand Down Expand Up @@ -302,17 +303,15 @@ def run_method_and_compare_outputs(
atol=1e-03,
rtol=1e-03,
qtol=0,
statistics_callback: Callable[[ErrorStatistics], None] | None = None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not completely happy with the callback approach for exposing this, but I don't really have a better idea, since the tester relies on a builder-style pattern where it returns self to allow chaining. I'm open to suggestions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just update the tester method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify what you're thinking? Are you meaning update the tester run_method_and_compare outputs to directly return the error stats and then update all of the callers to not use it in a chained fashion? Or something else?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modify the existing method, keeping the outside behavior but also add def get_comparison_stats(self) method on that stage or something?
Or if you want to pass a callback for flexibility that's also fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed as #13337. Will stack this on top.

):
number_of_runs = 1 if inputs is not None else num_runs
reference_stage = self.stages[StageType.EXPORT]

stage = stage or self.cur

print(f"Comparing Stage {stage} with Stage {reference_stage}")
for run_iteration in range(number_of_runs):
for _ in range(number_of_runs):
inputs_to_run = inputs if inputs else next(self.generate_random_inputs())
input_shapes = [generated_input.shape for generated_input in inputs_to_run]
print(f"Run {run_iteration} with input shapes: {input_shapes}")

# Reference output (and quantization scale)
(
Expand All @@ -325,13 +324,19 @@ def run_method_and_compare_outputs(
# Output from running artifact at stage
stage_output = self.stages[stage].run_artifact(inputs_to_run)
self._compare_outputs(
reference_output, stage_output, quantization_scale, atol, rtol, qtol
reference_output, stage_output, quantization_scale, atol, rtol, qtol, statistics_callback
)

return self

@staticmethod
def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03):
def _assert_outputs_equal(
model_output,
ref_output,
atol=1e-03,
rtol=1e-03,
statistics_callback: Callable[[ErrorStatistics], None] | None = None,
):
"""
Helper testing function that asserts that the model output and the reference output
are equal with some tolerance. Due to numerical differences between eager mode and
Expand All @@ -346,6 +351,11 @@ def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03):
for i in range(len(model_output)):
model = model_output[i]
ref = ref_output[i]

error_stats = ErrorStatistics.from_tensors(model, ref)
if statistics_callback is not None:
statistics_callback(error_stats)

assert (
ref.shape == model.shape
), f"Output {i} shape {model.shape} does not match reference output shape {ref.shape}"
Expand Down Expand Up @@ -383,6 +393,7 @@ def _compare_outputs(
atol=1e-03,
rtol=1e-03,
qtol=0,
statistics_callback: Callable[[ErrorStatistics], None] | None = None,
):
"""
Compares the original of the original nn module with the output of the generated artifact.
Expand All @@ -399,12 +410,13 @@ def _compare_outputs(
# atol by qtol quant units.
if quantization_scale is not None:
atol += quantization_scale * qtol

Tester._assert_outputs_equal(
stage_output,
reference_output,
atol=atol,
rtol=rtol,
statistics_callback=statistics_callback,
)

@staticmethod
Expand Down
3 changes: 2 additions & 1 deletion backends/test/suite/context.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Test run context management. This is used to determine the test context for reporting
# purposes.
class TestContext:
def __init__(self, test_name: str, flow_name: str, params: dict | None):
def __init__(self, test_name: str, test_base_name: str, flow_name: str, params: dict | None):
self.test_name = test_name
self.test_base_name = test_base_name
self.flow_name = flow_name
self.params = params

Expand Down
16 changes: 16 additions & 0 deletions backends/test/suite/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,20 @@ def all_flows() -> dict[str, TestFlow]:
except Exception as e:
logger.info(f"Skipping Core ML flow registration: {e}")

try:
from executorch.backends.test.suite.flows.vulkan import VULKAN_TEST_FLOW
flows += [
VULKAN_TEST_FLOW,
]
except Exception as e:
logger.info(f"Skipping Vulkan flow registration: {e}")

try:
from executorch.backends.test.suite.flows.qualcomm import QUALCOMM_TEST_FLOW
flows += [
QUALCOMM_TEST_FLOW,
]
except Exception as e:
logger.info(f"Skipping Qualcomm flow registration: {e}")

return {f.name: f for f in flows if f is not None}
15 changes: 15 additions & 0 deletions backends/test/suite/flows/qualcomm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from executorch.backends.qualcomm.tests.tester import QualcommTester
from executorch.backends.test.suite.flow import TestFlow

def _create_qualcomm_flow(name: str, quantize: bool = False) -> TestFlow:
    """Build a TestFlow that runs tests through the Qualcomm QNN backend."""
    flow_kwargs = {
        "backend": "qualcomm",
        "tester_factory": QualcommTester,
        "quantize": quantize,
    }
    return TestFlow(name, **flow_kwargs)


# Default (non-quantized) Qualcomm test flow.
QUALCOMM_TEST_FLOW = _create_qualcomm_flow("qualcomm")
15 changes: 15 additions & 0 deletions backends/test/suite/flows/vulkan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from executorch.backends.vulkan.test.tester import VulkanTester
from executorch.backends.test.suite.flow import TestFlow

def _create_vulkan_flow(name: str, quantize: bool = False) -> TestFlow:
    """Build a TestFlow that runs tests through the Vulkan backend."""
    flow_kwargs = {
        "backend": "vulkan",
        "tester_factory": VulkanTester,
        "quantize": quantize,
    }
    return TestFlow(name, **flow_kwargs)


# Default (non-quantized) Vulkan test flow.
VULKAN_TEST_FLOW = _create_vulkan_flow("vulkan")
13 changes: 7 additions & 6 deletions backends/test/suite/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,19 @@ def _create_test(
dtype: torch.dtype,
use_dynamic_shapes: bool,
):
dtype_name = str(dtype)[6:] # strip "torch."
test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}"
if use_dynamic_shapes:
test_name += "_dynamic_shape"

def wrapped_test(self):
params = {
"dtype": dtype,
"use_dynamic_shapes": use_dynamic_shapes,
}
with TestContext(test_name, flow.name, params):
with TestContext(test_name, test_func.__name__, flow.name, params):
test_func(self, flow, dtype, use_dynamic_shapes)

dtype_name = str(dtype)[6:] # strip "torch."
test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}"
if use_dynamic_shapes:
test_name += "_dynamic_shape"

wrapped_test._name = test_func.__name__ # type: ignore
wrapped_test._flow = flow # type: ignore

Expand Down Expand Up @@ -118,6 +118,7 @@ def run_model_test(
inputs,
flow,
context.test_name,
context.test_base_name,
context.params,
dynamic_shapes=dynamic_shapes,
)
Expand Down
Loading
Loading