
Commit 2744287

Andrew Grebenisan authored and facebook-github-bot committed
Utility function for numerical correctness of edge dialect graphs and reference implementations (#14036)
Summary: Pull Request resolved: #14036

Created two utility functions:
1. Converts an edge dialect graph into one where custom Cadence op nodes are replaced with Python references.
2. Validates the outputs (and optionally intermediates) of the graphs.

Updated two tests in test_replace_ops_passes to utilize these utility functions.

Differential Revision: D81843001
1 parent bd63826 commit 2744287
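
As a rough usage sketch of the new validate_pass decorator added in pass_utils.py below (the pass class name here is hypothetical; the replace_ops.py change applies the same decorator to ReplaceConvWithChannelLastConvPass):

from executorch.backends.cadence.aot.pass_utils import validate_pass
from executorch.exir.pass_base import ExportPass


@validate_pass()  # runs the graph before and after the pass and compares outputs
class MyReplacementPass(ExportPass):  # hypothetical pass, used only for illustration
    ...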

File tree

4 files changed: +180 −35 lines


backends/cadence/aot/TARGETS

Lines changed: 2 additions & 0 deletions
@@ -82,6 +82,8 @@ python_library(
     ],
     deps = [
         ":utils",
+        ":ops_registrations",
+        ":ref_implementations",
         "//caffe2:torch",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",

backends/cadence/aot/pass_utils.py

Lines changed: 129 additions & 2 deletions
@@ -5,9 +5,15 @@
 # LICENSE file in the root directory of this source tree.
 
 # pyre-strict
-
 from dataclasses import dataclass
-from typing import Callable, List, Optional, Set, Type, Union
+from functools import partial
+from operator import attrgetter
+from torch.utils._python_dispatch import _disable_current_modes
+
+from typing import Any, Callable, cast, List, Optional, Set, Type, Union
+
+import executorch.backends.cadence.aot.ops_registrations # noqa
+import executorch.backends.cadence.aot.ref_implementations # noqa
 
 import torch
 from executorch.backends.cadence.aot.utils import get_edge_overload_packet
@@ -16,6 +22,8 @@
 from executorch.exir.pass_base import PassBase, PassResult
 
 from torch._ops import OpOverloadPacket
+from torch.fx import GraphModule
+from torch.utils._pytree import PyTree
 
 
 # Is an overlap in tensor lifetime and storage allowed at the current opt level?
@@ -114,6 +122,125 @@ def op_counts_match(
             return False
     return True
 
+def validate_pass(
+
+) -> Callable[[type[PassBase]], type[PassBase]]:
+    tolerance = 1e-5
+    log_differences = False
+    fail_on_mismatch = True
+
+    def decorator(pass_class: type[PassBase]) -> type[PassBase]:
+        class WrappedPass(pass_class):
+            def call(self, graph_module: GraphModule) -> PassResult:
+                # Ensure we're not in fake tensor mode for actual execution
+                with _disable_current_modes():
+                    # Get inputs for the graph module
+                    original_inputs = self._get_concrete_inputs(graph_module)
+
+                    if original_inputs is None:
+                        raise RuntimeError(f"Could not extract concrete inputs for {pass_class.__name__}")
+
+                    # Run original graph and collect outputs
+                    with torch.no_grad():
+                        original_outputs = graph_module(*original_inputs)
+
+                    # Apply the transformation
+                    result = super().call(graph_module)
+
+                    # Run transformed graph and collect outputs
+                    with torch.no_grad():
+                        transformed_outputs = result.graph_module(*original_inputs)
+
+                    # Compare outputs
+                    self._compare_outputs(
+                        original_outputs,
+                        transformed_outputs,
+                        pass_class.__name__,
+                        tolerance,
+                        log_differences,
+                        fail_on_mismatch,
+                    )
+
+                    return result
+
+            def _get_concrete_inputs(self, graph_module: GraphModule) -> Optional[List[torch.Tensor]]:
+                """Extract concrete tensor inputs from the graph module metadata."""
+                inputs = []
+                for node in graph_module.graph.nodes:
+                    if node.op == "placeholder":
+                        if "val" in node.meta:
+                            val = node.meta["val"]
+                            if hasattr(val, "constant") and val.constant is not None:
+                                inputs.append(val.constant.detach().clone())
+                            elif isinstance(val, torch.Tensor):
+                                # Create a concrete tensor with the same properties
+                                concrete_tensor = torch.testing.make_tensor(val.shape, dtype=val.dtype, device="cpu")
+                                # concrete_tensor = torch.randn(val.shape, dtype=val.dtype)
+                                if hasattr(val, "device"):
+                                    concrete_tensor = concrete_tensor.to(val.device)
+                                inputs.append(concrete_tensor)
+                            else:
+                                raise ValueError(f"Unsupported type for {node.name}: {type(val)}")
+                        else:
+                            raise ValueError(f"Missing 'val' in node metadata for {node.name}")
+                return inputs
+
+            def _compare_outputs(
+                self,
+                original: Any,
+                transformed: Any,
+                pass_name: str,
+                tolerance: float,
+                log_differences: bool,
+                fail_on_mismatch: bool,
+            ) -> None:
+                """Compare outputs and optionally log/fail on differences."""
+                if isinstance(original, torch.Tensor) and isinstance(transformed, torch.Tensor):
+                    if not torch.allclose(original, transformed, atol=tolerance, rtol=tolerance):
+                        max_diff = torch.max(torch.abs(original - transformed)).item()
+                        message = f"{pass_name}: Output mismatch detected. Max difference: {max_diff}"
+
+                        if log_differences:
+                            pass
+                            # logging.warning(message)
+                            # logging.warning(f"Original shape: {original.shape}, Transformed shape: {transformed.shape}")
+
+                        if fail_on_mismatch:
+                            raise ValueError(message)
+                    else:
+                        if log_differences:
+                            pass
+                            # logging.info(f"{pass_name}: Outputs match within tolerance {tolerance}")
+
+                elif isinstance(original, (list, tuple)) and isinstance(transformed, (list, tuple)):
+                    if len(original) != len(transformed):
+                        message = f"{pass_name}: Output count mismatch. Original: {len(original)}, Transformed: {len(transformed)}"
+                        if log_differences:
+                            # logging.warning(message)
+                            pass
+                        if fail_on_mismatch:
+                            raise ValueError(message)
+                    else:
+                        for i, (orig_item, trans_item) in enumerate(zip(original, transformed)):
+                            self._compare_outputs(
+                                orig_item, trans_item, f"{pass_name}[{i}]",
+                                tolerance, log_differences, fail_on_mismatch
+                            )
+                else:
+                    if log_differences:
+                        pass
+                        # logging.info(f"{pass_name}: Non-tensor outputs, skipping numerical comparison")
+
+        # Preserve the original class name and documentation
+        WrappedPass.__name__ = pass_class.__name__
+        WrappedPass.__qualname__ = pass_class.__qualname__
+        WrappedPass.__doc__ = pass_class.__doc__
+
+        return cast(type[PassBase], WrappedPass)  # type: ignore[return-value]
+
+    return decorator
+
+
 
 # Testing utils
 # Return the compute/function nodes in the graph
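
For context, a minimal standalone sketch of the flow the wrapper above implements: build concrete tensors from placeholder shape/dtype metadata, run the module before and after the transform, and compare with torch.allclose at the same tolerance. The relu/clamp functions below are hypothetical stand-ins for the original and transformed graph modules:

import torch

def original_graph(x: torch.Tensor) -> torch.Tensor:  # stand-in for graph_module
    return torch.nn.functional.relu(x)

def transformed_graph(x: torch.Tensor) -> torch.Tensor:  # stand-in for result.graph_module
    return torch.clamp(x, min=0.0)

# Mirrors _get_concrete_inputs: synthesize a real tensor matching placeholder metadata.
x = torch.testing.make_tensor((1, 3, 8, 8), dtype=torch.float32, device="cpu")

with torch.no_grad():
    before = original_graph(x)
    after = transformed_graph(x)

# Mirrors the single-tensor branch of _compare_outputs.
assert torch.allclose(before, after, atol=1e-5, rtol=1e-5)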

backends/cadence/aot/replace_ops.py

Lines changed: 5 additions & 4 deletions
@@ -34,6 +34,7 @@
     CadencePassAttribute,
     none_throws,
     register_cadence_pass,
+    validate_pass,
 )
 from executorch.backends.cadence.aot.remove_ops import RemoveNopSelectOpPass
 from executorch.backends.cadence.aot.utils import get_edge_overload_packet
@@ -947,7 +948,7 @@ def transpose_dims(
             exir_ops.edge.aten.transpose_copy.int, (proxy, dim0, dim1), {}, meta
         )
 
-
+@validate_pass()
 @register_cadence_pass(CadencePassAttribute(opt_level=3))
 class ReplaceConvWithChannelLastConvPass(ExportPassWithTransposeHelper):
     def change_nchw_to_nhwc(self, proxy: ProxyValue, meta: NodeMetadata) -> ProxyValue:
@@ -979,18 +980,18 @@ def call_operator(
     ) -> ProxyValue:
         if op not in {
             exir_ops.edge.cadence.convolution.default,
-            exir_ops.edge.cadence.quantized_conv_nchw.default,
+            exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
         }:
             return super().call_operator(op, args, kwargs, meta)
 
-        quantized_op = op == exir_ops.edge.cadence.quantized_conv_nchw.default
+        quantized_op = op == exir_ops.edge.cadence.quantized_conv_nchw.per_tensor
 
         if not quantized_op and len(args) == 8 and args[-1] is True:
             # Already in NHWC layout.
             return super().call_operator(op, args, kwargs, meta)
 
         new_op = (
-            exir_ops.edge.cadence.quantized_conv_nhwc.default
+            exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor
             if quantized_op
             else exir_ops.edge.cadence.convolution.default
         )

backends/cadence/aot/tests/test_replace_ops_passes.py

Lines changed: 44 additions & 29 deletions
@@ -15,7 +15,11 @@
     GraphBuilder,
     single_op_builder,
 )
-from executorch.backends.cadence.aot.pass_utils import count_node, op_counts_match
+from executorch.backends.cadence.aot.pass_utils import (
+    count_node,
+    op_counts_match,
+    validate_pass,
+)
 from executorch.backends.cadence.aot.replace_ops import (
     MakeSliceAndCatDimOutermostPass,
     ReplaceAdaptiveAvgPoolWithAtenAvgPoolPass,
@@ -1612,7 +1616,7 @@ def test_no_transpose_if_already_channel_last(self) -> None:
 
     def create_quantized_convolution_graph_module(
         self, channels_last: Optional[bool] = None
-    ) -> torch.fx.GraphModule:
+    ) -> tuple[torch.fx.GraphModule, tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
         """Helper to create a quantized conv node.
 
         quantized_conv(
@@ -1622,23 +1626,32 @@ def create_quantized_convolution_graph_module(
             Tensor out_shift, bool channel_last=False) -> (Tensor Z)"
         """
         if channels_last:
-            x = torch.randn(1, 224, 56, 3)
-            w = torch.randn(16, 16, 16, 3)
+            x = torch.randint(
+                low=-128, high=127, size=(1, 224, 56, 3), dtype=torch.int8
+            )
+            w = torch.randint(
+                low=-128, high=127, size=(16, 16, 16, 3), dtype=torch.int8
+            )
         else:
-            x = torch.randn(1, 3, 224, 56)
-            w = torch.randn(16, 3, 16, 16)
-            b = torch.randn(16)
+            x = torch.randint(
+                low=-128, high=127, size=(1, 3, 224, 56), dtype=torch.int8
+            )
+            w = torch.randint(
+                low=-128, high=127, size=(16, 3, 16, 16), dtype=torch.int8
+            )
+
+        b = torch.randint(low=-128, high=127, size=(16,), dtype=torch.int32)
         stride = (2, 2)
         padding = (0, 0)
         dilation = (1, 1)
         groups = 1
         input_zero_point = 0
-        w_zero_point = torch.randn(1)
-        b_scale = torch.randn(1)
+        w_zero_point = 1
+        b_scale = 0.8
         out_scale = 1
         out_zero_point = 0
-        out_multiplier = torch.randn(1)
-        out_shift = torch.randn(1)
+        out_multiplier = 0
+        out_shift = 0
         args = (
             x,
             w,
@@ -1661,44 +1674,39 @@ def create_quantized_convolution_graph_module(
                     x,
                     w,
                     b,
-                    w_zero_point,
-                    b_scale,
-                    out_multiplier,
-                    out_shift,
                 ),
-                op=exir_ops.edge.cadence.quantized_conv_nhwc.default,
+                op=exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
                 args=args,
-            )
+            ), (x, w, b)
         else:
             return single_op_builder(
                 placeholders=(
                     x,
                     w,
                     b,
-                    w_zero_point,
-                    b_scale,
-                    out_multiplier,
-                    out_shift,
                 ),
-                op=exir_ops.edge.cadence.quantized_conv_nchw.default,
+                op=exir_ops.edge.cadence.quantized_conv_nchw.per_tensor,
                 args=args,
-            )
+            ), (x, w, b)
 
     def test_quantized_convolution_default_channel_last(self) -> None:
         # Create a graph with a single convolution node.
-        gm = self.create_quantized_convolution_graph_module()
+        gm, (x, w, b) = self.create_quantized_convolution_graph_module()
         self.assertEqual(
-            count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.default), 1
+            count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.per_tensor), 1
         )
         self.assertEqual(count_node(gm, exir_ops.edge.aten.permute_copy.default), 0)
 
+        # self.assertTrue(numerically_equivalent(gm, (x, w, b), True))
+
         # Apply replacement pass.
         p = ReplaceConvWithChannelLastConvPass()
         gm_after_replacement = p.call(gm).graph_module
         # Check that no replacement was made.
         self.assertEqual(
             count_node(
-                gm_after_replacement, exir_ops.edge.cadence.quantized_conv_nhwc.default
+                gm_after_replacement,
+                exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
             ),
             1,
         )
@@ -1708,26 +1716,33 @@ def test_quantized_convolution_default_channel_last(self) -> None:
             3,
         )
 
+        # self.assertTrue(numerically_equivalent(gm_after_replacement, (x, w, b), True))
+
     def test_no_transpose_if_already_quantized_conv_channel_last(self) -> None:
         # Create a graph with a single im2row node.
-        gm = self.create_quantized_convolution_graph_module(channels_last=True)
+        gm, (x, w, b) = self.create_quantized_convolution_graph_module(
+            channels_last=True
+        )
         # Check if graph module is valid by running exportpass on it.
         gm = ExportPass().call(gm).graph_module
         self.assertEqual(
-            count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.default), 1
+            count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor), 1
        )
+        # self.assertTrue(numerically_equivalent(gm, (x, w, b), True))
 
         # Apply replacement pass.
         p = ReplaceConvWithChannelLastConvPass()
         gm_after_replacement = p.call(gm).graph_module
         # Check that no replacement was made.
         self.assertEqual(
             count_node(
-                gm_after_replacement, exir_ops.edge.cadence.quantized_conv_nhwc.default
+                gm_after_replacement,
+                exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor,
             ),
             1,
         )
         self.assertEqual(count_node(gm, exir_ops.edge.aten.permute_copy.default), 0)
+        # self.assertTrue(numerically_equivalent(gm_after_replacement, (x, w, b), True))
 
 
 class TestMakeSliceAndCatDimOutermostPass(unittest.TestCase):