
Commit bc8f182

Add xnnpack pass to propagate custom meta field to q/dq nodes
1 parent 7d8da19 commit bc8f182

File tree

3 files changed: +160 −0 lines changed

backends/xnnpack/_passes/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -23,6 +23,9 @@
 from executorch.backends.xnnpack._passes.fuse_activation_pass import FuseActivationPass
 from executorch.backends.xnnpack._passes.fuse_batch_norm import FuseBatchNormPass
 from executorch.backends.xnnpack._passes.prelu_reshape_pass import PReLUReshapePass
+from executorch.backends.xnnpack._passes.propagate_custom_meta_pass import (
+    PropagateCustomMetaPass,
+)
 from executorch.backends.xnnpack._passes.remove_redundant_copy_pass import (
     RemoveRedundantCopyPass,
 )
@@ -59,6 +62,7 @@ def __init__(
             DimOrderOpsRevertPass,
             ConvertToUpsampleBilinear2d,
             ConvertToLinearPass,
+            PropagateCustomMetaPass,
             ConvertToSDPAPass,
             ConstPropPass,
             FuseBatchNormPass,
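With this registration, PropagateCustomMetaPass runs early in the default XNNPACK pass pipeline, after ConvertToLinearPass and before ConvertToSDPAPass. A minimal sketch of invoking the pass on its own, assuming XNNPACKPass follows the usual ExecuTorch convention of taking the ExportedProgram in its constructor and being called on the program's GraphModule (`model` and `example_inputs` here are hypothetical):

import torch

from executorch.backends.xnnpack._passes.propagate_custom_meta_pass import (
    PropagateCustomMetaPass,
)

# Hypothetical model and inputs; any exported program containing q/dq
# nodes would exercise the pass.
ep = torch.export.export(model, example_inputs)

# Assumption: the pass is constructed with the ExportedProgram and is
# callable on its GraphModule, returning a PassResult.
result = PropagateCustomMetaPass(ep)(ep.graph_module)
graph_module = result.graph_module  # q/dq nodes now mirror their parents' "custom" meta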
backends/xnnpack/_passes/propagate_custom_meta_pass.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass
from executorch.backends.xnnpack.utils.quant_utils import is_dequant, is_quant
from executorch.exir.pass_base import PassResult


class PropagateCustomMetaPass(XNNPACKPass):
    """
    Pass to propagate node.meta['custom'] from parent nodes to their q/dq child nodes.

    For all quantize/dequantize nodes in the graph, if the parent node has a
    node.meta['custom'] entry, this pass will copy that value to the q/dq node's meta.
    """

    def call(self, graph_module: torch.fx.GraphModule):
        graph = graph_module.graph

        for node in graph.nodes:
            if not (is_quant(node) or is_dequant(node)):
                continue

            # Get the parent node (first input argument)
            if len(node.all_input_nodes) == 0:
                continue

            parent_node = node.args[0]
            if not isinstance(parent_node, torch.fx.Node):
                continue

            if "custom" in parent_node.meta:
                node.meta["custom"] = parent_node.meta["custom"]

        graph_module.recompile()

        # Since we are overriding "call", we need to call the parent's "call"
        # to retrace the graph and regenerate metadata
        graph_module = super().call(graph_module).graph_module

        return PassResult(graph_module, True)
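The core transformation is easy to see on a toy FX graph. The self-contained sketch below uses operator.add as a stand-in for a quantize/dequantize target (the real pass matches nodes via is_quant/is_dequant) and applies the same parent-to-child meta copy; the "custom" payload is a made-up example:

import operator

import torch.fx as fx

graph = fx.Graph()
x = graph.placeholder("x")
# Tag the parent node; this payload is hypothetical.
x.meta["custom"] = {"delegate_constant_tag": "model"}

# Stand-in for a q/dq node whose first input is `x`.
q = graph.call_function(operator.add, (x, x))

# The same propagation step the pass performs:
for node in graph.nodes:
    parent = node.args[0] if node.args else None
    if isinstance(parent, fx.Node) and "custom" in parent.meta:
        node.meta["custom"] = parent.meta["custom"]

assert q.meta["custom"] == {"delegate_constant_tag": "model"}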
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import unittest

import torch

from executorch.backends.xnnpack.partition.config.xnnpack_config import (
    ConfigPrecisionType,
)
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
    XnnpackPartitioner,
)
from executorch.exir import (
    EdgeCompileConfig,
    ExecutorchBackendConfig,
    to_edge_transform_and_lower,
)

from executorch.exir.passes.external_constants_pass import (
    delegate_external_constants_pass_unlifted,
)
from torch.export import export, ExportedProgram

from torchao.quantization.granularity import PerAxis, PerGroup
from torchao.quantization.quant_api import (
    Int8DynamicActivationIntxWeightConfig,
    IntxWeightOnlyConfig,
    quantize_,
)
from torchao.utils import unwrap_tensor_subclass


class TestPropagateCustomMetaPass(unittest.TestCase):
    class ModuleLinear(torch.nn.Module):
        def __init__(
            self,
            in_size: int = 2,
            input_channels: int = 4,
            output_channels: int = 4,
            dtype: torch.dtype = torch.float,
            use_bias: bool = False,
        ):
            super().__init__()
            self.linear = torch.nn.Linear(
                input_channels, output_channels, bias=use_bias
            ).to(dtype=dtype)

            self.ic = input_channels
            self.oc = output_channels
            assert dtype in [torch.float, torch.half], "Unsupported op dtype"
            self.op_dtype = dtype
            self.in_size = in_size

        def forward(self, x: torch.Tensor):
            return self.linear(x)

        def get_random_inputs(self):
            inp = torch.randn(self.in_size, self.ic).to(self.op_dtype)
            return (inp,)

    def test_propagate_custom_meta_pass(self):
        eager_model = self.ModuleLinear(
            in_size=1,
            input_channels=32,
            output_channels=2,
        )
        test_inputs = eager_model.get_random_inputs()
        eager_result = eager_model(*test_inputs)

        # Quantize with torchao quantize_ API.
        linear_config = Int8DynamicActivationIntxWeightConfig(
            weight_dtype=torch.int4,
            weight_granularity=PerGroup(32),
        )
        quantize_(eager_model, linear_config)
        quantized_result = eager_model(*test_inputs)
        unwrap_tensor_subclass(eager_model)

        # Tag the unlifted ep.module().
        tagged_module = export(
            eager_model, test_inputs, dynamic_shapes=None, strict=True
        ).module()
        delegate_external_constants_pass_unlifted(
            module=tagged_module,
            gen_tag_fn=lambda x: "model",  # This is the filename the weights will be saved to. In this case, weights will be saved as "model.ptd"
        )

        ep = export(tagged_module, test_inputs, dynamic_shapes=None, strict=True)
        DynamicallyQuantizedPartitioner = XnnpackPartitioner(
            config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
            per_op_mode=True,
        )
        edge = to_edge_transform_and_lower(
            ep,
            compile_config=EdgeCompileConfig(_check_ir_validity=False),
            partitioner=[XnnpackPartitioner()],
            generate_etrecord=False,
        )
        exec = edge.to_executorch(ExecutorchBackendConfig())

        program_buffer = exec.buffer
        data_buffer = bytes(exec._tensor_data.pop("model"))

        from executorch.extension.pybindings import portable_lib as runtime

        module = runtime._load_for_executorch_from_buffer(program_buffer, data_buffer)
        output = module.forward(test_inputs)
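The test quantizes a small linear module with torchao, tags its weights for external ".ptd" storage via delegate_external_constants_pass_unlifted (which is expected to record a node.meta["custom"] entry on the tagged nodes), lowers through the XNNPACK partitioner, and runs the resulting program from in-memory buffers. A hypothetical spot-check of the invariant the pass establishes, not part of the commit, could run the pass directly on the exported program and compare tags:

from executorch.backends.xnnpack._passes.propagate_custom_meta_pass import (
    PropagateCustomMetaPass,
)
from executorch.backends.xnnpack.utils.quant_utils import is_dequant, is_quant

# Assumption: the pass is callable on the ExportedProgram's GraphModule.
gm = PropagateCustomMetaPass(ep)(ep.graph_module).graph_module
for node in gm.graph.nodes:
    if is_quant(node) or is_dequant(node):
        parent = node.args[0]
        if isinstance(parent, torch.fx.Node) and "custom" in parent.meta:
            assert node.meta.get("custom") == parent.meta["custom"]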
