Fix review comments

Jiseong-oh · Chen03ZhaoSamsung · Jiseong-oh · commit ad11fb32e6f5 · 2025-10-02T09:13:29.000Z
Fix comments

Co-authored-by: chen03.zhao <chen03.zhao@samsung.com>
diff --git a/backends/samsung/_passes/annotate_qparams.py b/backends/samsung/_passes/annotate_qparams.py
@@ -30,7 +30,7 @@ class AnnotateQparamsPass(ExportPass):
          and add Q->DQ after removing all the Q->DQs.
     """
 
-    deliver_nodes = {
+    propagate_nodes = {
         exir_ops.edge.aten.view_copy.default,
         exir_ops.edge.aten.permute_copy.default,
         exir_ops.edge.aten.squeeze_copy.default,
@@ -83,7 +83,7 @@ def _impl(node: Node, res_list: List[Node]):
             _impl(user, res_list)
         return res_list
 
-    def _deliver_quant_params(self, node: Node):
+    def _propagate_quant_params(self, node: Node):
         assert (
             quantize_attrs := node.meta.get("quantize_attrs")
         ), "Must be annotated node."
@@ -98,25 +98,25 @@ def _deliver_quant_params(self, node: Node):
             ):
                 break
             node = user
-        # Case1: ...-q-dq(cur)-deliver_node-node(not d-dq)
-        # Case2: deliver_node(delivered)-deliver_node-node(not q-dq)
+        # Case1: ...-q-dq(cur)-propagate_node-node(not d-dq)
+        # Case2: propagate_node(propagated)-propagate_node-node(not q-dq)
         for idx, user in enumerate(node.users.keys()):
-            # For the branch who need to be requantized, we deliver the requantize params
+            # For a branch that needs to be requantized, we propagate the requantize params
             user_attrs = requantize_map.get(idx, quantize_attrs)
-            if user.target not in self.deliver_nodes:
+            if user.target not in self.propagate_nodes:
                 continue
             if len(user.users) == 1:
                 # Possibily no need for checking len(users)>1
                 user_of_user = list(user.users)[0]
-                # node-q-dq-deliver-q-dq not need for delivery
+                # node-q-dq-propagate-q-dq: no need for propagation
                 if (
                     user_of_user.target in QuantConstants.QUANT_OPS_KEY_MAP
                     or user_of_user.target in QuantConstants.DEQUANT_OPS_KEY_MAP
                 ):
                     continue
-            # Deliver quant for node-q-dq-deliver_node-node(not qdq)
+            # Propagate quant params for node-q-dq-propagate_node-node(not qdq)
             user.meta["quantize_attrs"] = user_attrs
-            self._deliver_quant_params(user)
+            self._propagate_quant_params(user)
 
     def _annotate_requantize(self, node: Node):
         assert (
@@ -153,16 +153,7 @@ def _check_same(requant_obj, ori_obj) -> bool:
 
     def _annotate(self, graph_module: GraphModule):
         for node in graph_module.graph.nodes:
-            if key_map := QuantConstants.DEQUANT_OPS_KEY_MAP.get(node.target, None):
-                # We will fold node with constant output in the future pass as a constant node
-                # example: Constant->Q->DQ->nodeN->Q->DQ, this seq will be folded to one
-                # We need to store the q-params from last DQ params for quantizing constant value
-                quant_attrs = self.get_quant_attrs(node, key_map)
-                node.meta["quantize_attrs"] = quant_attrs
-                continue
-            else:
-                key_map = QuantConstants.QUANT_OPS_KEY_MAP.get(node.target, None)
-            # ignore pre-quantized params now.
+            key_map = QuantConstants.QUANT_OPS_KEY_MAP.get(node.target, None)
             if not key_map:
                 continue
             source_node = node.args[0]
@@ -172,46 +163,15 @@ def _annotate(self, graph_module: GraphModule):
             ):
                 # Currently, don't add quant info for d_qd node here.
                 continue
+            elif source_node.target == operator.getitem:
+                source_node = source_node.args[0]
             quant_attrs = self.get_quant_attrs(node, key_map)
-            assert node.args[0].target != operator.getitem, "Not supported now."
-            source_node = node.args[0]
             source_node.meta["quantize_attrs"] = quant_attrs
             self._annotate_requantize(source_node)
-            self._deliver_quant_params(source_node)
-
-    def _annotate_real_out(self, graph_module: GraphModule):
-        for output_nodes in filter(
-            lambda x: x.op == "output", graph_module.graph.nodes
-        ):
-            output_nodes = list(output_nodes.args[0])
-            for idx, output_node in enumerate(output_nodes):
-                if output_node.target not in [
-                    *QuantConstants.QUANT_OPS_KEY_MAP.keys(),
-                    *QuantConstants.DEQUANT_OPS_KEY_MAP.keys(),
-                ]:
-                    continue
-                while output_node.args[0].target in [
-                    *QuantConstants.QUANT_OPS_KEY_MAP.keys(),
-                    *QuantConstants.DEQUANT_OPS_KEY_MAP.keys(),
-                ]:
-                    output_node = output_node.args[0]
-                output_nodes[idx] = output_node
-            for node in output_nodes:
-                if node.target in QuantConstants.QUANT_OPS_KEY_MAP:
-                    node.args[0].meta["real_out"] = True
-                else:
-                    node.meta["real_out"] = True
-
-    def _annotate_real_in(self, graph_module: GraphModule):
-        for in_node in filter(
-            lambda x: is_graph_input(self.edge_program, x), graph_module.graph.nodes
-        ):
-            in_node.meta["real_in"] = True
+            self._propagate_quant_params(source_node)
 
     def call(self, graph_module: GraphModule):
         self._annotate(graph_module)
-        self._annotate_real_out(graph_module)
-        self._annotate_real_in(graph_module)
         graph_module.recompile()
         return PassResult(graph_module, True)
 
@@ -223,7 +183,6 @@ def get_quant_attrs(
         for key, attr in zip(quant_attr_keys[1:], quant_node.args[1:]):
             # For channel-wise quantization, params are stored by buffer nodes.
             if isinstance(attr, torch.fx.Node):
-                assert isinstance(attr.target, str), "Not supported now. "
                 attr = get_buffer(self.edge_program, attr)
             quant_attrs[key] = attr
         quant_attrs["target"] = quant_node.target
diff --git a/backends/samsung/_passes/conv1d_to_conv2d.py b/backends/samsung/_passes/conv1d_to_conv2d.py
@@ -93,5 +93,5 @@ def call(self, graph_module: torch.fx.GraphModule):
                 unsqueeze_before.meta["quantize_attrs"] = prev_qparams
 
         graph_module.recompile()
-        graph_module = super().call(graph_module).graph_module
+        _ = super().call(graph_module).graph_module
         return PassResult(graph_module, True)
diff --git a/backends/samsung/_passes/fold_qdq.py b/backends/samsung/_passes/fold_qdq.py
@@ -32,4 +32,5 @@ def call(self, graph_module: GraphModule):
         self._fold(graph_module)
         graph_module.recompile()
         dead_code_elimination_pass(graph_module)
+        _ = super().call(graph_module).graph_module
         return PassResult(graph_module, True)
diff --git a/backends/samsung/_passes/fold_redundant_as_strided_copy.py b/backends/samsung/_passes/fold_redundant_as_strided_copy.py
diff --git a/backends/samsung/_passes/fuse_conv_act.py b/backends/samsung/_passes/fuse_conv_act.py
@@ -73,4 +73,5 @@ def call(self, graph_module: GraphModule):
         self._fuse(graph_module)
         graph_module.recompile()
         dead_code_elimination_pass(graph_module)
+        _ = super().call(graph_module).graph_module
         return PassResult(graph_module, True)
diff --git a/backends/samsung/_passes/remove_useless_ops.py b/backends/samsung/_passes/remove_useless_ops.py
@@ -24,6 +24,41 @@ class RemoveUselessOpPass(ExportPass):
     def __init__(self):
         super().__init__()
 
+    def gen_pattern_as_strided_copy(self, graph_module: GraphModule):
+        for node in list(graph_module.graph.nodes):  # noqa: C416
+            if node.target != exir_ops.edge.aten.mean.dim:
+                continue
+            if len(node.users) != 1:
+                continue
+            successor = list(node.users.keys())[0]
+            if successor.target != exir_ops.edge.aten.as_strided_copy.default:
+                continue
+            is_pattern = True
+            count = 0
+            for i, stride in enumerate(successor.args[2]):
+                if stride < node.meta["val"].size()[i]:
+                    if stride == 1:
+                        count += 1
+                    else:
+                        is_pattern = False
+                        break
+                if count >= 2:
+                    is_pattern = False
+                    break
+            if is_pattern:
+                yield successor
+
+    def _fold_as_strided_copy(
+        self,
+        graph_module: GraphModule,
+    ):
+        for as_strided_copy_node in self.gen_pattern_as_strided_copy(graph_module):
+            for user in list(as_strided_copy_node.users.keys()):
+                user.replace_input_with(
+                    as_strided_copy_node, as_strided_copy_node.args[0]
+                )
+            graph_module.graph.erase_node(as_strided_copy_node)
+
     def _remove_useless(
         self,
         graph_module: GraphModule,
@@ -42,9 +77,11 @@ def _remove_useless(
             for user in [user for user in node.users.keys()]:  # noqa: C416
                 user.replace_input_with(node, node.all_input_nodes[0])
             graph_module.graph.erase_node(node)
+        self._fold_as_strided_copy(graph_module)
 
     def call(self, graph_module: GraphModule):
         self._remove_useless(graph_module)
         graph_module.recompile()
         dead_code_elimination_pass(graph_module)
+        _ = super().call(graph_module).graph_module
         return PassResult(graph_module, True)
diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py
@@ -16,6 +16,7 @@
 from executorch.backends.samsung._passes.customized_constant_prop import (
     ConstantPropPass,
 )
+from executorch.backends.samsung._passes.annotate_qparams import AnnotateQparamsPass
 from executorch.backends.samsung._passes.fold_qdq import FoldQDQPass
 from executorch.backends.samsung._passes.insert_qdq import InsertQDQPass
 from executorch.backends.samsung._passes.replace_scalar_ops import ReplaceOpsWithScalar
@@ -58,6 +59,7 @@ def preprocess(
 
         enn_preprocess_passes = PassManager(
             passes=[
+                AnnotateQparamsPass(edge_program),
                 FoldQDQPass(),
                 ConstantPropPass(edge_program),
                 Conv1dToConv2d(edge_program),
diff --git a/backends/samsung/serialization/enn_graph_schema.py b/backends/samsung/serialization/enn_graph_schema.py
@@ -79,9 +79,6 @@ def define_tensor(  # noqa: C901
 
         if quant_param is not None:
             need_quantize = True
-            if quant_param.get(QuantConstants.QUANT_KEY.quant_dtype) == torch.int32:
-                quant_param = none_quant_tensor_quant_meta()
-                need_quantize = False
 
             scales = self._affine_meta_param(
                 quant_param[QuantConstants.QUANT_KEY.scale]
@@ -131,8 +128,6 @@ def serialize(self):
     def _affine_meta_param(param: Any) -> str:
         type_str_affine_table = {
             torch.int8: "AINT8",
-            torch.int32: "FLOAT32",  # INT32 just used for HW quant.
-            torch.int16: "AINT16",  # INT32 just used for HW quant.
         }
         if isinstance(param, str):
             return param
diff --git a/backends/samsung/utils/export_utils.py b/backends/samsung/utils/export_utils.py
@@ -9,10 +9,6 @@
 
 import executorch.exir as exir
 import torch
-from executorch.backends.samsung._passes.annotate_qparams import AnnotateQparamsPass
-from executorch.backends.samsung._passes.fold_redundant_as_strided_copy import (
-    FoldRudundantAsStridedCopyPass,
-)
 from executorch.backends.samsung._passes.fuse_conv_act import FuseConvActPass
 from executorch.backends.samsung._passes.remove_useless_ops import RemoveUselessOpPass
 from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
@@ -50,11 +46,9 @@ def get_edge_compile_config():
     )
 
 
-def get_enn_pass_list(edge_program: ExportedProgram) -> List[PassType]:
+def get_enn_pass_list() -> List[PassType]:
     return [
         RemoveUselessOpPass(),
-        FoldRudundantAsStridedCopyPass(),
-        AnnotateQparamsPass(edge_program),
         RemoveCloneOpsTransform(),
         FuseConvActPass(),
     ]
@@ -90,7 +84,7 @@ def to_edge_transform_and_lower_to_enn(
 ) -> exir.ExecutorchProgramManager:
     assert compile_specs is not None, "For now, we must deliver complile specs"
     prog = torch.export.export(module, inputs)
-    pass_list = get_enn_pass_list(prog)
+    pass_list = get_enn_pass_list()
     if custom_pass_config:
         pass_list.extend(custom_pass_config)
     return to_edge_transform_and_lower(