Add test coverage for the cast op

haowhsu-quic · haowhsu-quic · commit dca006f308f4 · 2025-05-22T08:30:42.000+08:00
diff --git a/backends/qualcomm/_passes/fuse_consecutive_cast.py b/backends/qualcomm/_passes/fuse_consecutive_cast.py
@@ -74,7 +74,8 @@ def _canonicalize_cast(
                             clone_cast_node = graph.create_node(
                                 "call_function",
                                 exir_ops.edge.aten._to_copy.default,
-                                (n.args[0]),
+                                n.args,
+                                kwargs=n.kwargs,
                             )
                             clone_cast_node.meta = n.meta
                             users[i].replace_input_with(n, clone_cast_node)
@@ -98,6 +99,8 @@ def _traverse(self, node):
     def _fuse(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
         for n in graph_module.graph.nodes:
             self._traverse(n)
+            # TODO: decide how to handle the following scenario (it won't happen in a quantized graph):
+            #       fp -> to(i32) -> to(fp)
             if len(self.nodes) > 1:
                 input_node, output_node = self.nodes[0], self.nodes[-1]
                 output_node.replace_input_with(output_node.args[0], input_node.args[0])
diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py
@@ -166,6 +166,16 @@ def forward(self, x):
         return x.type(torch.IntTensor)
 
 
+class CastMultiUsers(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x, y):
+        index = x.to(torch.long)
+        res = torch.gather(y, dim=1, index=index)
+        return res + index.to(torch.int32)
+
+
 class Cat2(torch.nn.Module):
     def __init__(self):
         super().__init__()
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -165,9 +165,14 @@ def test_qnn_backend_bmm(self):
         self.lower_module_and_test_output(module, sample_input)
 
     def test_qnn_backend_cast(self):
-        module = Cast()  # noqa: F405
-        sample_input = (10 * torch.rand((9, 4, 5, 3)),)
-        self.lower_module_and_test_output(module, sample_input)
+        modules = [Cast(), CastMultiUsers()]  # noqa: F405
+        sample_inputs = [
+            (10 * torch.rand((9, 4, 5, 3)),),
+            (torch.randint(0, 3, size=(3, 3)), torch.randn(3, 3)),
+        ]
+        for i, (module, sample_input) in enumerate(zip(modules, sample_inputs)):
+            with self.subTest(i=i):
+                self.lower_module_and_test_output(module, sample_input)
 
     def test_qnn_backend_cat(self):
         modules = [Cat2(), Cat3(), Cat4()]  # noqa: F405
@@ -1234,6 +1239,12 @@ def test_qnn_backend_bmm(self):
         module = self.get_qdq_module(module, sample_input)
         self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_cast(self):
+        module = CastMultiUsers()  # noqa: F405
+        sample_input = (torch.randint(0, 3, size=(3, 3)), torch.randn(3, 3))
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_cat(self):
         modules = [Cat2(), Cat3(), Cat4()]  # noqa: F405
         sample_input = (torch.randn(1, 1, 2, 2), torch.randn(1, 1, 4, 2))