Support TopK operator

dayo09 · dayo09 · commit 2cb20e62762c · 2025-08-11T20:15:56.000+09:00
diff --git a/test/modules/op/top_k.py b/test/modules/op/top_k.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from test.modules.base import TestModuleBase
+from test.utils.tag import use_onert
+
+# CircleInterpreter doesn't support TopK operator
+@use_onert
+class SimpleTopK(TestModuleBase):
+    """Top-2 values and indices along the last dim of a (63, 8) random input."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        top_values, top_indices = torch.topk(x, 2)
+        return top_values, top_indices
+
+    def get_example_inputs(self):
+        num_rows, num_experts = 1 * 63, 8
+        return (torch.randn(num_rows, num_experts),), {}
diff --git a/tico/passes/legalize_predefined_layout_operators.py b/tico/passes/legalize_predefined_layout_operators.py
@@ -17,6 +17,8 @@
 
 if TYPE_CHECKING:
     import torch.fx
+from operator import getitem
+
 import torch
 from torch.export import ExportedProgram
 
@@ -26,7 +28,7 @@
 from tico.utils.graph import create_node
 from tico.utils.passes import PassBase, PassResult
 from tico.utils.trace_decorators import trace_graph_diff_on_pass
-from tico.utils.utils import is_target_node
+from tico.utils.utils import is_target_node, set_new_meta_val
 from tico.utils.validate_args_kwargs import (
     AvgPool2dArgs,
     Conv2DArgs,
@@ -35,6 +37,7 @@
     DequantizePerTensorArgs,
     InstanceNormArgs,
     MaxPool2dWithIndicesArgs,
+    TopKArgs,
 )
 
 
@@ -434,6 +437,52 @@ def legalize_avg_pool2d(self, exported_program, node) -> bool:
         modified = True
         return modified
 
+    def legalize_top_k(self, exported_program, node) -> bool:
+        """Replace `aten.topk` with `circle_custom.top_k` and an int32 index cast.
+
+        Raises RuntimeError when `dim` is not the last dimension or when a user
+        of `node` is not a `getitem`. Returns True after rewiring the users.
+        """
+        logger = logging.getLogger(__name__)
+        graph = exported_program.graph_module.graph
+
+        args = TopKArgs(*node.args, **node.kwargs)  # type: ignore[arg-type]
+        input_, k = args.input, args.k
+        if args.dim not in (-1, input_.meta["val"].dim() - 1):
+            raise RuntimeError(f"Only support topk on the last dim (dim: {args.dim})")
+        with graph.inserting_after(input_):
+            circle_topk = create_node(
+                graph, torch.ops.circle_custom.top_k, args=(input_, k), origin=input_
+            )
+            set_new_meta_val(circle_topk)
+
+        with graph.inserting_after(circle_topk):
+            topk_values = create_node(
+                graph, getitem, args=(circle_topk, 0), origin=circle_topk
+            )
+            set_new_meta_val(topk_values)
+            topk_indices = create_node(
+                graph, getitem, args=(circle_topk, 1), origin=circle_topk
+            )
+            set_new_meta_val(topk_indices)
+        with graph.inserting_after(topk_indices):
+            topk_indices_int32 = create_node(
+                graph,
+                torch.ops.aten.to.dtype,
+                args=(topk_indices, torch.int32),
+                origin=node,
+            )
+            set_new_meta_val(topk_indices_int32)
+        # Rewire by tuple index: either output may be unused, in any user order.
+        replacements = (topk_values, topk_indices_int32)
+        for user in list(node.users):
+            if user.target is not getitem:
+                raise RuntimeError(f"Unexpected user of {node.name}: {user.name}")
+            user.replace_all_uses_with(replacements[user.args[1]], propagate_meta=False)
+
+        logger.debug(f"{node.name} is replaced with {circle_topk.name}")
+        return True
+
     def call(self, exported_program: ExportedProgram) -> PassResult:
         target_to_legalize_func = {
             torch.ops.aten.conv2d.default: self.legalize_conv2d,
@@ -442,6 +491,7 @@ def call(self, exported_program: ExportedProgram) -> PassResult:
             torch.ops.aten.max_pool2d_with_indices.default: self.legalize_max_pool2d_with_indices,
             torch.ops.aten.avg_pool2d.default: self.legalize_avg_pool2d,
             torch.ops.aten.instance_norm.default: self.legalize_instance_norm,
+            torch.ops.aten.topk.default: self.legalize_top_k,
         }
 
         graph_module = exported_program.graph_module
diff --git a/tico/serialize/circle_serializer.py b/tico/serialize/circle_serializer.py
@@ -32,6 +32,8 @@
 multiple_output_ops = [
     torch.ops.aten.split_with_sizes.default,
     torch.ops.aten.max.dim,
+    torch.ops.aten.topk.default,
+    torch.ops.circle_custom.top_k,
 ]
 
 
@@ -142,6 +144,8 @@ def _export_tensors(graph: CircleSubgraph, ep: ExportedProgram) -> None:
             if node.target in multiple_output_ops:
                 continue
             node_val = node.meta["val"]
+            if not hasattr(node_val, "layout"):
+                raise RuntimeError(f"{node.name} has non-tensor meta: {node_val!r}")
             if node_val.layout != torch.strided:
                 raise RuntimeError(
                     f"Only support dense tensors (node layout: {node_val.layout})"
diff --git a/tico/serialize/operators/op_topk.py b/tico/serialize/operators/op_topk.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import torch.fx
+import torch
+from circle_schema import circle
+
+from tico.serialize.circle_graph import CircleSubgraph
+from tico.serialize.circle_mapping import (
+    circle_legalize_dtype_to,
+    extract_circle_shape,
+    extract_shape,
+    extract_torch_dtype,
+)
+from tico.serialize.operators.hashable_opcode import OpCode
+from tico.serialize.operators.node_visitor import NodeVisitor, register_node_visitor
+from tico.serialize.operators.utils import create_builtin_operator, get_op_index
+from tico.utils.validate_args_kwargs import TopKArgs
+
+
+@register_node_visitor
+class TopkVisitor(NodeVisitor):
+    """Serialize `circle_custom.top_k` nodes as Circle TOPK_V2 operators."""
+
+    target: List[torch._ops.OpOverload] = [
+        torch.ops.circle_custom.top_k,
+    ]
+
+    def __init__(self, op_codes: Dict[OpCode, int], graph: CircleSubgraph):
+        super().__init__(op_codes, graph)
+
+    def define_topk_node(
+        self, inputs: List, outputs: List
+    ) -> circle.Operator.OperatorT:
+        """Create a TOPK_V2 operator with default `TopKV2Options`."""
+        op_index = get_op_index(
+            circle.BuiltinOperator.BuiltinOperator.TOPK_V2, self._op_codes
+        )
+
+        operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
+        operator.builtinOptionsType = circle.BuiltinOptions.BuiltinOptions.TopKV2Options
+        operator.builtinOptions = circle.TopKV2Options.TopKV2OptionsT()
+
+        return operator
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+    ) -> circle.Operator.OperatorT:
+        """Emit a TOPK_V2 operator for `node`.
+
+        Inputs are `(input, k as int32)`; outputs are the users of `node`.
+        """
+        args = TopKArgs(*node.args, **node.kwargs)  # type: ignore[arg-type]
+        input_ = args.input  # trailing underscore: don't shadow builtin `input`
+
+        input_shape = extract_circle_shape(input_)
+        k_i32 = circle_legalize_dtype_to(args.k, dtype=torch.int32)
+        assert args.dim == -1 or args.dim == len(input_shape) - 1
+
+        # The users are expected to be `getitem` nodes; order them by tuple
+        # index so that the outputs are always (values, indices).
+        outputs = sorted(node.users, key=lambda user: user.args[1])
+
+        return self.define_topk_node([input_, k_i32], outputs)
diff --git a/tico/utils/register_custom_op.py b/tico/utils/register_custom_op.py
@@ -19,6 +19,7 @@
 from torch.library import custom_op, register_fake
 
 from tico.utils.mx.mx_ops import _quantize_mx
+from tico.utils.validate_args_kwargs import TopKArgs
 
 # Note that an operator assumes input tensor has NHWC format.
 def CircleResizeNearestNeighbor():
@@ -662,6 +663,48 @@ def _(
         return input.new_empty(input.size())
 
 
+def CircleTopK():
+    # Registers `circle_custom::top_k`, which takes the top `k` entries along the
+    # last dimension and returns (values, indices) with int32 indices.
+    @custom_op(
+        "circle_custom::top_k",
+        mutates_args=(),
+        schema="(Tensor input, int k) -> (Tensor, Tensor)",
+    )
+    def top_k(
+        input: torch.Tensor,
+        k: int,
+        dim: int = -1,
+        largest: bool = True,
+        sorted: bool = True,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        assert dim == -1, "Only support dim=-1"
+        assert largest is True, "Only support largest=True"
+        assert sorted is True, "Only support sorted=True"
+        # Call aten directly: `TopKArgs` is declared for fx nodes, not eager
+        # tensors, and a dataclass instance cannot be unpacked with `*`.
+        values, indices = torch.ops.aten.topk.default(input, k, dim, largest, sorted)
+
+        return values, torch.ops.aten.to.dtype(indices, dtype=torch.int32)
+
+    @register_fake("circle_custom::top_k")
+    def _(
+        input: FakeTensor,
+        k: int,
+        dim: int = -1,
+        largest: bool = True,
+        sorted: bool = True,
+    ) -> tuple[FakeTensor, FakeTensor]:
+        assert dim == -1
+        assert largest is True
+        assert sorted is True
+        values, indices = torch.ops.aten.topk.default(input, k, dim)
+
+        # Only shape and dtype matter for the fake kernel; mirror the int32
+        # indices that the real implementation returns.
+        return values, indices.new_empty(size=indices.size(), dtype=torch.int32)
+
+
 def CircleQuantizeMX():
     # This operator conducts fake-quantization of microscaling
     # NOTE Why using "quantize"_mx not "fake_quantize"_mx?
@@ -715,3 +758,4 @@ def RegisterOps():
     CircleAvgPool2D()
     CircleInstanceNorm()
     CircleQuantizeMX()
+    CircleTopK()
diff --git a/tico/utils/validate_args_kwargs.py b/tico/utils/validate_args_kwargs.py
@@ -1148,6 +1148,25 @@ class ToDtypeLayoutArgs:
     memory_format: Optional[torch.memory_format] = None
 
 
+@enforce_type
+@dataclass
+class TopKArgs:
+    """
+    topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
+    """
+
+    input: torch.fx.Node
+    k: int
+    dim: int = -1
+    largest: bool = True
+    sorted: bool = True
+
+    def __post_init__(self):
+        # Both flags must keep their default value; anything else is rejected.
+        for flag in ("largest", "sorted"):
+            assert getattr(self, flag) is True, f"Only support {flag}=True"
+
+
 @enforce_type
 @dataclass
 class UnSqueezeArgs: