pytorch
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 3 additions & 0 deletions
diff --git a/backends/arm/_passes/decompose_div_tensor_mode.py b/backends/arm/_passes/decompose_div_tensor_mode.py
Lines changed: 84 additions & 0 deletions
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/debug/schema.py b/backends/arm/debug/schema.py
Lines changed: 34 additions & 17 deletions
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/operators/node_visitor.py b/backends/arm/operators/node_visitor.py
Lines changed: 14 additions & 7 deletions
@@ -97,7 +97,7 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+  if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
       # Install requirements for export_llama
       bash examples/models/llama/install_requirements.sh
       # Test export_llm script: python3 -m extension.llm.export.export_llm.
 
@@ -176,7 +176,7 @@ jobs:
           - model: phi_4_mini
             backend: portable
             runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5
+          - model: qwen2_5_1_5b
             backend: portable
             runner: linux.arm64.2xlarge
           - model: llama3_2_vision_encoder
 
@@ -52,7 +52,7 @@ To get started you can:
 
 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).
 
 ## Feedback and Engagement
 
 
@@ -37,6 +37,7 @@
 from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass  # noqa
 from .decompose_cumsum_pass import DecomposeCumsumPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
+from .decompose_div_tensor_mode import DecomposeDivTensorModePass  # noqa
 from .decompose_elu_pass import DecomposeEluPass  # noqa
 from .decompose_embedding_pass import DecomposeEmbeddingPass  # noqa  # noqa
 from .decompose_expm1_pass import DecomposeExpm1Pass  # noqa
 
@@ -42,6 +42,7 @@
     DecomposeCosineSimilarityPass,
     DecomposeCumsumPass,
     DecomposeDivPass,
+    DecomposeDivTensorModePass,
     DecomposeEluPass,
     DecomposeEmbeddingPass,
     DecomposeExpm1Pass,
@@ -211,6 +212,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
         )
         self.add_pass(DecomposeNotEqualPass())
+        self.add_pass(DecomposeDivTensorModePass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeSoftmaxPass())
         self.add_pass(DecomposeGeluPass())
@@ -289,6 +291,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeNotEqualPass())
         self.add_pass(DecomposeCosineSimilarityPass())
         self.add_pass(DecomposeGluPass())
+        self.add_pass(DecomposeDivTensorModePass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeLinearVectorNormPass())
 
@@ -0,0 +1,84 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+# div.Tensor_mode overloads handled by this pass, one tuple per dialect.
+edge_div_mode_ops = (exir_ops.edge.aten.div.Tensor_mode,)
+aten_div_mode_ops = (torch.ops.aten.div.Tensor_mode,)
+
+# Replacement ops for the edge dialect, keyed by short name. Despite the
+# "unary" in the name, the table also holds multi-input ops (div, lt, where).
+edge_unary = {
+    "div": exir_ops.edge.aten.div.Tensor,
+    "floor": exir_ops.edge.aten.floor.default,
+    "ceil": exir_ops.edge.aten.ceil.default,
+    "full": exir_ops.edge.aten.full.default,
+    "lt": exir_ops.edge.aten.lt.Tensor,
+    "where": exir_ops.edge.aten.where.self,
+}
+
+# Same table for the aten dialect. The keys mirror edge_unary because
+# call_operator indexes whichever table was selected with the same names.
+aten_unary = {
+    "div": torch.ops.aten.div.Tensor,
+    "floor": torch.ops.aten.floor.default,
+    "ceil": torch.ops.aten.ceil.default,
+    "full": torch.ops.aten.full.default,
+    "lt": torch.ops.aten.lt.Tensor,
+    "where": torch.ops.aten.where.self,
+}
+
+
+def _get_opset(op):
+    """Return the op table (edge or aten) for the dialect `op` belongs to."""
+    is_edge, is_aten = op in edge_div_mode_ops, op in aten_div_mode_ops
+    if not (is_edge or is_aten):
+        raise RuntimeError(f"div.Tensor_mode not supported for op {op}")
+    return edge_unary if is_edge else aten_unary
+
+
+class DecomposeDivTensorModePass(ExportPass):
+    """
+    Rewrites aten.div.Tensor_mode into
+
+    rounding_mode=None  -> div(a, b)
+    rounding_mode='floor' -> floor(div(a, b))
+    rounding_mode='trunc' -> where(div(a,b) < 0, ceil(div(a,b)), floor(div(a,b)))
+
+    Both the edge and the aten overload are handled; all other operators are
+    forwarded to ExportPass unchanged.
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        """
+        Decompose a div.Tensor_mode call; defer to ExportPass for any other op.
+
+        Raises:
+            RuntimeError: if rounding_mode is not None, 'floor' or 'trunc'.
+        """
+        if op not in (edge_div_mode_ops + aten_div_mode_ops):
+            return super().call_operator(op, args, kwargs, meta)
+
+        opset = _get_opset(op)
+
+        a, b = args[0], args[1]
+        # rounding_mode is looked up as a keyword first, then as an optional
+        # third positional argument.
+        rounding_mode = kwargs.get("rounding_mode", None)
+        if rounding_mode is None and len(args) > 2:
+            rounding_mode = args[2]
+
+        # Reject unknown modes before any replacement op is emitted.
+        if rounding_mode not in (None, "floor", "trunc"):
+            raise RuntimeError(
+                f"Unsupported rounding_mode for div.Tensor_mode: {rounding_mode!r}"
+            )
+
+        # The replacement ops are never div.Tensor_mode, so emit all of them
+        # through the parent class instead of re-entering this override.
+        emit = super().call_operator
+
+        q = emit(opset["div"], (a, b), {}, meta)
+
+        if rounding_mode is None:
+            return q
+
+        if rounding_mode == "floor":
+            return emit(opset["floor"], (q,), {}, meta)
+
+        # 'trunc' rounds towards zero: ceil for negative quotients, floor
+        # otherwise. The zero constant has an all-ones shape with the same
+        # rank as the output.
+        # NOTE(review): dtype is fixed to float32 regardless of q's dtype -
+        # confirm this is intended for non-float32 inputs.
+        zero = emit(
+            opset["full"],
+            args=((1,) * len(meta["val"].size()), 0.0),
+            kwargs={"dtype": torch.float32},
+            meta=meta,
+        )
+        lt0 = emit(opset["lt"], (q, zero), {}, meta)
+        ceilq = emit(opset["ceil"], (q,), {}, meta)
+        floorq = emit(opset["floor"], (q,), {}, meta)
+        return emit(opset["where"], (lt0, ceilq, floorq), {}, meta)
@@ -23,6 +23,7 @@
 class ArmCompileSpecBuilder:
     class DebugMode(Enum):
         JSON = 1
+        TOSA = 2
 
     def __init__(self):
         self.compile_spec: List[CompileSpec] = []
 
@@ -8,11 +8,13 @@
 import json
 
 from dataclasses import asdict, dataclass
-from typing import Any
+from typing import Any, Optional
 
 import serializer.tosa_serializer as ts  # type: ignore
 import torch
 
+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+
 from torch.fx.traceback import NodeSource
 
 
@@ -97,37 +99,52 @@ def from_node(node: torch.fx.Node) -> TorchDebugSchema:
 class DebugSchema:
     event_id: int
     aten_info: ATenDebugSchema
-    tosa_info: TosaDebugSchema
+    tosa_info: Optional[TosaDebugSchema]
     torch_info: TorchDebugSchema
 
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to a dict, dropping the `tosa_info` key when it is None."""
+        as_fields = asdict(self)
+
+        if self.tosa_info is None:
+            del as_fields["tosa_info"]
+        return as_fields
+
 
 class DebugHook:
-    def __init__(self) -> None:
+    def __init__(self, debug_mode: ArmCompileSpecBuilder.DebugMode) -> None:
         self._debug_events: list[DebugSchema] = []
         self.__op_id_to_name = {}
+        self.mode = debug_mode
 
         # Build up a mapping from TOSA 1.0 operator IDs to their names
         for name, val in vars(ts.Op).items():
             self.__op_id_to_name[val] = name
 
-    def add(self, node: torch.fx.Node, tosa_op: Any, tosa_op_id: int) -> None:
-        tosa_debug_info = TosaDebugSchema(
-            node_name=str(tosa_op),
-            operator_name=self.__op_id_to_name[tosa_op_id],
-            operator_id=tosa_op_id,
-        )
+    def add(self, node: torch.fx.Node, tosa_op: Any, tosa_op_id: int) -> DebugSchema:
+        tosa_debug_info = None
+
+        # If the debug data is being embedded into the TOSA flatbuffer,
+        # do not collect TosaDebugSchema data; it would be redundant.
+        if self.mode != ArmCompileSpecBuilder.DebugMode.TOSA:
+            tosa_debug_info = TosaDebugSchema(
+                node_name=str(tosa_op),
+                operator_name=self.__op_id_to_name[tosa_op_id],
+                operator_id=tosa_op_id,
+            )
 
         aten_debug_info = ATenDebugSchema.from_node(node)
         torch_debug_info = TorchDebugSchema.from_node(node)
 
-        self._debug_events.append(
-            DebugSchema(
-                event_id=len(self._debug_events),
-                aten_info=aten_debug_info,
-                tosa_info=tosa_debug_info,
-                torch_info=torch_debug_info,
-            )
+        debug_info = DebugSchema(
+            event_id=len(self._debug_events),
+            aten_info=aten_debug_info,
+            tosa_info=tosa_debug_info,
+            torch_info=torch_debug_info,
         )
+        self._debug_events.append(debug_info)
+
+        return debug_info
 
     def serialize(self) -> str:
-        return json.dumps([asdict(event) for event in self._debug_events], indent=4)
+        return json.dumps([event.to_dict() for event in self._debug_events], indent=4)
@@ -176,6 +176,7 @@ def is_node_supported(
             exir_ops.edge.aten.hardtanh.default,
             exir_ops.edge.aten.hardswish.default,
             exir_ops.edge.aten.div.Tensor,
+            exir_ops.edge.aten.div.Tensor_mode,
             exir_ops.edge.aten.eq.Tensor,
             exir_ops.edge.aten.eq.Scalar,
             exir_ops.edge.aten.erf.default,
 
@@ -5,10 +5,12 @@
 
 # pyre-unsafe
 
+import json
 from typing import Any, Dict, List, Optional
 
 import torch
 
+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
 from executorch.backends.arm.debug.schema import DebugHook
 from executorch.backends.arm.tosa.mapping import TosaArg
 from executorch.backends.arm.tosa.specification import TosaSpecification
@@ -49,20 +51,25 @@ def _serialize_operator(
         outputs: List[str],
         attributes: Optional[Any] = None,
     ) -> None:
+        op_location = ""
+        if self.debug_hook:
+            debug_info = self.debug_hook.add(
+                node,
+                tosa_op=outputs[0],
+                tosa_op_id=tosa_op,
+            )
+
+            if self.debug_hook.mode == ArmCompileSpecBuilder.DebugMode.TOSA:
+                op_location = json.dumps(debug_info.to_dict())
+
         tosa_graph.addOperator(
             tosa_op,
             inputs=inputs,
             outputs=outputs,
             attributes=attributes,
+            location=op_location,
         )
 
-        if self.debug_hook:
-            self.debug_hook.add(
-                node,
-                tosa_op=outputs[0],
-                tosa_op_id=tosa_op,
-            )
-
     def define_node(
         self,
         node: torch.fx.Node,