Commit 91800d9

Merge branch 'main' into dev_seqmse
2 parents: 9e15c41 + 364f493


46 files changed: +1612 -170 lines

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+  if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
    # Install requirements for export_llama
    bash examples/models/llama/install_requirements.sh
    # Test export_llm script: python3 -m extension.llm.export.export_llm.

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -176,7 +176,7 @@ jobs:
           - model: phi_4_mini
             backend: portable
             runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5
+          - model: qwen2_5_1_5b
             backend: portable
             runner: linux.arm64.2xlarge
           - model: llama3_2_vision_encoder

README.md

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ To get started you can:

 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).

 ## Feedback and Engagement

backends/arm/arm_backend.py

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@
 class ArmCompileSpecBuilder:
     class DebugMode(Enum):
         JSON = 1
+        TOSA = 2

     def __init__(self):
         self.compile_spec: List[CompileSpec] = []
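
The new TOSA member gives the Arm backend a second destination for debug data: JSON keeps the existing standalone JSON dump, while TOSA embeds each debug record into the compiled TOSA flatbuffer itself (see the schema.py and node_visitor.py changes below). A minimal sketch of branching on the mode; how the selected mode reaches the DebugHook is compile-spec plumbing not shown in this commit:

    from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder

    # Illustrative only: DebugMode is a plain Enum, so code can branch on it
    # to decide where debug events should end up.
    mode = ArmCompileSpecBuilder.DebugMode.TOSA

    if mode == ArmCompileSpecBuilder.DebugMode.JSON:
        print("debug events go to a standalone JSON dump")
    else:
        print("debug events are embedded per-operator in the TOSA flatbuffer")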

backends/arm/debug/schema.py

Lines changed: 34 additions & 17 deletions
@@ -8,11 +8,13 @@
 import json

 from dataclasses import asdict, dataclass
-from typing import Any
+from typing import Any, Optional

 import serializer.tosa_serializer as ts  # type: ignore
 import torch

+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+
 from torch.fx.traceback import NodeSource

@@ -97,37 +99,52 @@ def from_node(node: torch.fx.Node) -> TorchDebugSchema:
 class DebugSchema:
     event_id: int
     aten_info: ATenDebugSchema
-    tosa_info: TosaDebugSchema
+    tosa_info: Optional[TosaDebugSchema]
     torch_info: TorchDebugSchema

+    def to_dict(self) -> dict[str, Any]:
+        output = asdict(self)
+
+        if self.tosa_info is None:
+            output.pop("tosa_info")
+
+        return output
+

 class DebugHook:
-    def __init__(self) -> None:
+    def __init__(self, debug_mode: ArmCompileSpecBuilder.DebugMode) -> None:
         self._debug_events: list[DebugSchema] = []
         self.__op_id_to_name = {}
+        self.mode = debug_mode

         # Build up a mapping from TOSA 1.0 operator IDs to their names
         for name, val in vars(ts.Op).items():
             self.__op_id_to_name[val] = name

-    def add(self, node: torch.fx.Node, tosa_op: Any, tosa_op_id: int) -> None:
-        tosa_debug_info = TosaDebugSchema(
-            node_name=str(tosa_op),
-            operator_name=self.__op_id_to_name[tosa_op_id],
-            operator_id=tosa_op_id,
-        )
+    def add(self, node: torch.fx.Node, tosa_op: Any, tosa_op_id: int) -> DebugSchema:
+        tosa_debug_info = None
+
+        # If the debug data is being embedded into the TOSA flatbuffer,
+        # do not collect TosaDebugSchema data; it is redundant.
+        if self.mode != ArmCompileSpecBuilder.DebugMode.TOSA:
+            tosa_debug_info = TosaDebugSchema(
+                node_name=str(tosa_op),
+                operator_name=self.__op_id_to_name[tosa_op_id],
+                operator_id=tosa_op_id,
+            )

         aten_debug_info = ATenDebugSchema.from_node(node)
         torch_debug_info = TorchDebugSchema.from_node(node)

-        self._debug_events.append(
-            DebugSchema(
-                event_id=len(self._debug_events),
-                aten_info=aten_debug_info,
-                tosa_info=tosa_debug_info,
-                torch_info=torch_debug_info,
-            )
-        )
+        debug_info = DebugSchema(
+            event_id=len(self._debug_events),
+            aten_info=aten_debug_info,
+            tosa_info=tosa_debug_info,
+            torch_info=torch_debug_info,
+        )
+        self._debug_events.append(debug_info)
+
+        return debug_info

     def serialize(self) -> str:
-        return json.dumps([asdict(event) for event in self._debug_events], indent=4)
+        return json.dumps([event.to_dict() for event in self._debug_events], indent=4)
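
Net effect of the schema changes: in TOSA mode tosa_info is never collected, and to_dict() drops the key rather than serializing a null. A runnable sketch with simplified stand-ins for the real dataclasses (the actual DebugSchema also carries aten_info and torch_info):

    from dataclasses import asdict, dataclass
    from typing import Any, Optional

    # Simplified stand-ins for TosaDebugSchema / DebugSchema, showing only
    # how to_dict() omits tosa_info when it was never collected.
    @dataclass
    class TosaInfo:
        node_name: str
        operator_name: str

    @dataclass
    class Event:
        event_id: int
        tosa_info: Optional[TosaInfo]

        def to_dict(self) -> dict[str, Any]:
            output = asdict(self)
            if self.tosa_info is None:
                output.pop("tosa_info")  # drop the key instead of emitting null
            return output

    print(Event(0, TosaInfo("layer1/CONV2D", "CONV2D")).to_dict())
    # {'event_id': 0, 'tosa_info': {'node_name': 'layer1/CONV2D', 'operator_name': 'CONV2D'}}
    print(Event(1, None).to_dict())
    # {'event_id': 1}

add() now also returns the event it records, which is what lets _serialize_operator in node_visitor.py (below) embed the record into the operator's location field.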

backends/arm/operator_support/to_copy_support.py

Lines changed: 67 additions & 29 deletions
@@ -20,6 +20,8 @@

 logger = logging.getLogger(__name__)

+SupportedTypeDict = dict[torch.dtype, list[torch.dtype]]
+

 @register_tosa_support_check
 class ToCopySupported(SupportedTOSAOperatorCheck):
@@ -33,8 +35,6 @@ class ToCopySupported(SupportedTOSAOperatorCheck):
         TosaSpecification.create_from_string("TOSA-1.0+FP"),
     ]

-    SupportedTypeDict = dict[torch.dtype, list[torch.dtype]]
-
     @staticmethod
     def _merge_supported_types(
         # pyre-ignore[11]
@@ -53,11 +53,22 @@ def _merge_supported_types(
         torch.int8: [torch.bool, torch.int16, torch.int32],
         torch.int16: [torch.bool, torch.int8, torch.int32],
         torch.int32: [torch.bool, torch.int8, torch.int16],
+        torch.int64: [torch.bool, torch.int8, torch.int16, torch.int32],
     }
     SUPPORTED_FLOAT_TYPES: SupportedTypeDict = {
         torch.int8: [torch.float16, torch.bfloat16, torch.float32],
         torch.int16: [torch.float16, torch.bfloat16, torch.float32],
         torch.int32: [torch.float16, torch.bfloat16, torch.float32],
+        # INT64 inputs to casts *should* be ok, since they should be rejected by
+        # CheckInt64InputsAndOutputs if the cast can't be done AOT.
+        torch.int64: [
+            torch.int8,
+            torch.int16,
+            torch.int32,
+            torch.float16,
+            torch.bfloat16,
+            torch.float32,
+        ],
         torch.bfloat16: [torch.int8, torch.int16, torch.int32, torch.float32],
         torch.float16: [torch.int8, torch.int16, torch.int32, torch.float32],
         torch.float32: [
@@ -71,29 +82,42 @@ def _merge_supported_types(
     ALL_SUPPORTED_TYPES = _merge_supported_types(
         SUPPORTED_INT_TYPES, SUPPORTED_FLOAT_TYPES
     )
-    POSSIBLE_TYPE_CONVERSIONS = {torch.int64: torch.int32}

     def is_node_tosa_supported(
         self, node: fx.Node, tosa_spec: TosaSpecification
     ) -> bool:
-        assert node.target in self.targets
-
-        supported_dtypes = (
-            self.ALL_SUPPORTED_TYPES
-            if tosa_spec.support_float()
-            else self.SUPPORTED_INT_TYPES
-        )
-        # Take into account possible type conversions
-        supported_dtypes.update(
-            (k, supported_dtypes[v])
-            for k, v in self.POSSIBLE_TYPE_CONVERSIONS.items()
-            if v in supported_dtypes
-        )

-        # Check input type
-        assert len(node.all_input_nodes) == 1
+        supported_dtypes: SupportedTypeDict = {}
+        if tosa_spec.support_integer():
+            supported_dtypes = self._merge_supported_types(
+                self.SUPPORTED_INT_TYPES, supported_dtypes
+            )
+        if tosa_spec.support_float():
+            supported_dtypes = self._merge_supported_types(
+                self.SUPPORTED_FLOAT_TYPES, supported_dtypes
+            )
+
+        if len(node.all_input_nodes) != 1:
+            self.reporter.report_reject(
+                node,
+                (
+                    "Expected exactly one input node, "
+                    f"got {len(node.all_input_nodes)} for {node.target}."
+                ),
+            )
+            return False
         input_val = node.all_input_nodes[0].meta["val"]
-        assert isinstance(input_val, torch._subclasses.FakeTensor)
+        if not isinstance(input_val, torch._subclasses.FakeTensor):
+            self.reporter.report_reject(
+                node,
+                (
+                    "Invalid or missing meta: expected FakeTensor input, got "
+                    f"{type(input_val).__name__} for {node.target}."
+                ),
+            )
+            return False
+
+        # Check input type
         input_dtype = input_val.dtype
         if input_dtype not in supported_dtypes:
             self.reporter.report_reject(
@@ -104,14 +128,24 @@ def is_node_tosa_supported(

         # Check output type
         output_val = node.meta["val"]
-        assert isinstance(output_val, torch._subclasses.FakeTensor)
+        if not isinstance(output_val, torch._subclasses.FakeTensor):
+            self.reporter.report_reject(
+                node,
+                (
+                    "Invalid or missing meta: expected FakeTensor output, got "
+                    f"{type(output_val).__name__} for {node.target}."
+                ),
+            )
+            return False
         if output_val.dtype not in supported_dtypes[input_dtype]:
             self.reporter.report_reject(
                 node,
-                f"Output dtype {output_val.dtype} is not supported in "
-                f"{node.target} for input dtype {input_dtype}. "
-                f"Supported output types: "
-                f"{''.join(str(t) for t in supported_dtypes[input_dtype])}",
+                (
+                    f"Output dtype {output_val.dtype} is not supported in "
+                    f"{node.target} for input dtype {input_dtype}. "
+                    f"Supported output types: "
+                    f"{', '.join(str(t) for t in supported_dtypes[input_dtype])}"
+                ),
             )
             return False
@@ -120,20 +154,24 @@ def is_node_tosa_supported(
         if node.kwargs["memory_format"] in (torch.preserve_format,):
             self.reporter.report_reject(
                 node,
-                f"Argument 'memory_format' is not supported for "
-                f"{node.target} right now.",
+                (
+                    "Argument 'memory_format' is not supported for "
+                    f"{node.target} right now."
+                ),
             )
             return False

         # Check dim_order (to_dim_order_copy)
         if "dim_order" in node.kwargs:
             dim_order = node.kwargs["dim_order"]
             # pyre-ignore[6]
-            if dim_order != list(range(len(dim_order))):  # type: ignore[arg-type]
+            if dim_order is not None and dim_order != list(range(len(dim_order))):  # type: ignore[arg-type]
                 self.reporter.report_reject(
                     node,
-                    f"Argument {dim_order=} is not supported for "
-                    f"{node.target} right now.",
+                    (
+                        f"Argument {dim_order=} is not supported for "
+                        f"{node.target} right now."
+                    ),
                 )
                 return False
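
The rewrite drops the ALL_SUPPORTED_TYPES/POSSIBLE_TYPE_CONVERSIONS shortcut in favour of an explicit per-spec merge: an INT+FP target accepts the union of both tables, while an INT-only target never admits float casts. The body of _merge_supported_types is not shown in this commit, so the union-of-lists semantics in this sketch is an assumption:

    import torch

    SupportedTypeDict = dict[torch.dtype, list[torch.dtype]]

    # Assumed merge semantics: per input dtype, union the allowed output dtypes.
    def merge_supported_types(
        dtypes1: SupportedTypeDict, dtypes2: SupportedTypeDict
    ) -> SupportedTypeDict:
        merged = {k: list(v) for k, v in dtypes2.items()}
        for in_dtype, out_dtypes in dtypes1.items():
            existing = merged.setdefault(in_dtype, [])
            existing += [d for d in out_dtypes if d not in existing]
        return merged

    ints: SupportedTypeDict = {torch.int32: [torch.bool, torch.int8, torch.int16]}
    floats: SupportedTypeDict = {
        torch.int32: [torch.float16, torch.bfloat16, torch.float32]
    }

    # On an INT+FP spec both tables are merged; on an INT-only spec the float
    # table is skipped, so int32 -> float casts are rejected.
    print(merge_supported_types(ints, floats)[torch.int32])

This also explains the new int64 entries: an int64 cast is accepted here on the assumption that CheckInt64InputsAndOutputs rejects any cast that cannot be resolved ahead of time.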

backends/arm/operators/node_visitor.py

Lines changed: 14 additions & 7 deletions
@@ -5,10 +5,12 @@

 # pyre-unsafe

+import json
 from typing import Any, Dict, List, Optional

 import torch

+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
 from executorch.backends.arm.debug.schema import DebugHook
 from executorch.backends.arm.tosa.mapping import TosaArg
 from executorch.backends.arm.tosa.specification import TosaSpecification
@@ -49,20 +51,25 @@ def _serialize_operator(
         outputs: List[str],
         attributes: Optional[Any] = None,
     ) -> None:
+        op_location = ""
+        if self.debug_hook:
+            debug_info = self.debug_hook.add(
+                node,
+                tosa_op=outputs[0],
+                tosa_op_id=tosa_op,
+            )
+
+            if self.debug_hook.mode == ArmCompileSpecBuilder.DebugMode.TOSA:
+                op_location = json.dumps(debug_info.to_dict())
+
         tosa_graph.addOperator(
             tosa_op,
             inputs=inputs,
             outputs=outputs,
             attributes=attributes,
+            location=op_location,
         )

-        if self.debug_hook:
-            self.debug_hook.add(
-                node,
-                tosa_op=outputs[0],
-                tosa_op_id=tosa_op,
-            )
-
     def define_node(
         self,
         node: torch.fx.Node,
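
Calling the hook before addOperator lets the returned event travel with the operator itself: in TOSA mode the serialized record becomes the operator's location string. A sketch of the payload shape; the field contents below are illustrative, not taken from a real export:

    import json

    # Illustrative DebugSchema.to_dict() output in TOSA mode: tosa_info has
    # been dropped (it would duplicate the flatbuffer the record is embedded
    # in), leaving the event id plus the ATen- and FX-level info.
    debug_info = {
        "event_id": 0,
        "aten_info": {"node_name": "aten_convolution_default"},
        "torch_info": {"stack_trace": "model.py:42 in forward"},
    }

    op_location = json.dumps(debug_info)
    # _serialize_operator then passes this string as
    # tosa_graph.addOperator(..., location=op_location), attaching the debug
    # record to the operator inside the TOSA flatbuffer.
    print(op_location)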

backends/arm/scripts/run_fvp.sh

Lines changed: 17 additions & 2 deletions
@@ -92,7 +92,7 @@ if [[ ${target} == *"ethos-u55"* ]]; then
         -C mps3_board.uart0.shutdown_on_eot=1 \
         -a "${elf_file}" \
         ${data_file} \
-        --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds
+        --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee ${log_file} || true # seconds
     echo "[${BASH_SOURCE[0]}] Simulation complete, $?"
 elif [[ ${target} == *"ethos-u85"* ]]; then
     ${nobuf} ${fvp_model} \
@@ -104,13 +104,28 @@ elif [[ ${target} == *"ethos-u85"* ]]; then
         -C mps4_board.uart0.shutdown_on_eot=1 \
         -a "${elf_file}" \
         ${data_file} \
-        --timelimit ${timeout} 2>&1 | tee ${log_file} || true # seconds
+        --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee ${log_file} || true # seconds
     echo "[${BASH_SOURCE[0]}] Simulation complete, $?"
 else
     echo "Running ${elf_file} for ${target} is not supported"
     exit 1
 fi

+echo "Checking for an ETDump in the log"
+! grep "#\[RUN THIS\]" ${log_file} >/dev/null
+if [ $? != 0 ]; then
+    echo "Found ETDump in log!"
+    echo "#!/bin/sh" > etdump_script.sh
+    sed -n '/^#\[RUN THIS\]$/,/^#\[END\]$/p' ${log_file} >> etdump_script.sh
+    # You can run etdump_script.sh if you do
+    #   $ chmod a+x etdump_script.sh
+    #   $ ./etdump_script.sh
+    # But let's not trust the script, since a bad patch could run bad code on your machine.
+    grep ">etdump.bin" etdump_script.sh | cut -d\" -f2- | cut -d\" -f1 >etdump.base64
+    base64 -d etdump.base64 >etdump.bin
+    python3 -m devtools.inspector.inspector_cli --etdump_path etdump.bin --source_time_scale cycles --target_time_scale cycles
+fi
+
 echo "Checking for problems in log:"
 ! grep -E "^(F|E|\\[critical\\]|Hard fault.|Info: Simulation is stopping. Reason: CPU time has been exceeded.).*$" ${log_file}
 if [ $? != 0 ]; then
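
The new block scans the FVP log for an ETDump that the runner prints as a base64 payload between #[RUN THIS] and #[END] markers, decodes it into etdump.bin, and feeds it to the inspector CLI with cycle time scales on both axes. A rough Python equivalent of the extraction step, assuming the payload sits between the first pair of double quotes on the line that redirects into etdump.bin:

    import base64
    import re

    # Rough equivalent of the grep/cut/base64 pipeline above. Assumption: the
    # extracted script contains one line redirecting into etdump.bin whose
    # first quoted string is the base64 payload.
    def extract_etdump(script_path: str, out_path: str = "etdump.bin") -> bool:
        with open(script_path) as f:
            for line in f:
                if ">etdump.bin" not in line:
                    continue
                match = re.search(r'"([^"]*)"', line)  # text between first quotes
                if match:
                    with open(out_path, "wb") as out:
                        out.write(base64.b64decode(match.group(1)))
                    return True
        return False

    if extract_etdump("etdump_script.sh"):
        print("wrote etdump.bin; analyze with devtools.inspector.inspector_cli")

Decoding the payload directly, rather than executing the recovered script, mirrors the script's own caution: it avoids running untrusted commands from a CI log on the host machine.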
