pytorch
diff --git a/‎CMakeLists.txt‎
Lines changed: 9 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎backends/arm/test/ops/test_cat.py‎
Lines changed: 26 additions & 8 deletions b/‎backends/arm/test/ops/test_cat.py‎
Lines changed: 26 additions & 8 deletions
diff --git a/‎backends/arm/test/ops/test_expand.py‎
Lines changed: 25 additions & 9 deletions b/‎backends/arm/test/ops/test_expand.py‎
Lines changed: 25 additions & 9 deletions
diff --git a/‎backends/arm/test/ops/test_full.py‎
Lines changed: 0 additions & 4 deletions b/‎backends/arm/test/ops/test_full.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎backends/arm/test/runner_utils.py‎
Lines changed: 8 additions & 14 deletions b/‎backends/arm/test/runner_utils.py‎
Lines changed: 8 additions & 14 deletions
diff --git a/‎backends/qualcomm/CMakeLists.txt‎
Lines changed: 1 addition & 5 deletions b/‎backends/qualcomm/CMakeLists.txt‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎backends/qualcomm/runtime/QnnManager.cpp‎
Lines changed: 2 additions & 1 deletion b/‎backends/qualcomm/runtime/QnnManager.cpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/qualcomm/runtime/QnnManager.h‎
Lines changed: 1 addition & 1 deletion b/‎backends/qualcomm/runtime/QnnManager.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/qualcomm/runtime/backends/QnnMemManager.h‎
Lines changed: 1 addition & 1 deletion b/‎backends/qualcomm/runtime/backends/QnnMemManager.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl‎
Lines changed: 15 additions & 6 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl‎
Lines changed: 15 additions & 6 deletions
@@ -1,4 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -819,6 +820,14 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
     list(APPEND _executor_runner_libs quantized_ops_lib)
   endif()
 
+  if(EXECUTORCH_ENABLE_EVENT_TRACER)
+    if(EXECUTORCH_BUILD_DEVTOOLS)
+      list(APPEND _executor_runner_libs etdump flatccrt)
+    else()
+      message(SEND_ERROR "Use of 'EXECUTORCH_ENABLE_EVENT_TRACER' requires 'EXECUTORCH_BUILD_DEVTOOLS' to be enabled.")
+    endif()
+  endif()
+
   add_executable(executor_runner ${_executor_runner__srcs})
   if(CMAKE_BUILD_TYPE STREQUAL "Release")
     if(APPLE)
 
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -33,6 +33,8 @@ class Cat(torch.nn.Module):
                 ),
                 -1,
             ),
+            ((torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 1)), 3),
+            ((torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 4)), 0),
             ((torch.randn(2, 2, 4, 4), torch.randn(2, 2, 4, 1)), 3),
             (
                 (
@@ -47,8 +49,8 @@ class Cat(torch.nn.Module):
         def __init__(self):
             super().__init__()
 
-        def forward(self, tensors: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor:
-            return torch.cat(tensors, dim=dim)
+        def forward(self, t: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor:
+            return torch.cat(t, dim=dim)
 
     def _test_cat_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[tuple[torch.Tensor, ...], int]
@@ -134,22 +136,38 @@ def test_cat_tosa_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
         test_data = (operands, dim)
         self._test_cat_tosa_BI_pipeline(self.Cat(), test_data)
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Cat.test_parameters)
+    @parameterized.expand(Cat.test_parameters[:-3])
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
     def test_cat_u55_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
         test_data = (operands, dim)
         self._test_cat_ethosu_BI_pipeline(
             self.Cat(), common.get_u55_compile_spec(), test_data
         )
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Cat.test_parameters)
+    # MLETORCH-630 Cat does not work on FVP with batch>1
+    @parameterized.expand(Cat.test_parameters[-3:])
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP
+    def test_cat_u55_BI_xfails(self, operands: tuple[torch.Tensor, ...], dim: int):
+        test_data = (operands, dim)
+        self._test_cat_ethosu_BI_pipeline(
+            self.Cat(), common.get_u55_compile_spec(), test_data
+        )
+
+    @parameterized.expand(Cat.test_parameters[:-3])
+    @pytest.mark.corstone_fvp
     def test_cat_u85_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
         test_data = (operands, dim)
         self._test_cat_ethosu_BI_pipeline(
             self.Cat(), common.get_u85_compile_spec(), test_data
         )
+
+    # MLETORCH-630 Cat does not work on FVP with batch>1
+    @parameterized.expand(Cat.test_parameters[-3:])
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    def test_cat_u85_BI_xfails(self, operands: tuple[torch.Tensor, ...], dim: int):
+        test_data = (operands, dim)
+        self._test_cat_ethosu_BI_pipeline(
+            self.Cat(), common.get_u85_compile_spec(), test_data
+        )
@@ -37,15 +37,17 @@ class Expand(torch.nn.Module):
         test_parameters = [
             (torch.rand(1), (2,)),
             (torch.randn(1, 4), (1, -1)),
-            (torch.rand(1, 1, 2, 2), (4, 3, -1, 2)),
             (torch.randn(1), (2, 2, 4)),
-            (torch.rand(3, 2, 4, 1), (-1, -1, -1, 3)),
+            (torch.randn(1, 1, 1, 5), (1, 4, -1, -1)),
             (torch.randn(1, 1, 192), (1, -1, -1)),
+            (torch.randn(1, 1), (1, 2, 2, 4)),
+            (torch.randn(1, 1), (2, 2, 2, 4)),
             (torch.randn(10, 1, 1, 97), (-1, 4, -1, -1)),
+            (torch.rand(1, 1, 2, 2), (4, 3, -1, 2)),
         ]
 
-        def forward(self, x: torch.Tensor, multiples: Sequence):
-            return x.expand(multiples)
+        def forward(self, x: torch.Tensor, m: Sequence):
+            return x.expand(m)
 
     def _test_expand_tosa_MI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
         (
@@ -113,20 +115,34 @@ def test_expand_tosa_MI(self, test_input, multiples):
     def test_expand_tosa_BI(self, test_input, multiples):
         self._test_expand_tosa_BI_pipeline(self.Expand(), (test_input, multiples))
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Expand.test_parameters)
+    @parameterized.expand(Expand.test_parameters[:-3])
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
     def test_expand_u55_BI(self, test_input, multiples):
         self._test_expand_ethosu_BI_pipeline(
             common.get_u55_compile_spec(), self.Expand(), (test_input, multiples)
         )
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Expand.test_parameters)
+    # MLETORCH-629: Expand does not work on FVP with batch>1
+    @parameterized.expand(Expand.test_parameters[-3:])
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP
+    def test_expand_u55_BI_xfails(self, test_input, multiples):
+        self._test_expand_ethosu_BI_pipeline(
+            common.get_u55_compile_spec(), self.Expand(), (test_input, multiples)
+        )
+
+    @parameterized.expand(Expand.test_parameters[:-3])
+    @pytest.mark.corstone_fvp
     def test_expand_u85_BI(self, test_input, multiples):
         self._test_expand_ethosu_BI_pipeline(
             common.get_u85_compile_spec(), self.Expand(), (test_input, multiples)
         )
+
+    # MLETORCH-629: Expand does not work on FVP with batch>1
+    @parameterized.expand(Expand.test_parameters[-3:])
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    def test_expand_u85_BI_xfails(self, test_input, multiples):
+        self._test_expand_ethosu_BI_pipeline(
+            common.get_u85_compile_spec(), self.Expand(), (test_input, multiples)
+        )
@@ -143,20 +143,16 @@ def test_full_tosa_MI(self, test_tensor: Tuple):
     def test_full_tosa_BI(self, test_tensor: Tuple):
         self._test_full_tosa_BI_pipeline(self.AddVariableFull(), test_tensor)
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
     @parameterized.expand(AddVariableFull.test_parameters)
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
     def test_full_u55_BI(self, test_tensor: Tuple):
         self._test_full_tosa_u55_pipeline(
             self.AddVariableFull(),
             test_tensor,
         )
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
     @parameterized.expand(AddVariableFull.test_parameters)
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
     def test_full_u85_BI(self, test_tensor: Tuple):
         self._test_full_tosa_u85_pipeline(
             self.AddVariableFull(),
 
@@ -65,16 +65,7 @@ def get_input_names(program: ExportedProgram) -> list[str]:
     Returns:
         A list of strings with the names of the model input.
     """
-    input_names = []
-
-    # E.g. bias and weights are 'placeholders' as well. This is used to
-    # get only the use inputs.
-    usr_inputs = program.graph_signature.user_inputs
-    for node in program.graph.nodes:
-        if node.op == "placeholder" and node.name in usr_inputs:
-            input_names.append(node.name)
-
-    return input_names
+    return [spec.arg.name for spec in program.graph_signature.input_specs]
 
 
 def get_input_quantization_params(
@@ -334,13 +325,16 @@ def run_corstone(
 
 
 def prep_data_for_save(
-    data: torch.Tensor,
+    data,
     input_name: str,
     quant_param: Optional[QuantizationParams] = None,
 ):
-    data_np = np.array(data.detach(), order="C").astype(
-        torch_to_numpy_dtype_dict[data.dtype]
-    )
+    if isinstance(data, torch.Tensor):
+        data_np = np.array(data.detach(), order="C").astype(
+            torch_to_numpy_dtype_dict[data.dtype]
+        )
+    else:
+        data_np = np.array(data)
     if quant_param is not None:
         assert quant_param.node_name in input_name, (
             f"The quantization params name '{quant_param.node_name}' does not "
 
@@ -1,4 +1,5 @@
 # Copyright (c) Qualcomm Innovation Center, Inc.
+# Copyright 2025 Arm Limited and/or its affiliates.
 # All rights reserved
 #
 # This source code is licensed under the BSD-style license found in the
@@ -199,11 +200,6 @@ target_link_libraries(
 #
 target_link_options_shared_lib(qnn_executorch_backend)
 
-#
-# add compile option
-#
-target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)
-
 #
 # add sources
 #
 
@@ -154,8 +154,9 @@ Error QnnManager::RegisterMem(
     const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
   SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
   // Not enable shared buffer
-  if (!options_->shared_buffer())
+  if (!options_->shared_buffer()) {
     return Error::Internal;
+  }
 
   if (backend_params_ptr_->qnn_mem_manager_ptr_ == nullptr) {
     QNN_EXECUTORCH_LOG_WARN(
 
@@ -145,7 +145,7 @@ class QnnManager {
           {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_8,
            executorch::aten::ScalarType::Byte},
           {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16,
-           executorch::aten::ScalarType::Bits16},
+           executorch::aten::ScalarType::UInt16},
   };
 };
 } // namespace qnn
 
@@ -77,7 +77,7 @@ class QnnMemManager {
            Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_16},
           {executorch::aten::ScalarType::Byte,
            Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_8},
-          {executorch::aten::ScalarType::Bits16,
+          {executorch::aten::ScalarType::UInt16,
            Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16},
   };
 };
 
@@ -32,11 +32,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -127,7 +136,7 @@ void main() {
   const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)];
   for (int y = 0; y < BATCH_SIZE_Y; y++) {
     for (int x = 0; x < BATCH_SIZE_X; x++) {
-      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits))) {
+      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) {
         continue;
       }
       imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max));
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`# Copyright (c) Qualcomm Innovation Center, Inc.`
	`2`	`+# Copyright 2025 Arm Limited and/or its affiliates.`
`2`	`3`	`# All rights reserved`
`3`	`4`	`#`
`4`	`5`	`# This source code is licensed under the BSD-style license found in the`
`@@ -199,11 +200,6 @@ target_link_libraries(`
`199`	`200`	`#`
`200`	`201`	`target_link_options_shared_lib(qnn_executorch_backend)`
`201`	`202`
`202`		`-#`
`203`		`-# add compile option`
`204`		`-#`
`205`		`-target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)`
`206`		`-`
`207`	`203`	`#`
`208`	`204`	`# add sources`
`209`	`205`	`#`