pytorch
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 3 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decompose_add_sub_alpha_pass.py‎
Lines changed: 94 additions & 0 deletions b/‎backends/arm/_passes/decompose_add_sub_alpha_pass.py‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎backends/arm/runtime/VGFSetup.cpp‎
Lines changed: 22 additions & 9 deletions b/‎backends/arm/runtime/VGFSetup.cpp‎
Lines changed: 22 additions & 9 deletions
diff --git a/‎backends/arm/test/ops/test_add.py‎
Lines changed: 1 addition & 1 deletion b/‎backends/arm/test/ops/test_add.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/arm/test/ops/test_addmm.py‎
Lines changed: 0 additions & 2 deletions b/‎backends/arm/test/ops/test_addmm.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎backends/arm/test/ops/test_amax.py‎
Lines changed: 0 additions & 4 deletions b/‎backends/arm/test/ops/test_amax.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎backends/arm/test/ops/test_amin.py‎
Lines changed: 0 additions & 4 deletions b/‎backends/arm/test/ops/test_amin.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎backends/arm/test/ops/test_any.py‎
Lines changed: 0 additions & 3 deletions b/‎backends/arm/test/ops/test_any.py‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎backends/arm/test/ops/test_mean_dim.py‎
Lines changed: 0 additions & 5 deletions b/‎backends/arm/test/ops/test_mean_dim.py‎
Lines changed: 0 additions & 5 deletions
@@ -27,6 +27,7 @@
 from .convert_to_clamp import ConvertToClampPass  # noqa
 from .decompose_acosh_pass import DecomposeAcoshPass  # noqa
 from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass  # noqa
+from .decompose_add_sub_alpha_pass import DecomposeAddSubAlphaPass  # noqa
 from .decompose_addmm_pass import DecomposeAddmmPass  # noqa
 from .decompose_asin_and_acos_pass import DecomposeAsinAndAcosPass  # noqa
 from .decompose_asinh_pass import DecomposeAsinhPass  # noqa
 
@@ -36,6 +36,7 @@
     DecomposeAcoshPass,
     DecomposeAdaptiveAvgPool2dPass,
     DecomposeAddmmPass,
+    DecomposeAddSubAlphaPass,
     DecomposeAsinAndAcosPass,
     DecomposeAsinhPass,
     DecomposeAtanhPass,
@@ -262,6 +263,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         )
         self.add_pass(DecomposeNotEqualPass())
         self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeAddSubAlphaPass())
         self.add_pass(DecomposeSoftmaxPass())
         self.add_pass(DecomposeGeluPass())
         self.add_pass(ConvertFullLikeToFullPass())
@@ -334,6 +336,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeSignPass())
         self.add_pass(DecomposeAddmmPass())
         self.add_pass(DecomposeDivTensorModePass())
+        self.add_pass(DecomposeAddSubAlphaPass())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeGroupNormPass())
 
@@ -0,0 +1,94 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+import numbers
+from typing import Set, Type
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+# Overloads of add.Tensor this pass recognises: the edge-dialect op and the
+# plain ATen op.
+_ADD_OPS = (
+    exir_ops.edge.aten.add.Tensor,
+    torch.ops.aten.add.Tensor,
+)
+
+# Overloads of sub.Tensor this pass recognises: the edge-dialect op and the
+# plain ATen op.
+_SUB_OPS = (
+    exir_ops.edge.aten.sub.Tensor,
+    torch.ops.aten.sub.Tensor,
+)
+
+
+def _get_ops(op):
+    """Return the ``(mul, full, binary)`` operator triple matching ``op``.
+
+    The triple is taken from the edge dialect when ``op`` is the edge-dialect
+    add/sub overload, and from ``torch.ops.aten`` otherwise. ``binary`` is the
+    add or sub overload of that same family.
+
+    Raises:
+        RuntimeError: if ``op`` is in neither ``_ADD_OPS`` nor ``_SUB_OPS``.
+    """
+    edge_aten = exir_ops.edge.aten
+    torch_aten = torch.ops.aten
+
+    # First settle which binary op we are dealing with ...
+    if op in _ADD_OPS:
+        edge_binary, aten_binary = edge_aten.add.Tensor, torch_aten.add.Tensor
+    elif op in _SUB_OPS:
+        edge_binary, aten_binary = edge_aten.sub.Tensor, torch_aten.sub.Tensor
+    else:
+        raise RuntimeError(f"Unsupported operator {op}")
+
+    # ... then which dialect the helper ops must come from.
+    if op is edge_binary:
+        return (edge_aten.mul.Tensor, edge_aten.full.default, edge_binary)
+    return (torch_aten.mul.Tensor, torch_aten.full.default, aten_binary)
+
+
+def _should_decompose(alpha) -> bool:
+    """Tell whether ``alpha`` is a numeric value other than 1.
+
+    Anything that is not a ``numbers.Number`` yields ``False``.
+    """
+    return isinstance(alpha, numbers.Number) and alpha != 1
+
+
+class DecomposeAddSubAlphaPass(ArmPass):
+    """Decompose add/sub calls that carry a non-unit ``alpha``.
+
+    ``op(lhs, rhs, alpha=a)`` is re-emitted as
+    ``op(lhs, mul(rhs, full((1,), float(a))))``. Calls to other operators, and
+    add/sub calls whose ``alpha`` is absent, equal to 1 or not a number, are
+    forwarded to the parent pass unchanged.
+    """
+
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
+    def call_operator(self, op, args, kwargs, meta, updated: bool | None = False):
+        emit = super().call_operator
+
+        is_add_or_sub = op in _ADD_OPS + _SUB_OPS
+        # kwargs is only inspected for the operators this pass handles.
+        alpha = kwargs.get("alpha", 1) if is_add_or_sub else 1
+        if not (is_add_or_sub and _should_decompose(alpha)):
+            return emit(op, args, kwargs, meta, updated)
+
+        mul_op, full_op, binary_op = _get_ops(op)
+        lhs, rhs = args
+
+        # alpha becomes a one-element tensor so it can feed a tensor-tensor mul.
+        # NOTE(review): the scalar is always converted with float() and full is
+        # given no dtype; confirm this is intended when the operands are
+        # integer tensors.
+        alpha_tensor = emit(full_op, ((1,), float(alpha)), {}, meta, updated=True)
+        rhs_times_alpha = emit(mul_op, (rhs, alpha_tensor), {}, meta, updated=True)
+
+        # The final add/sub is emitted without the alpha keyword.
+        return emit(binary_op, (lhs, rhs_times_alpha), {}, meta, updated=True)
@@ -24,6 +24,13 @@ namespace vgf {
 /* static function to map format to byte count */
 static uint32_t get_format_size(VkFormat format);
 
+// SPV_ARM_tensor does not support rank-0 representations according to the spec.
+// When the resource table contains an empty shape, the tensor is described
+// with a single dimension of this size (dimension count 1) instead.
+// Tensors are output as rank 0 when copied back from the vgf backend.
+namespace {
+constexpr int64_t kScalarSentinelDimension = 1;
+}
+
 // Debug function to inspect memory properties
 static string memory_flags_to_string(VkMemoryPropertyFlags flags) {
   if (flags == 0)
@@ -264,7 +271,11 @@ static void debug_print_resources(
             the_shape.size(),
             the_stride.size());
         for (int j = 0; j < the_shape.size(); j++) {
-          ET_LOG(Info, "      %d: dim %ld", j, the_shape[j]);
+          ET_LOG(
+              Info,
+              "      %d: dim %lld",
+              j,
+              static_cast<long long>(the_shape[j]));
         }
         // Allocate a tensor with bound memory
         break;
@@ -387,6 +398,7 @@ bool VgfRepr::process_vgf(const char* vgf_data, ArrayRef<CompileSpec> specs) {
     // Get tensor shape and strides
     auto shape = resource_decoder->getTensorShape(i);
     auto stride = resource_decoder->getTensorStride(i);
+    const auto shape_size = shape.size();
 
     switch (resource_decoder->getCategory(i)) {
       case vgflib::ResourceCategory::INPUT:
@@ -409,9 +421,9 @@ bool VgfRepr::process_vgf(const char* vgf_data, ArrayRef<CompileSpec> specs) {
         result = allocate_tensor(
             vk_physical,
             vk_device,
-            vgflib::ToVkFormat(resource_decoder->getVkFormat(i)),
-            static_cast<uint32_t>(shape.size()),
-            shape.begin(),
+            resource_format,
+            shape_size == 0 ? 1 : static_cast<uint32_t>(shape_size),
+            shape_size == 0 ? &kScalarSentinelDimension : shape.begin(),
             static_cast<uint32_t>(stride.size()),
             stride.begin(),
             &tensor_description,
@@ -422,8 +434,7 @@ bool VgfRepr::process_vgf(const char* vgf_data, ArrayRef<CompileSpec> specs) {
           ET_LOG(Error, "Failed to allocate tensor for VGF resource %d", i);
           return false;
         }
-        size_t e_size = get_format_size(
-            vgflib::ToVkFormat(resource_decoder->getVkFormat(i)));
+        size_t e_size = get_format_size(resource_format);
         if (0 == e_size) {
           ET_LOG(Error, "failed to get element size of VkFormat");
           return false;
@@ -449,9 +460,11 @@ bool VgfRepr::process_vgf(const char* vgf_data, ArrayRef<CompileSpec> specs) {
             .sType = VK_STRUCTURE_TYPE_TENSOR_DESCRIPTION_ARM,
             .pNext = nullptr,
             .tiling = VK_TENSOR_TILING_LINEAR_ARM,
-            .format = vgflib::ToVkFormat(resource_decoder->getVkFormat(i)),
-            .dimensionCount = static_cast<uint32_t>(shape.size()),
-            .pDimensions = shape.begin(),
+            .format = resource_format,
+            .dimensionCount =
+                shape_size == 0 ? 1 : static_cast<uint32_t>(shape_size),
+            .pDimensions =
+                shape_size == 0 ? &kScalarSentinelDimension : shape.begin(),
             // Note: stride_data of 0's causes size==0, null means stride==size
             .pStrides = (0 == stride.size() ? nullptr : stride.begin()),
             .usage = VK_TENSOR_USAGE_DATA_GRAPH_BIT_ARM,
 
@@ -78,7 +78,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 
 class Add3(torch.nn.Module):
     def forward(self, x: torch.Tensor, y: torch.Tensor):
-        return x + y
+        return torch.add(x, y, alpha=1.5)
 
     test_data: list[input_t2] = {
         "3d_randn_diff_rank": lambda: (torch.randn(1, 4, 5), torch.randn(4, 1)),
 
@@ -167,7 +167,6 @@ def test_addmm_u85_INT(test_data: Tuple):
 
 @common.parametrize("test_data", test_data_suite)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_addmm_vgf_FP(test_data: input_t1):
     pipeline = VgfPipeline[input_t1](
         Addmm(),
@@ -181,7 +180,6 @@ def test_addmm_vgf_FP(test_data: input_t1):
 
 @common.parametrize("test_data", test_data_suite)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_addmm_vgf_INT(test_data: input_t1):
     pipeline = VgfPipeline[input_t1](
         Addmm(),
 
@@ -139,7 +139,6 @@ def test_max_dim_tosa_FP_not_delegated():
 
 @common.parametrize("test_data", Amax.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_amax_vgf_FP(test_data: Amax.input_t):
     data, dim, keep_dims = test_data()
     module = Amax(dim, keep_dims)
@@ -154,7 +153,6 @@ def test_amax_vgf_FP(test_data: Amax.input_t):
 
 @common.parametrize("test_data", Amax.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_amax_vgf_INT(test_data: Amax.input_t):
     data, dim, keep_dims = test_data()
     module = Amax(dim, keep_dims)
@@ -169,7 +167,6 @@ def test_amax_vgf_INT(test_data: Amax.input_t):
 
 @common.parametrize("test_data", Max.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_max_dim_vgf_FP_to_amax(test_data: Max.input_t):
     data, dim = test_data()
     pipeline = VgfPipeline[Max.input_t](
@@ -183,7 +180,6 @@ def test_max_dim_vgf_FP_to_amax(test_data: Max.input_t):
 
 @common.parametrize("test_data", Max.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_max_dim_vgf_INT_to_amax(test_data: Max.input_t):
     data, dim = test_data()
     pipeline = VgfPipeline[Max.input_t](
 
@@ -155,7 +155,6 @@ def test_min_dim_tosa_FP_not_delegated():
 
 @common.parametrize("test_data", Amin.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_amin_vgf_FP(test_data: Amin.input_t):
     data, dim, keep_dims = test_data()
     pipeline = VgfPipeline[Amin.input_t](
@@ -166,7 +165,6 @@ def test_amin_vgf_FP(test_data: Amin.input_t):
 
 @common.parametrize("test_data", Amin.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_amin_vgf_INT(test_data: Amin.input_t):
     data, dim, keep_dims = test_data()
     pipeline = VgfPipeline[Amin.input_t](
@@ -180,7 +178,6 @@ def test_amin_vgf_INT(test_data: Amin.input_t):
 
 @common.parametrize("test_data", Min.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_min_dim_vgf_FP_to_amin(test_data: Min.input_t):
     data, dim = test_data()
     pipeline = VgfPipeline[Min.input_t](
@@ -194,7 +191,6 @@ def test_min_dim_vgf_FP_to_amin(test_data: Min.input_t):
 
 @common.parametrize("test_data", Min.test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_min_dim_vgf_INT_to_amin(test_data: Min.input_t):
     data, dim = test_data()
     pipeline = VgfPipeline[Min.input_t](
 
@@ -6,7 +6,6 @@
 
 from typing import List, Tuple
 
-import pytest
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
@@ -189,7 +188,6 @@ def test_any_u85_INT(test_data: input_t1):
 
 @common.parametrize("test_data", test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_any_vgf_FP(test_data: input_t1):
     op, data_fn = test_data()
     pipeline = VgfPipeline[input_t1](
@@ -204,7 +202,6 @@ def test_any_vgf_FP(test_data: input_t1):
 
 @common.parametrize("test_data", test_data)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_any_vgf_INT(test_data: input_t1):
     op, data_fn = test_data()
     pipeline = VgfPipeline[input_t1](
 
@@ -4,7 +4,6 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-import pytest
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
@@ -84,7 +83,6 @@ def test_adaptive_avg_pool2d_u85_INT(test_data):
 
 @common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_adaptive_avg_pool2d_vgf_FP(test_data):
     pipeline = VgfPipeline[input_t](
         AdaptiveAveragePool2d(),
@@ -98,7 +96,6 @@ def test_adaptive_avg_pool2d_vgf_FP(test_data):
 
 @common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_adaptive_avg_pool2d_vgf_INT(test_data):
     pipeline = VgfPipeline[input_t](
         AdaptiveAveragePool2d(),
@@ -331,7 +328,6 @@ def test_mean_dim_u85_INT(test_data):
 
 @common.parametrize("test_data", MeanDim.test_data_suite)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_mean_dim_vgf_FP(test_data):
     test_data_val, dim, keep_dim = test_data()
     pipeline = VgfPipeline[input_t](
@@ -346,7 +342,6 @@ def test_mean_dim_vgf_FP(test_data):
 
 @common.parametrize("test_data", MeanDim.test_data_suite)
 @common.SkipIfNoModelConverter
-@pytest.mark.xfail(reason="MLETORCH-1410: Tensor dimension count not supported: 0")
 def test_mean_dim_vgf_INT(test_data):
     test_data_val, dim, keep_dim = test_data()
     pipeline = VgfPipeline[input_t](