Merged
Changes from 16 commits
9 changes: 0 additions & 9 deletions .ci/scripts/setup-samsung-linux-deps.sh
@@ -54,15 +54,6 @@ install_enn_backend() {
rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
ANDROID_NDK_VERSION=r27b

pushd .
cd /tmp
curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"

# Print the content for manual verification
ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
popd
# build Exynos backend
export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
bash backends/samsung/build.sh --build all
4 changes: 2 additions & 2 deletions .github/workflows/pull.yml
@@ -874,7 +874,7 @@ jobs:
contents: read
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-gcc9
docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
@@ -892,7 +892,7 @@
source .ci/scripts/setup-samsung-linux-deps.sh

# Test models serially
models="mv2 ic3 resnet18 resnet50"
models="mv2 ic3 resnet18 resnet50 mv3 ic4 dl3 edsr vit w2l"
for model in $models; do
python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
done
88 changes: 88 additions & 0 deletions backends/samsung/_passes/conv1d_to_conv2d.py
@@ -0,0 +1,88 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch._export.utils import get_param


class Conv1dToConv2d(ExportPass):

def __init__(self, edge_program: ExportedProgram):
super().__init__()
self.edge_program = edge_program

def call(self, graph_module: torch.fx.GraphModule):
graph = graph_module.graph
node_list = list(graph.nodes)
for node in node_list:
if node.op == "call_function":
if node.target == exir_ops.edge.aten.convolution.default:
stride = list(node.args[3])
if len(stride) != 1:
continue

# convert 3dim weight to 4dim
weight_node = node.args[1]
weight_3dim = get_param(self.edge_program, weight_node)
weight_4dim = torch.nn.Parameter(
data=weight_3dim.data.contiguous().unsqueeze(dim=-1),
requires_grad=False,
)
parameter_name = (
self.edge_program.graph_signature.inputs_to_parameters[
weight_node.name
]
)
self.edge_program.state_dict[parameter_name] = weight_4dim
weight_node.meta["val"] = weight_node.meta["val"].data.unsqueeze(
dim=-1
)

# Extend stride, padding, and dilation
node.args = (
node.args[0],
node.args[1],
node.args[2],
node.args[3] + [1], # stride
node.args[4] + [0], # padding
node.args[5] + [1], # dilation
node.args[6],
node.args[7],
node.args[8],
)

# unsqueeze -> conv2d -> squeeze
with graph.inserting_before(node):
input_node = node.args[0]
unsqueeze_before = graph.create_node(
"call_function", exir_ops.edge.aten.unsqueeze_copy.default
)
unsqueeze_before.args = (
input_node,
-1,
)
node.replace_input_with(input_node, unsqueeze_before)

with graph.inserting_after(node):
squeeze_after = graph.create_node(
"call_function", exir_ops.edge.aten.squeeze_copy.dims
)
squeeze_after.args = (
node,
[-1],
)
original_users = [
user for user in node.users if user != squeeze_after
]
for user in original_users:
user.replace_input_with(node, squeeze_after)

graph_module.recompile()
graph_module = super().call(graph_module).graph_module
return PassResult(graph_module, True)
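For context, the rewrite above relies on the standard equivalence between a 1-D convolution and a 2-D convolution over a singleton trailing dimension. A minimal PyTorch sketch of that equivalence (illustrative only, not part of the pass):

```python
import torch
import torch.nn.functional as F

# conv1d(x, w) == squeeze(conv2d(unsqueeze(x, -1), unsqueeze(w, -1)), -1)
x = torch.randn(1, 3, 16)   # (N, C_in, L)
w = torch.randn(8, 3, 5)    # (C_out, C_in, K)

out_1d = F.conv1d(x, w, stride=1, padding=0, dilation=1)
out_2d = F.conv2d(
    x.unsqueeze(-1),        # (N, C_in, L, 1)
    w.unsqueeze(-1),        # (C_out, C_in, K, 1)
    stride=(1, 1),
    padding=(0, 0),
    dilation=(1, 1),
).squeeze(-1)

assert torch.allclose(out_1d, out_2d, atol=1e-5)
```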
40 changes: 40 additions & 0 deletions backends/samsung/_passes/customized_constant_prop.py
@@ -0,0 +1,40 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import executorch.exir.passes.constant_prop_pass as constant_prop_module
from executorch.exir import ExportedProgram
from executorch.exir.pass_base import ExportPass, PassResult
from executorch.exir.passes.constant_prop_pass import constant_prop_pass
from torch.fx import GraphModule


class _constant_prop_context:
def __init__(self):
self.backup = constant_prop_module._DEFAULT_SKIP_TARGETS

def __enter__(self):
constant_prop_module._DEFAULT_SKIP_TARGETS = (
constant_prop_module._DEFAULT_SKIP_TARGETS_NO_QUANT
)

def __exit__(self, exc_type, exc_val, exc_tb):
constant_prop_module._DEFAULT_SKIP_TARGETS = self.backup


class ConstantPropPass(ExportPass):
"""
The official constant_prop_pass will not fold Q-DQ pairs, but we also need to
fold quantized constant tensors, not just non-quantized ones.
"""

def __init__(self, edge_program: ExportedProgram):
super().__init__()
self.edge_program = edge_program

def call(self, graph_module: GraphModule):
with _constant_prop_context():
_ = constant_prop_pass(self.edge_program)
return PassResult(graph_module, True)
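As a rough illustration in plain PyTorch (not the ExecuTorch constant-prop machinery), folding a quantize/dequantize chain over a constant simply means precomputing the dequantized value once so the lowered graph keeps only the folded constant:

```python
import torch

# Constant weight with a quantize -> dequantize chain applied to it.
w = torch.randn(8, 4)
q = torch.quantize_per_tensor(w, scale=0.02, zero_point=0, dtype=torch.qint8)

# Constant folding precomputes the dequantized value; at runtime the graph
# only needs this tensor, not the Q/DQ ops.
folded = q.dequantize()
print(folded.shape)  # torch.Size([8, 4])
```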
46 changes: 46 additions & 0 deletions backends/samsung/_passes/replace_scalar_ops.py
@@ -0,0 +1,46 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict, Tuple

import torch
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass
from torch._export.pass_base import Argument
from torch._export.pass_infra.node_metadata import NodeMetadata
from torch._export.pass_infra.proxy_value import ProxyValue


class ReplaceOpsWithScalar(ExportPass):
    # Replace binary ops that take a scalar operand with their tensor variants.
    # Mapping listed below.
_ops_with_scalar = {
exir_ops.edge.aten.add.Scalar: exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.sub.Scalar: exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.div.Scalar: exir_ops.edge.aten.div.Tensor,
exir_ops.edge.aten.mul.Scalar: exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.pow.Tensor_Scalar: exir_ops.edge.aten.pow.Tensor_Tensor,
}

def __init__(self):
super(ReplaceOpsWithScalar, self).__init__()

def call_operator(
self,
op,
args: Tuple[Argument, ...],
kwargs: Dict[str, Argument],
meta: NodeMetadata,
) -> ProxyValue:
if op not in self._ops_with_scalar:
return super().call_operator(op, args, kwargs, meta)

return super().call_operator(
op=self._ops_with_scalar.get(op, op),
args=(args[0], torch.tensor(args[1])),
kwargs=kwargs,
meta=meta,
)
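The effect of the mapping is just promoting the scalar operand to a 0-dim tensor; in eager PyTorch terms (illustrative, not the edge dialect itself):

```python
import torch

x = torch.randn(2, 3)

# add.Scalar-style call ...
y_scalar = torch.add(x, 2.0)
# ... is numerically equivalent to the add.Tensor-style call the pass emits.
y_tensor = torch.add(x, torch.tensor(2.0))

assert torch.allclose(y_scalar, y_tensor)
```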
44 changes: 44 additions & 0 deletions backends/samsung/builders/__init__.py
@@ -9,39 +9,83 @@
op_add,
op_avg_pool2d,
op_batch_norm,
op_bmm,
op_cat,
op_clamp,
op_constant_pad_nd,
op_conv2d,
op_div,
op_embedding,
op_expand_copy,
op_gelu,
op_getitem,
op_hardswish,
op_hardtanh,
op_layer_norm,
op_leaky_relu,
op_linear,
op_log_softmax,
op_max_pool2d,
op_maximum,
op_mean_dim,
op_minimum,
op_mul,
op_permute,
op_pixel_shuffle,
op_relu,
op_reshape,
op_rsqrt,
op_select,
op_slice_copy,
op_softmax,
op_sqrt,
op_squeeze,
op_sub,
op_to_copy,
op_unsqueeze,
op_upsample_bilinear2d,
op_upsample_nearest2d,
)

__all__ = [
node_visitor,
op_add,
op_avg_pool2d,
op_batch_norm,
op_bmm,
op_cat,
op_clamp,
op_conv2d,
op_constant_pad_nd,
op_div,
op_embedding,
op_expand_copy,
op_gelu,
op_getitem,
op_hardswish,
op_hardtanh,
op_layer_norm,
op_leaky_relu,
op_linear,
op_log_softmax,
op_max_pool2d,
op_maximum,
op_mean_dim,
op_minimum,
op_mul,
op_permute,
op_pixel_shuffle,
op_relu,
op_reshape,
op_rsqrt,
op_select,
op_slice_copy,
op_softmax,
op_sqrt,
op_squeeze,
op_sub,
op_to_copy,
op_unsqueeze,
op_upsample_bilinear2d,
op_upsample_nearest2d,
]
40 changes: 40 additions & 0 deletions backends/samsung/builders/op_bmm.py
@@ -0,0 +1,40 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict

import torch
from executorch.backends.samsung.builders.node_visitor import (
NodeVisitor,
register_node_visitor,
)
from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph


@register_node_visitor
class BMMVisitor(NodeVisitor):
target = "aten.bmm.default"

def __init__(self, *args) -> None:
super().__init__(*args)

def define_node(
self,
node: torch.fx.Node,
enn_graph: EnnGraph,
vals_to_ids: Dict[torch.Tensor, int],
) -> None:
input1 = node.args[0]
input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids)
input2 = node.args[1]
input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids)

# output
output_id = self.define_tensor(node, enn_graph, vals_to_ids)

enn_graph.define_op(
node.name, "BATCH_MATMUL", [input_id_1, input_id_2], [output_id]
)
Review comment on lines +30 to +40 (Contributor):
It seems like there's a lot of boilerplate in these visitor definitions. You could package up a few helper subclasses (UnaryOpVisitor, BinaryOpVisitor, etc.) that get the operator name ("BATCH_MATMUL", etc.) from a class property, similar to the existing target property. To accommodate the ops with params, the helper subclass could call self.get_params() (default implementation that returns None) and pass the result to define_op when it isn't None.
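A rough sketch of that suggestion, reusing only the API already shown in this PR (NodeVisitor, register_node_visitor, define_tensor, define_op); the names BinaryOpVisitor, enn_op_type, and get_params are hypothetical:

```python
from typing import Dict

import torch
from executorch.backends.samsung.builders.node_visitor import (
    NodeVisitor,
    register_node_visitor,
)
from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph


class BinaryOpVisitor(NodeVisitor):
    # ENN operator name, set by each concrete subclass.
    enn_op_type: str = ""

    def get_params(self, node: torch.fx.Node):
        # Default: op takes no params; subclasses override when needed.
        return None

    def define_node(
        self,
        node: torch.fx.Node,
        enn_graph: EnnGraph,
        vals_to_ids: Dict[torch.Tensor, int],
    ) -> None:
        input_ids = [
            self.define_tensor(arg, enn_graph, vals_to_ids) for arg in node.args[:2]
        ]
        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
        params = self.get_params(node)
        if params is None:
            enn_graph.define_op(node.name, self.enn_op_type, input_ids, [output_id])
        else:
            enn_graph.define_op(
                node.name, self.enn_op_type, input_ids, [output_id], params
            )


@register_node_visitor
class BMMVisitor(BinaryOpVisitor):
    target = "aten.bmm.default"
    enn_op_type = "BATCH_MATMUL"
```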

56 changes: 56 additions & 0 deletions backends/samsung/builders/op_constant_pad_nd.py
@@ -0,0 +1,56 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import cast, Dict, List

import numpy as np

import torch
from executorch.backends.samsung.builders.node_visitor import (
NodeVisitor,
register_node_visitor,
)
from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
from executorch.backends.transforms import get_shape


@register_node_visitor
class ConstantPadNDVisitor(NodeVisitor):
target = "aten.constant_pad_nd.default"

def __init__(self, *args) -> None:
super().__init__(*args)
Review comment on lines +24 to +25 (@swolchok, Contributor, Sep 11, 2025):

I think __init__ definitions like this can just be omitted


def define_node(
self,
node: torch.fx.Node,
enn_graph: EnnGraph,
vals_to_ids: Dict[torch.Tensor, int],
) -> None:
input = node.args[0]
input_id = self.define_tensor(input, enn_graph, vals_to_ids)

        # Torch padding order starts from the last axis; reorder it to match
        # the layout Samsung Lite-Core expects.
paddings = np.reshape(cast(List[int], node.args[1]), (-1, 2))[::-1].astype(
np.uint32
)
in_shape = get_shape(input)
paddings = paddings.reshape(-1).tolist()
paddings = [0] * (2 * len(in_shape) - len(paddings)) + paddings
paddings = paddings[::2] + paddings[1::2]

padding_value = node.args[2]
        assert padding_value == 0.0, "Only padding constant 0 is supported now."
# output
output_id = self.define_tensor(node, enn_graph, vals_to_ids)

params = {
"explicit_padding": paddings,
"padding": "EXPLICIT",
"padding_type": "CONSTANT",
}

enn_graph.define_op(node.name, "PAD", [input_id], [output_id], params)
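For reference, a small standalone numpy trace of the padding reordering in define_node above (example values for a 4-D NCHW input; not part of the PR):

```python
import numpy as np

# Torch-style pads (w_left, w_right, h_top, h_bottom) = (1, 2, 3, 4)
# for an input of shape (N, C, H, W).
in_shape = (1, 3, 8, 8)
torch_pads = [1, 2, 3, 4]

p = np.reshape(torch_pads, (-1, 2))[::-1]      # [[3, 4], [1, 2]]: H pair first
p = p.reshape(-1).tolist()                     # [3, 4, 1, 2]
p = [0] * (2 * len(in_shape) - len(p)) + p     # [0, 0, 0, 0, 3, 4, 1, 2]
explicit_padding = p[::2] + p[1::2]            # all begins, then all ends (N, C, H, W)
print(explicit_padding)                        # [0, 0, 3, 1, 0, 0, 4, 2]
```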