From 6d929aea1c33ffc4b7b83318c2336d3d1fd5c057 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 9 Sep 2025 04:55:18 +0000 Subject: [PATCH 01/15] change docker image for using android ndk Signed-off-by: jiseong.oh --- .ci/scripts/setup-samsung-linux-deps.sh | 9 --------- .github/workflows/pull.yml | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.ci/scripts/setup-samsung-linux-deps.sh b/.ci/scripts/setup-samsung-linux-deps.sh index 7e6024c584e..ed704b2bfbd 100644 --- a/.ci/scripts/setup-samsung-linux-deps.sh +++ b/.ci/scripts/setup-samsung-linux-deps.sh @@ -54,15 +54,6 @@ install_enn_backend() { rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}" ANDROID_NDK_VERSION=r27b - pushd . - cd /tmp - curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip" - unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip" - - # Print the content for manual verification - ls -lah "android-ndk-${ANDROID_NDK_VERSION}" - sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}" - popd # build Exynos backend export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk} bash backends/samsung/build.sh --build all diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 68d7f90d09c..7a1d0222231 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -874,7 +874,7 @@ jobs: contents: read with: runner: linux.2xlarge - docker-image: ci-image:executorch-ubuntu-22.04-gcc9 + docker-image: ci-image:executorch-ubuntu-22.04-clang12-android submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 From 93dff0574a656d5dc64aef5147fb9c096d8ca8cd Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 2 Sep 2025 10:15:51 +0000 Subject: [PATCH 02/15] update batchmul, div, max/min/rsqrt/slice_copy/sqrt/to_copy op - Add ops in op builders - batchmatmul, div, maximun, minimun, rsqrt, slice_copy, sqrt, to_copy Co-authored-by: chong-chen Co-authored-by: jingya-zhang --- backends/samsung/builders/__init__.py | 24 ++++++++ backends/samsung/builders/op_bmm.py | 46 ++++++++++++++++ backends/samsung/builders/op_div.py | 44 +++++++++++++++ backends/samsung/builders/op_maximum.py | 38 +++++++++++++ backends/samsung/builders/op_minimum.py | 42 ++++++++++++++ backends/samsung/builders/op_rsqrt.py | 37 +++++++++++++ backends/samsung/builders/op_slice_copy.py | 64 ++++++++++++++++++++++ backends/samsung/builders/op_sqrt.py | 37 +++++++++++++ backends/samsung/builders/op_to_copy.py | 44 +++++++++++++++ 9 files changed, 376 insertions(+) create mode 100644 backends/samsung/builders/op_bmm.py create mode 100644 backends/samsung/builders/op_div.py create mode 100644 backends/samsung/builders/op_maximum.py create mode 100644 backends/samsung/builders/op_minimum.py create mode 100644 backends/samsung/builders/op_rsqrt.py create mode 100644 backends/samsung/builders/op_slice_copy.py create mode 100644 backends/samsung/builders/op_sqrt.py create mode 100644 backends/samsung/builders/op_to_copy.py diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index b3e72da36c3..667e03db3f3 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -9,20 +9,32 @@ op_add, op_avg_pool2d, op_batch_norm, + op_bmm, op_cat, op_clamp, op_conv2d, op_getitem, + op_div, op_hardtanh, op_linear, op_max_pool2d, + op_maximum, op_mean_dim, + op_minimum, op_mul, op_permute, op_relu, 
op_reshape, + op_rsqrt, + op_select, op_unsqueeze, + op_slice_copy, + op_sqrt, + op_to_copy, + + + ) __all__ = [ @@ -30,18 +42,30 @@ op_add, op_avg_pool2d, op_batch_norm, + op_bmm, op_cat, op_clamp, op_conv2d, op_getitem, + op_div, + op_hardtanh, op_linear, op_max_pool2d, + op_maximum, op_mean_dim, + op_minimum, op_mul, op_permute, op_relu, op_reshape, + op_rsqrt, + op_select, op_unsqueeze, + op_slice_copy, + op_sqrt, + op_to_copy, + + ] diff --git a/backends/samsung/builders/op_bmm.py b/backends/samsung/builders/op_bmm.py new file mode 100644 index 00000000000..6e1529bc4d8 --- /dev/null +++ b/backends/samsung/builders/op_bmm.py @@ -0,0 +1,46 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class BMMVisitor(NodeVisitor): + target = "aten.bmm.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + # input1 + input1 = node.args[0] + input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) + vals_to_ids[input1] = input_id_1 + + # input2 + input2 = node.args[1] + input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) + vals_to_ids[input2] = input_id_2 + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + vals_to_ids[node] = output_id + + enn_graph.define_op( + node.name, "BATCH_MATMUL", [input_id_1, input_id_2], [output_id] + ) diff --git a/backends/samsung/builders/op_div.py b/backends/samsung/builders/op_div.py new file mode 100644 index 00000000000..13d1e15adae --- /dev/null +++ b/backends/samsung/builders/op_div.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class DivVisitor(NodeVisitor): + target = "aten.div.Tensor" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + # input1 + input1 = node.args[0] + input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) + + # input2 + input2 = node.args[1] + input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) + params = {} + self._update_params_qdtype(node, params) + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op( + node.name, "ELTDIV", [input_id_1, input_id_2], [output_id], params + ) diff --git a/backends/samsung/builders/op_maximum.py b/backends/samsung/builders/op_maximum.py new file mode 100644 index 00000000000..dcd596282d2 --- /dev/null +++ b/backends/samsung/builders/op_maximum.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025 Samsung Electronics Co. 
LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class MaximumVisitor(NodeVisitor): + target = "aten.maximum.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + # input1 + input_id_1 = self.define_tensor(node.args[0], enn_graph, vals_to_ids) + # input2 + input_id_2 = self.define_tensor(node.args[1], enn_graph, vals_to_ids) + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "MAXIMUM", [input_id_1, input_id_2], [output_id]) diff --git a/backends/samsung/builders/op_minimum.py b/backends/samsung/builders/op_minimum.py new file mode 100644 index 00000000000..e338eaaac14 --- /dev/null +++ b/backends/samsung/builders/op_minimum.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class MinimumVisitor(NodeVisitor): + target = "aten.minimum.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + # input1 + input1 = node.args[0] + input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) + + # input2 + input2 = node.args[1] + input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + vals_to_ids[node] = output_id + + enn_graph.define_op(node.name, "MIN", [input_id_1, input_id_2], [output_id]) diff --git a/backends/samsung/builders/op_rsqrt.py b/backends/samsung/builders/op_rsqrt.py new file mode 100644 index 00000000000..a62a0d9534c --- /dev/null +++ b/backends/samsung/builders/op_rsqrt.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+
+from typing import Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+
+
+@register_node_visitor
+class RsqrtVisitor(NodeVisitor):
+    target = "aten.rsqrt.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+        vals_to_ids[input] = input_id
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+        vals_to_ids[node] = output_id
+
+        enn_graph.define_op(node.name, "RSQRT", [input_id], [output_id])
diff --git a/backends/samsung/builders/op_slice_copy.py b/backends/samsung/builders/op_slice_copy.py
new file mode 100644
index 00000000000..9000d1acc85
--- /dev/null
+++ b/backends/samsung/builders/op_slice_copy.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import cast, Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+from executorch.backends.transforms import get_shape
+
+
+@register_node_visitor
+class SliceCopyVisitor(NodeVisitor):
+    target = "aten.slice_copy.Tensor"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ):
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+        vals_to_ids[input] = input_id
+
+        # output
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+        vals_to_ids[node] = output_id
+
+        in_shape = get_shape(input)
+        dim = cast(int, node.args[1])
+        if dim < 0:
+            dim = dim + len(in_shape)
+        start_val = cast(int, node.args[2])
+        if start_val < 0:
+            start_val = start_val + in_shape[dim]
+        end_val = min(cast(int, node.args[3]), in_shape[dim])
+        if end_val < 0:
+            end_val = end_val + in_shape[dim]
+
+        step = cast(int, node.args[4]) if len(node.args) > 4 else 1
+
+        begin = [0] * len(in_shape)
+        begin[dim] = start_val
+        end = in_shape
+        end[dim] = end_val
+        strides = [1] * len(in_shape)
+        strides[dim] = step
+
+        params = {}
+        params["begin"] = begin
+        params["end"] = end
+        params["strides"] = strides
+
+        enn_graph.define_op(node.name, "STRIDEDSLICE", [input_id], [output_id], params)
diff --git a/backends/samsung/builders/op_sqrt.py b/backends/samsung/builders/op_sqrt.py
new file mode 100644
index 00000000000..ebc64f4c5e3
--- /dev/null
+++ b/backends/samsung/builders/op_sqrt.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+ +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class SqrtVisitor(NodeVisitor): + target = "aten.sqrt.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + vals_to_ids[input] = input_id + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + vals_to_ids[node] = output_id + + enn_graph.define_op(node.name, "SQRT", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_to_copy.py b/backends/samsung/builders/op_to_copy.py new file mode 100644 index 00000000000..c770602bb5f --- /dev/null +++ b/backends/samsung/builders/op_to_copy.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) + +from executorch.backends.samsung.builders.utils import get_map_dtype, get_tensor +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class ToCopyVisitor(NodeVisitor): + target = ["aten._to_copy.default", "dim_order_ops._to_dim_order_copy.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + memory_format_target = node.kwargs.get("memory_format", torch.contiguous_format) + to_contiguous = bool(memory_format_target == torch.contiguous_format) + assert to_contiguous, "Don't support other param in _to_copy" + + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + params = {} + out_tensor = get_tensor(self.exported_program, node) + params["out_dtype"] = get_map_dtype(out_tensor.dtype) + + enn_graph.define_op(node.name, "CAST", [input_id], [output_id], params) From 49345f8f6bd096248a471dadaa6f2ce13d667195 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 2 Sep 2025 22:38:14 +0000 Subject: [PATCH 03/15] add squeeze, sub ops - add squeeze, sub ops Co-authored-by: chong-chen --- backends/samsung/builders/__init__.py | 5 ++ backends/samsung/builders/op_squeeze.py | 62 +++++++++++++++++++++++++ backends/samsung/builders/op_sub.py | 44 ++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 backends/samsung/builders/op_squeeze.py create mode 100644 backends/samsung/builders/op_sub.py diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index 667e03db3f3..5826d4fb049 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -31,6 +31,9 @@ op_unsqueeze, op_slice_copy, op_sqrt, + op_squeeze, + op_sub, + op_to_copy, @@ -65,6 +68,8 @@ op_unsqueeze, op_slice_copy, op_sqrt, + op_squeeze, + op_sub, op_to_copy, diff --git a/backends/samsung/builders/op_squeeze.py b/backends/samsung/builders/op_squeeze.py new file mode 100644 index 
00000000000..ecdbeb0ad38 --- /dev/null +++ b/backends/samsung/builders/op_squeeze.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class SqueezeVisitor(NodeVisitor): + target = "aten.squeeze_copy.dims" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + vals_to_ids[input] = input_id + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + vals_to_ids[node] = output_id + + enn_graph.define_op(node.name, "RESHAPE", [input_id], [output_id]) + + +@register_node_visitor +class UnsqueezeVisitor(NodeVisitor): + target = "aten.unsqueeze_copy.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + vals_to_ids[input] = input_id + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + vals_to_ids[node] = output_id + + enn_graph.define_op(node.name, "RESHAPE", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_sub.py b/backends/samsung/builders/op_sub.py new file mode 100644 index 00000000000..5b958e3e970 --- /dev/null +++ b/backends/samsung/builders/op_sub.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class SubVisitor(NodeVisitor): + target = "aten.sub.Tensor" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + # input1 + input1 = node.args[0] + input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) + vals_to_ids[input1] = input_id_1 + + # input2 + input2 = node.args[1] + input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) + vals_to_ids[input2] = input_id_2 + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + vals_to_ids[node] = output_id + + enn_graph.define_op(node.name, "SUB", [input_id_1, input_id_2], [output_id]) From 4d92c32901695968efb5f34867ecad55e2a8f433 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 9 Sep 2025 13:42:58 +0800 Subject: [PATCH 04/15] Support MV3 float model 1. Propagate constant and remove constant ops. 2. Prevent some ops from decomposing.(HardSwish, linear in mv3) 3. 
Add models name to support list (ic4, edsr enabled at the same time) in aot_compiler Co-authored-by: xz-linghu Co-authored-by: chong-chen --- .../_passes/customized_constant_prop.py | 40 +++++++++++++++++++ backends/samsung/builders/__init__.py | 19 +++------ backends/samsung/builders/op_bmm.py | 6 --- backends/samsung/builders/op_div.py | 11 ++--- backends/samsung/builders/op_hardswish.py | 35 ++++++++++++++++ backends/samsung/builders/op_maximum.py | 3 +- backends/samsung/builders/op_minimum.py | 5 +-- backends/samsung/builders/op_rsqrt.py | 2 - backends/samsung/builders/op_slice_copy.py | 7 +--- backends/samsung/builders/op_sqrt.py | 2 - backends/samsung/builders/op_squeeze.py | 26 ------------ backends/samsung/builders/op_sub.py | 7 +--- backends/samsung/builders/op_to_copy.py | 7 +--- backends/samsung/enn_preprocess.py | 4 ++ backends/samsung/partition/enn_partitioner.py | 14 ++++++- backends/samsung/utils/export_utils.py | 20 +++++++++- examples/samsung/aot_compiler.py | 2 +- 17 files changed, 126 insertions(+), 84 deletions(-) create mode 100644 backends/samsung/_passes/customized_constant_prop.py create mode 100644 backends/samsung/builders/op_hardswish.py diff --git a/backends/samsung/_passes/customized_constant_prop.py b/backends/samsung/_passes/customized_constant_prop.py new file mode 100644 index 00000000000..fa5bad3a056 --- /dev/null +++ b/backends/samsung/_passes/customized_constant_prop.py @@ -0,0 +1,40 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import executorch.exir.passes.constant_prop_pass as constant_prop_module +from executorch.exir import ExportedProgram +from executorch.exir.pass_base import ExportPass, PassResult +from executorch.exir.passes.constant_prop_pass import constant_prop_pass +from torch.fx import GraphModule + + +class _constant_prop_context: + def __init__(self): + self.backup = constant_prop_module._DEFAULT_SKIP_TARGETS + + def __enter__(self): + constant_prop_module._DEFAULT_SKIP_TARGETS = ( + constant_prop_module._DEFAULT_SKIP_TARGETS_NO_QUANT + ) + + def __exit__(self, exc_type, exc_val, exc_tb): + constant_prop_module._DEFAULT_SKIP_TARGETS = self.backup + + +class ConstantPropPass(ExportPass): + """ + Official constant_prop_pass will not fold Q-DQ + But we need to fold quantized constant tensor as well as non-quantized one + """ + + def __init__(self, edge_program: ExportedProgram): + super().__init__() + self.edge_program = edge_program + + def call(self, graph_module: GraphModule): + with _constant_prop_context(): + _ = constant_prop_pass(self.edge_program) + return PassResult(graph_module, True) diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index 5826d4fb049..dc284ea2aa7 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -13,8 +13,9 @@ op_cat, op_clamp, op_conv2d, - op_getitem, op_div, + op_getitem, + op_hardswish, op_hardtanh, op_linear, op_max_pool2d, @@ -26,18 +27,13 @@ op_relu, op_reshape, op_rsqrt, - op_select, - op_unsqueeze, op_slice_copy, op_sqrt, op_squeeze, op_sub, - op_to_copy, - - - + op_unsqueeze, ) __all__ = [ @@ -49,9 +45,9 @@ op_cat, op_clamp, op_conv2d, - op_getitem, op_div, - + op_getitem, + op_hardswish, op_hardtanh, op_linear, op_max_pool2d, @@ -63,14 +59,11 @@ op_relu, op_reshape, op_rsqrt, - op_select, - op_unsqueeze, op_slice_copy, op_sqrt, op_squeeze, op_sub, 
op_to_copy, - - + op_unsqueeze, ] diff --git a/backends/samsung/builders/op_bmm.py b/backends/samsung/builders/op_bmm.py index 6e1529bc4d8..6ba8864ebb3 100644 --- a/backends/samsung/builders/op_bmm.py +++ b/backends/samsung/builders/op_bmm.py @@ -27,19 +27,13 @@ def define_node( enn_graph: EnnGraph, vals_to_ids: Dict[torch.Tensor, int], ) -> None: - # input1 input1 = node.args[0] input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) - vals_to_ids[input1] = input_id_1 - - # input2 input2 = node.args[1] input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) - vals_to_ids[input2] = input_id_2 # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id enn_graph.define_op( node.name, "BATCH_MATMUL", [input_id_1, input_id_2], [output_id] diff --git a/backends/samsung/builders/op_div.py b/backends/samsung/builders/op_div.py index 13d1e15adae..89d773ddb0e 100644 --- a/backends/samsung/builders/op_div.py +++ b/backends/samsung/builders/op_div.py @@ -27,18 +27,13 @@ def define_node( enn_graph: EnnGraph, vals_to_ids: Dict[torch.Tensor, int], ) -> None: - # input1 + # inputs input1 = node.args[0] input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) - - # input2 input2 = node.args[1] input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) - params = {} - self._update_params_qdtype(node, params) + # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - enn_graph.define_op( - node.name, "ELTDIV", [input_id_1, input_id_2], [output_id], params - ) + enn_graph.define_op(node.name, "ELTDIV", [input_id_1, input_id_2], [output_id]) diff --git a/backends/samsung/builders/op_hardswish.py b/backends/samsung/builders/op_hardswish.py new file mode 100644 index 00000000000..72a99d17b83 --- /dev/null +++ b/backends/samsung/builders/op_hardswish.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class HardSwishVisitor(NodeVisitor): + target = "aten.hardswish.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "HARDSWISH", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_maximum.py b/backends/samsung/builders/op_maximum.py index dcd596282d2..d3358d736f3 100644 --- a/backends/samsung/builders/op_maximum.py +++ b/backends/samsung/builders/op_maximum.py @@ -27,9 +27,8 @@ def define_node( enn_graph: EnnGraph, vals_to_ids: Dict[torch.Tensor, int], ) -> None: - # input1 + # inputs input_id_1 = self.define_tensor(node.args[0], enn_graph, vals_to_ids) - # input2 input_id_2 = self.define_tensor(node.args[1], enn_graph, vals_to_ids) # output diff --git a/backends/samsung/builders/op_minimum.py b/backends/samsung/builders/op_minimum.py index e338eaaac14..a32b462d45f 100644 --- a/backends/samsung/builders/op_minimum.py +++ b/backends/samsung/builders/op_minimum.py @@ -27,16 +27,13 @@ def define_node( enn_graph: EnnGraph, vals_to_ids: Dict[torch.Tensor, int], ) -> None: - # input1 + # inputs input1 = node.args[0] input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) - - # input2 input2 = node.args[1] input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id enn_graph.define_op(node.name, "MIN", [input_id_1, input_id_2], [output_id]) diff --git a/backends/samsung/builders/op_rsqrt.py b/backends/samsung/builders/op_rsqrt.py index a62a0d9534c..b3600d41ee2 100644 --- a/backends/samsung/builders/op_rsqrt.py +++ b/backends/samsung/builders/op_rsqrt.py @@ -29,9 +29,7 @@ def define_node( ) -> None: input = node.args[0] input_id = self.define_tensor(input, enn_graph, vals_to_ids) - vals_to_ids[input] = input_id output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id enn_graph.define_op(node.name, "RSQRT", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_slice_copy.py b/backends/samsung/builders/op_slice_copy.py index 9000d1acc85..0d7a23118a0 100644 --- a/backends/samsung/builders/op_slice_copy.py +++ b/backends/samsung/builders/op_slice_copy.py @@ -30,11 +30,9 @@ def define_node( ): input = node.args[0] input_id = self.define_tensor(input, enn_graph, vals_to_ids) - vals_to_ids[input] = input_id # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id in_shape = get_shape(input) dim = cast(int, node.args[1]) @@ -56,9 +54,6 @@ def define_node( strides = [1] * len(in_shape) strides[dim] = step - params = {} - params["begin"] = begin - params["end"] = end - params["strides"] = strides + params = {"begin": begin, "end": end, "strides": strides} enn_graph.define_op(node.name, "STRIDEDSLICE", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_sqrt.py b/backends/samsung/builders/op_sqrt.py index ebc64f4c5e3..3560542a0bc 100644 --- a/backends/samsung/builders/op_sqrt.py 
+++ b/backends/samsung/builders/op_sqrt.py @@ -29,9 +29,7 @@ def define_node( ) -> None: input = node.args[0] input_id = self.define_tensor(input, enn_graph, vals_to_ids) - vals_to_ids[input] = input_id output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id enn_graph.define_op(node.name, "SQRT", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_squeeze.py b/backends/samsung/builders/op_squeeze.py index ecdbeb0ad38..d165a22fcb3 100644 --- a/backends/samsung/builders/op_squeeze.py +++ b/backends/samsung/builders/op_squeeze.py @@ -29,34 +29,8 @@ def define_node( ) -> None: input = node.args[0] input_id = self.define_tensor(input, enn_graph, vals_to_ids) - vals_to_ids[input] = input_id # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id - - enn_graph.define_op(node.name, "RESHAPE", [input_id], [output_id]) - - -@register_node_visitor -class UnsqueezeVisitor(NodeVisitor): - target = "aten.unsqueeze_copy.default" - - def __init__(self, *args) -> None: - super().__init__(*args) - - def define_node( - self, - node: torch.fx.Node, - enn_graph: EnnGraph, - vals_to_ids: Dict[torch.Tensor, int], - ) -> None: - input = node.args[0] - input_id = self.define_tensor(input, enn_graph, vals_to_ids) - vals_to_ids[input] = input_id - - # output - output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id enn_graph.define_op(node.name, "RESHAPE", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_sub.py b/backends/samsung/builders/op_sub.py index 5b958e3e970..af2931f298e 100644 --- a/backends/samsung/builders/op_sub.py +++ b/backends/samsung/builders/op_sub.py @@ -27,18 +27,13 @@ def define_node( enn_graph: EnnGraph, vals_to_ids: Dict[torch.Tensor, int], ) -> None: - # input1 + # inputs input1 = node.args[0] input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) - vals_to_ids[input1] = input_id_1 - - # input2 input2 = node.args[1] input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) - vals_to_ids[input2] = input_id_2 # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - vals_to_ids[node] = output_id enn_graph.define_op(node.name, "SUB", [input_id_1, input_id_2], [output_id]) diff --git a/backends/samsung/builders/op_to_copy.py b/backends/samsung/builders/op_to_copy.py index c770602bb5f..545672ef6a3 100644 --- a/backends/samsung/builders/op_to_copy.py +++ b/backends/samsung/builders/op_to_copy.py @@ -11,8 +11,6 @@ NodeVisitor, register_node_visitor, ) - -from executorch.backends.samsung.builders.utils import get_map_dtype, get_tensor from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph @@ -37,8 +35,5 @@ def define_node( input_id = self.define_tensor(input, enn_graph, vals_to_ids) output_id = self.define_tensor(node, enn_graph, vals_to_ids) - params = {} - out_tensor = get_tensor(self.exported_program, node) - params["out_dtype"] = get_map_dtype(out_tensor.dtype) - enn_graph.define_op(node.name, "CAST", [input_id], [output_id], params) + enn_graph.define_op(node.name, "CAST", [input_id], [output_id]) diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py index ca95e5e8611..035b89c6d46 100644 --- a/backends/samsung/enn_preprocess.py +++ b/backends/samsung/enn_preprocess.py @@ -9,6 +9,9 @@ import executorch.backends.samsung.python.PyEnnWrapperAdaptor as PyEnnWrapper import torch +from executorch.backends.samsung._passes.customized_constant_prop import ( + 
ConstantPropPass, +) from executorch.backends.samsung.builders.node_visitor import get_node_visitors from executorch.backends.samsung.serialization.compile_options import ( ENN_COMPILE_OPTION_TITLE, @@ -48,6 +51,7 @@ def preprocess( enn_preprocess_passes = PassManager( passes=[ + ConstantPropPass(edge_program), FuseBatchNormWithConvPass(edge_program), AddmmToLinearTransform(), RemoveGetItemPass(), diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py index 466a7d13e08..b50a49c1ea4 100644 --- a/backends/samsung/partition/enn_partitioner.py +++ b/backends/samsung/partition/enn_partitioner.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. import logging -from typing import Any, Dict, List +from typing import Any, Callable, Dict, List, Optional, Tuple import executorch.backends.samsung.builders.node_visitor as node_visitor @@ -109,3 +109,15 @@ def partition(self, edge_program: torch.export.ExportedProgram) -> PartitionResu return PartitionResult( tagged_exported_program=edge_program, partition_tags=self.partition_tags ) + + # override + def ops_to_not_decompose( + self, ep: torch.export.ExportedProgram + ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]: + ops_not_to_decompose = [ + torch.ops.aten.hardswish.default, + torch.ops.aten.max_pool2d.default, + torch.ops.aten.linear.default, + torch.ops.aten._safe_softmax.default, + ] + return (ops_not_to_decompose, None) diff --git a/backends/samsung/utils/export_utils.py b/backends/samsung/utils/export_utils.py index a9b7de7c5ae..aaf407ef0b3 100644 --- a/backends/samsung/utils/export_utils.py +++ b/backends/samsung/utils/export_utils.py @@ -12,9 +12,27 @@ from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform from executorch.exir import EdgeCompileConfig from executorch.exir.backend.backend_details import CompileSpec + +from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.program._program import to_edge_transform_and_lower +def get_edge_compile_config(): + return EdgeCompileConfig( + _skip_dim_order=True, + _core_aten_ops_exception_list=[ + exir_ops.edge.aten.max_pool2d.default, + exir_ops.edge.aten.linear.default, + exir_ops.edge.aten.hardswish.default, + exir_ops.edge.aten.prelu.default, + exir_ops.edge.aten.pixel_shuffle.default, + exir_ops.edge.aten._safe_softmax.default, + exir_ops.edge.aten.layer_norm.default, + exir_ops.edge.aten.matmul.default, + ], + ) + + def to_edge_transform_and_lower_to_enn( module: torch.nn.Module, inputs: Tuple[torch.Tensor], @@ -30,5 +48,5 @@ def to_edge_transform_and_lower_to_enn( prog, ahead_pass_list, {"forward": [EnnPartitioner(compile_specs)]}, - compile_config=EdgeCompileConfig(_skip_dim_order=True), + compile_config=get_edge_compile_config(), ) diff --git a/examples/samsung/aot_compiler.py b/examples/samsung/aot_compiler.py index 5b092d3d9ac..85ec65c6ad7 100644 --- a/examples/samsung/aot_compiler.py +++ b/examples/samsung/aot_compiler.py @@ -26,7 +26,7 @@ FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" logging.basicConfig(level=logging.INFO, format=FORMAT) -SUPPORT_MODEL_NAMES = ["mv2", "ic3", "resnet18", "resnet50"] +SUPPORT_MODEL_NAMES = ["mv2", "ic3", "ic4", "resnet18", "resnet50", "mv3", "edsr"] def save_tensors(tensors, prefix, artifact_dir): From 3058d75fab48eb0b9778c3101b75fab4dd337893 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 
9 Sep 2025 14:51:49 +0800 Subject: [PATCH 05/15] Add more ops not to decompose Add prelu, softmax, layer_norm, upsample builders. Add these ops to list which keep ops not to decompose Co-authored-by: chong-chen Co-authored-by: xz-linghu --- backends/samsung/builders/op_layer_norm.py | 54 +++++++++++++++++ backends/samsung/builders/op_leaky_relu.py | 58 +++++++++++++++++++ backends/samsung/builders/op_softmax.py | 39 +++++++++++++ .../builders/op_upsample_bilinear2d.py | 52 +++++++++++++++++ .../samsung/builders/op_upsample_nearest2d.py | 52 +++++++++++++++++ backends/samsung/partition/enn_partitioner.py | 4 ++ 6 files changed, 259 insertions(+) create mode 100644 backends/samsung/builders/op_layer_norm.py create mode 100644 backends/samsung/builders/op_leaky_relu.py create mode 100644 backends/samsung/builders/op_softmax.py create mode 100644 backends/samsung/builders/op_upsample_bilinear2d.py create mode 100644 backends/samsung/builders/op_upsample_nearest2d.py diff --git a/backends/samsung/builders/op_layer_norm.py b/backends/samsung/builders/op_layer_norm.py new file mode 100644 index 00000000000..e2ffc6c4f5e --- /dev/null +++ b/backends/samsung/builders/op_layer_norm.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class LayerNormVisitor(NodeVisitor): + target = ["aten.layer_norm.default"] + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + all_input_tensors = [] + input_node = node.args[0] + input_id = self.define_tensor(input_node, enn_graph, vals_to_ids) + all_input_tensors.append(input_id) + + normalized_shapes = node.args[1] + assert ( + len(normalized_shapes) == 1 + and normalized_shapes[0] == get_shape(input_node)[-1] + ), "Enn Backend only support norm at last axis." + + weight_node = node.args[2] + weight_id = self.define_tensor(weight_node, enn_graph, vals_to_ids) + all_input_tensors.append(weight_id) + bias_node = node.args[3] + bias_id = self.define_tensor(bias_node, enn_graph, vals_to_ids) + all_input_tensors.append(bias_id) + + epsilon = node.args[4] if len(node.args) > 4 else 1e-5 + params = {"epsilon": epsilon} + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op( + node.name, "LAYERNORM", all_input_tensors, [output_id], params + ) diff --git a/backends/samsung/builders/op_leaky_relu.py b/backends/samsung/builders/op_leaky_relu.py new file mode 100644 index 00000000000..c7ed37d12e5 --- /dev/null +++ b/backends/samsung/builders/op_leaky_relu.py @@ -0,0 +1,58 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import cast, Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.builders.utils import get_map_dtype +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class LeakyReluVisitor(NodeVisitor): + target = ["aten.leaky_relu.default", "aten.prelu.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + all_input_tensors = [] + input_id = self.define_tensor(node.args[0], enn_graph, vals_to_ids) + all_input_tensors.append(input_id) + + if node.target.__name__ == "aten.prelu.default": + negative_slope = node.args[1] + negative_slope_id = self.define_tensor( + negative_slope, enn_graph, vals_to_ids + ) + else: + negative_slope = cast(float, node.args[1]) if len(node.args) > 1 else 0.01 + negative_slope_tensor = torch.tensor(negative_slope).to(torch.float32) + negative_slope_node_name = node.name + "_slope" + dims = list(negative_slope_tensor.size()) + data_type = get_map_dtype(negative_slope_tensor.dtype) + negative_slope_id = enn_graph.define_tensor( + negative_slope_node_name, + dims, + data_type, + "CONSTANT", + negative_slope_tensor.detach().numpy(), + ) + + all_input_tensors.append(negative_slope_id) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "PRELU", all_input_tensors, [output_id]) diff --git a/backends/samsung/builders/op_softmax.py b/backends/samsung/builders/op_softmax.py new file mode 100644 index 00000000000..b86870f44f8 --- /dev/null +++ b/backends/samsung/builders/op_softmax.py @@ -0,0 +1,39 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import cast, Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class SoftmaxVisitor(NodeVisitor): + target = ["aten._softmax.default", "aten._safe_softmax.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ): + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + axis = cast(int, node.args[1]) + params = {"axis": axis} + + enn_graph.define_op(node.name, "SOFTMAX", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_upsample_bilinear2d.py b/backends/samsung/builders/op_upsample_bilinear2d.py new file mode 100644 index 00000000000..9d2f6c83503 --- /dev/null +++ b/backends/samsung/builders/op_upsample_bilinear2d.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import cast, Dict, List + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class UpsampleBilinear2dVisitor(NodeVisitor): + target = "aten.upsample_bilinear2d.vec" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + in_shape = get_shape(input) + output_size = cast(List[int], node.args[1]) + scale_factor = [ + output_size[0] * 1.0 / in_shape[-2], + output_size[1] * 1.0 / in_shape[-1], + ] + + align_corners = cast(bool, node.args[2]) + if len(node.args) > 3 and node.args[3]: + scale_factor = cast(List[float], node.args[3]) + + params = { + "align_corners": align_corners, + "upsampling_factor": scale_factor, + "half_pixel_centers": True, + } + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + enn_graph.define_op( + node.name, "RESIZE_BILINEAR", [input_id], [output_id], params + ) diff --git a/backends/samsung/builders/op_upsample_nearest2d.py b/backends/samsung/builders/op_upsample_nearest2d.py new file mode 100644 index 00000000000..1f59b35f202 --- /dev/null +++ b/backends/samsung/builders/op_upsample_nearest2d.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import cast, Dict, List + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class UpsampleNearest2dVisitor(NodeVisitor): + target = "aten.upsample_nearest2d.vec" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + in_shape = get_shape(input) + output_size = cast(List[int], node.args[1]) + scale_factor = [ + output_size[0] * 1.0 / in_shape[-2], + output_size[1] * 1.0 / in_shape[-1], + ] + + if len(node.args) > 2 and node.args[2]: + scale_factor = cast(List[float], node.args[2]) + + params = { + "align_corners": False, + "upsampling_factor": scale_factor, + "half_pixel_centers": True, + } + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + enn_graph.define_op( + node.name, "RESIZE_NEAREST_NEIGHBOR", [input_id], [output_id], params + ) diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py index b50a49c1ea4..ffb220977b4 100644 --- a/backends/samsung/partition/enn_partitioner.py +++ b/backends/samsung/partition/enn_partitioner.py @@ -119,5 +119,9 @@ def ops_to_not_decompose( torch.ops.aten.max_pool2d.default, torch.ops.aten.linear.default, torch.ops.aten._safe_softmax.default, + torch.ops.aten.upsample_bilinear2d.vec, + torch.ops.aten.upsample_nearest2d.vec, + torch.ops.aten.prelu.default, + torch.ops.aten.layer_norm.default, ] return 
(ops_not_to_decompose, None)

From d643e221abf263b139c8745e0a6520bc20ab5ce0 Mon Sep 17 00:00:00 2001
From: "jiseong.oh"
Date: Tue, 9 Sep 2025 15:07:25 +0800
Subject: [PATCH 06/15] Support torchvision VIT float model

1. Add necessary ops - gelu, expand, etc.
2. Preprocess mul/add/sub scalar ops, add ReplaceOpsWithScalar pass.

Co-authored-by: xz-linghu
Co-authored-by: chong-chen
---
 .../samsung/_passes/replace_scalar_ops.py     | 46 +++++++++++
 backends/samsung/builders/__init__.py         | 16 ++++
 backends/samsung/builders/op_expand_copy.py   | 77 +++++++++++++++++++
 backends/samsung/builders/op_gelu.py          | 34 ++++++++
 backends/samsung/builders/op_layer_norm.py    |  8 +-
 backends/samsung/builders/op_pixel_shuffle.py | 38 +++++++++
 backends/samsung/builders/op_softmax.py       |  8 +-
 .../builders/op_upsample_bilinear2d.py        | 10 +--
 .../samsung/builders/op_upsample_nearest2d.py |  2 +-
 backends/samsung/enn_preprocess.py            |  2 +
 backends/samsung/partition/enn_partitioner.py |  5 ++
 examples/samsung/aot_compiler.py              | 14 +++-
 12 files changed, 245 insertions(+), 15 deletions(-)
 create mode 100644 backends/samsung/_passes/replace_scalar_ops.py
 create mode 100644 backends/samsung/builders/op_expand_copy.py
 create mode 100644 backends/samsung/builders/op_gelu.py
 create mode 100644 backends/samsung/builders/op_pixel_shuffle.py

diff --git a/backends/samsung/_passes/replace_scalar_ops.py b/backends/samsung/_passes/replace_scalar_ops.py
new file mode 100644
index 00000000000..8ae54b0dc98
--- /dev/null
+++ b/backends/samsung/_passes/replace_scalar_ops.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, Tuple
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+from torch._export.pass_base import Argument
+from torch._export.pass_infra.node_metadata import NodeMetadata
+from torch._export.pass_infra.proxy_value import ProxyValue
+
+
+class ReplaceOpsWithScalar(ExportPass):
+    # Replace binary ops that take a scalar operand with the corresponding
+    # tensor variants. The supported mappings are listed below.
+ _ops_with_scalar = { + exir_ops.edge.aten.add.Scalar: exir_ops.edge.aten.add.Tensor, + exir_ops.edge.aten.sub.Scalar: exir_ops.edge.aten.sub.Tensor, + exir_ops.edge.aten.div.Scalar: exir_ops.edge.aten.div.Tensor, + exir_ops.edge.aten.mul.Scalar: exir_ops.edge.aten.mul.Tensor, + exir_ops.edge.aten.pow.Tensor_Scalar: exir_ops.edge.aten.pow.Tensor_Tensor, + } + + def __init__(self): + super(ReplaceOpsWithScalar, self).__init__() + + def call_operator( + self, + op, + args: Tuple[Argument, ...], + kwargs: Dict[str, Argument], + meta: NodeMetadata, + ) -> ProxyValue: + if op not in self._ops_with_scalar: + return super().call_operator(op, args, kwargs, meta) + + return super().call_operator( + op=self._ops_with_scalar.get(op, op), + args=(args[0], torch.tensor(args[1])), + kwargs=kwargs, + meta=meta, + ) diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index dc284ea2aa7..92916df311b 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -14,9 +14,13 @@ op_clamp, op_conv2d, op_div, + op_expand_copy, + op_gelu, op_getitem, op_hardswish, op_hardtanh, + op_layer_norm, + op_leaky_relu, op_linear, op_max_pool2d, op_maximum, @@ -24,16 +28,20 @@ op_minimum, op_mul, op_permute, + op_pixel_shuffle, op_relu, op_reshape, op_rsqrt, op_select, op_slice_copy, + op_softmax, op_sqrt, op_squeeze, op_sub, op_to_copy, op_unsqueeze, + op_upsample_bilinear2d, + op_upsample_nearest2d, ) __all__ = [ @@ -46,9 +54,13 @@ op_clamp, op_conv2d, op_div, + op_expand_copy, + op_gelu, op_getitem, op_hardswish, op_hardtanh, + op_layer_norm, + op_leaky_relu, op_linear, op_max_pool2d, op_maximum, @@ -56,14 +68,18 @@ op_minimum, op_mul, op_permute, + op_pixel_shuffle, op_relu, op_reshape, op_rsqrt, op_select, op_slice_copy, + op_softmax, op_sqrt, op_squeeze, op_sub, op_to_copy, op_unsqueeze, + op_upsample_bilinear2d, + op_upsample_nearest2d, ] diff --git a/backends/samsung/builders/op_expand_copy.py b/backends/samsung/builders/op_expand_copy.py new file mode 100644 index 00000000000..f4c707b8e62 --- /dev/null +++ b/backends/samsung/builders/op_expand_copy.py @@ -0,0 +1,77 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import cast, Dict, List + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class ExpandVisitor(NodeVisitor): + target = "aten.expand_copy.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ): + # inputs + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + in_shape = get_shape(input) + sizes = cast(List[int], node.args[1]) + expand_dims = self.check_expand_dims(sizes, in_shape) + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + if len(expand_dims) == 0: + params = {"new_shape": [*node.meta["val"].shape]} + enn_graph.define_op(node.name, "RESHAPE", [input_id], [output_id], params) + elif len(expand_dims) == 1: + expand_dim = expand_dims[0] + params = {"axis": expand_dim} + enn_graph.define_op( + node.name, + "CONCAT", + [input_id] * sizes[expand_dim], + [output_id], + params, + ) + else: + raise NotImplementedError("Don't support expanding at more than one axes.") + + def check_expand_dims(self, sizes, in_shape): + expand_dims = [] + new_size_index = len(sizes) + in_shape_index = len(in_shape) + + while in_shape_index > 0 and new_size_index > 0: + in_shape_index -= 1 + new_size_index -= 1 + if ( + sizes[new_size_index] == -1 + or sizes[new_size_index] == in_shape[in_shape_index] + ): + continue + expand_dims.append(in_shape_index) + + while new_size_index > 0: + new_size_index -= 1 + assert sizes[new_size_index] == 1, "Current expand is unsupported!" + + return expand_dims diff --git a/backends/samsung/builders/op_gelu.py b/backends/samsung/builders/op_gelu.py new file mode 100644 index 00000000000..059a3b77850 --- /dev/null +++ b/backends/samsung/builders/op_gelu.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class GeluVisitor(NodeVisitor): + target = "aten.gelu.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input_id = self.define_tensor(node.args[0], enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "GELU", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_layer_norm.py b/backends/samsung/builders/op_layer_norm.py index e2ffc6c4f5e..e6f853178d8 100644 --- a/backends/samsung/builders/op_layer_norm.py +++ b/backends/samsung/builders/op_layer_norm.py @@ -21,10 +21,10 @@ class LayerNormVisitor(NodeVisitor): target = ["aten.layer_norm.default"] def define_node( - self, - node: torch.fx.Node, - enn_graph: EnnGraph, - vals_to_ids: Dict[torch.Tensor, int], + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], ) -> None: all_input_tensors = [] input_node = node.args[0] diff --git a/backends/samsung/builders/op_pixel_shuffle.py b/backends/samsung/builders/op_pixel_shuffle.py new file mode 100644 index 00000000000..28259299c81 --- /dev/null +++ b/backends/samsung/builders/op_pixel_shuffle.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import cast, Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class PixelShuffleVisitor(NodeVisitor): + target = "aten.pixel_shuffle.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input_id = self.define_tensor(node.args[0], enn_graph, vals_to_ids) + + scale_factor = cast(int, node.args[1]) + params = {"block_size": scale_factor, "mode": "CRD"} + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op( + node.name, "DEPTH_TO_SPACE", [input_id], [output_id], params + ) diff --git a/backends/samsung/builders/op_softmax.py b/backends/samsung/builders/op_softmax.py index b86870f44f8..1e2e4a378dc 100644 --- a/backends/samsung/builders/op_softmax.py +++ b/backends/samsung/builders/op_softmax.py @@ -22,10 +22,10 @@ def __init__(self, *args) -> None: super().__init__(*args) def define_node( - self, - node: torch.fx.Node, - enn_graph: EnnGraph, - vals_to_ids: Dict[torch.Tensor, int], + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], ): input = node.args[0] input_id = self.define_tensor(input, enn_graph, vals_to_ids) diff --git a/backends/samsung/builders/op_upsample_bilinear2d.py b/backends/samsung/builders/op_upsample_bilinear2d.py index 9d2f6c83503..a934b2789ba 100644 --- a/backends/samsung/builders/op_upsample_bilinear2d.py +++ b/backends/samsung/builders/op_upsample_bilinear2d.py @@ -23,10 +23,10 @@ def __init__(self, *args) -> None: 
super().__init__(*args) def define_node( - self, - node: torch.fx.Node, - enn_graph: EnnGraph, - vals_to_ids: Dict[torch.Tensor, int], + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], ) -> None: input = node.args[0] input_id = self.define_tensor(input, enn_graph, vals_to_ids) @@ -35,7 +35,7 @@ def define_node( scale_factor = [ output_size[0] * 1.0 / in_shape[-2], output_size[1] * 1.0 / in_shape[-1], - ] + ] align_corners = cast(bool, node.args[2]) if len(node.args) > 3 and node.args[3]: diff --git a/backends/samsung/builders/op_upsample_nearest2d.py b/backends/samsung/builders/op_upsample_nearest2d.py index 1f59b35f202..9859cd8f07e 100644 --- a/backends/samsung/builders/op_upsample_nearest2d.py +++ b/backends/samsung/builders/op_upsample_nearest2d.py @@ -35,7 +35,7 @@ def define_node( scale_factor = [ output_size[0] * 1.0 / in_shape[-2], output_size[1] * 1.0 / in_shape[-1], - ] + ] if len(node.args) > 2 and node.args[2]: scale_factor = cast(List[float], node.args[2]) diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py index 035b89c6d46..75c5f3e7a8f 100644 --- a/backends/samsung/enn_preprocess.py +++ b/backends/samsung/enn_preprocess.py @@ -12,6 +12,7 @@ from executorch.backends.samsung._passes.customized_constant_prop import ( ConstantPropPass, ) +from executorch.backends.samsung._passes.replace_scalar_ops import ReplaceOpsWithScalar from executorch.backends.samsung.builders.node_visitor import get_node_visitors from executorch.backends.samsung.serialization.compile_options import ( ENN_COMPILE_OPTION_TITLE, @@ -54,6 +55,7 @@ def preprocess( ConstantPropPass(edge_program), FuseBatchNormWithConvPass(edge_program), AddmmToLinearTransform(), + ReplaceOpsWithScalar(), RemoveGetItemPass(), ] ) diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py index ffb220977b4..6faeb8a44b2 100644 --- a/backends/samsung/partition/enn_partitioner.py +++ b/backends/samsung/partition/enn_partitioner.py @@ -32,7 +32,12 @@ from torch.fx.passes.operator_support import OperatorSupportBase SUPPORTED_OPS = [ + # support because preprocess in backend exir_ops.edge.aten.addmm.default, + exir_ops.edge.aten.add.Scalar, + exir_ops.edge.aten.sub.Scalar, + exir_ops.edge.aten.mul.Scalar, + exir_ops.edge.aten.div.Scalar, ] diff --git a/examples/samsung/aot_compiler.py b/examples/samsung/aot_compiler.py index 85ec65c6ad7..371949cd3e4 100644 --- a/examples/samsung/aot_compiler.py +++ b/examples/samsung/aot_compiler.py @@ -26,7 +26,17 @@ FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" logging.basicConfig(level=logging.INFO, format=FORMAT) -SUPPORT_MODEL_NAMES = ["mv2", "ic3", "ic4", "resnet18", "resnet50", "mv3", "edsr"] +SUPPORT_MODEL_NAMES = [ + "mv2", + "ic3", + "ic4", + "resnet18", + "resnet50", + "mv3", + "edsr", + "dl3", + "vit", +] def save_tensors(tensors, prefix, artifact_dir): @@ -81,6 +91,8 @@ def save_tensors(tensors, prefix, artifact_dir): model = model.eval() outputs = model(*example_inputs) + print("start start ...") + compile_specs = [gen_samsung_backend_compile_spec(args.chipset)] edge = to_edge_transform_and_lower_to_enn( model, example_inputs, compile_specs=compile_specs From 9172064cabfa68c5b506acd254ab416697f21bd9 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 9 Sep 2025 15:39:43 +0800 Subject: [PATCH 07/15] Support w2l float model Convert conv1d to conv2d and support logsoftmax op. 
w2l can be enabled in samsung enn backend Co-authored-by: Jonghun Cha Co-authored-by: chong-chen --- backends/samsung/_passes/conv1d_to_conv2d.py | 88 ++++++++++++++++++++ backends/samsung/builders/__init__.py | 2 + backends/samsung/builders/op_log_softmax.py | 39 +++++++++ backends/samsung/enn_preprocess.py | 2 + examples/samsung/aot_compiler.py | 1 + 5 files changed, 132 insertions(+) create mode 100644 backends/samsung/_passes/conv1d_to_conv2d.py create mode 100644 backends/samsung/builders/op_log_softmax.py diff --git a/backends/samsung/_passes/conv1d_to_conv2d.py b/backends/samsung/_passes/conv1d_to_conv2d.py new file mode 100644 index 00000000000..57f1074b348 --- /dev/null +++ b/backends/samsung/_passes/conv1d_to_conv2d.py @@ -0,0 +1,88 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from executorch.exir import ExportedProgram +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import ExportPass, PassResult +from torch._export.utils import get_param + + +class Conv1dToConv2d(ExportPass): + + def __init__(self, edge_program: ExportedProgram): + super().__init__() + self.edge_program = edge_program + + def call(self, graph_module: torch.fx.GraphModule): + graph = graph_module.graph + node_list = list(graph.nodes) + for node in node_list: + if node.op == "call_function": + if node.target == exir_ops.edge.aten.convolution.default: + stride = list(node.args[3]) + if len(stride) != 1: + continue + + # convert 3dim weight to 4dim + weight_node = node.args[1] + weight_3dim = get_param(self.edge_program, weight_node) + weight_4dim = torch.nn.Parameter( + data=weight_3dim.data.contiguous().unsqueeze(dim=-1), + requires_grad=False, + ) + parameter_name = ( + self.edge_program.graph_signature.inputs_to_parameters[ + weight_node.name + ] + ) + self.edge_program.state_dict[parameter_name] = weight_4dim + weight_node.meta["val"] = weight_node.meta["val"].data.unsqueeze( + dim=-1 + ) + + # Extend stride, padding, and dilation + node.args = ( + node.args[0], + node.args[1], + node.args[2], + node.args[3] + [1], # stride + node.args[4] + [0], # padding + node.args[5] + [1], # dilation + node.args[6], + node.args[7], + node.args[8], + ) + + # unsqueeze -> conv2d -> squeeze + with graph.inserting_before(node): + input_node = node.args[0] + unsqueeze_before = graph.create_node( + "call_function", exir_ops.edge.aten.unsqueeze_copy.default + ) + unsqueeze_before.args = ( + input_node, + -1, + ) + node.replace_input_with(input_node, unsqueeze_before) + + with graph.inserting_after(node): + squeeze_after = graph.create_node( + "call_function", exir_ops.edge.aten.squeeze_copy.dims + ) + squeeze_after.args = ( + node, + [-1], + ) + original_users = [ + user for user in node.users if user != squeeze_after + ] + for user in original_users: + user.replace_input_with(node, squeeze_after) + + graph_module.recompile() + graph_module = super().call(graph_module).graph_module + return PassResult(graph_module, True) diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index 92916df311b..8f9b2d7d8c4 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -22,6 +22,7 @@ op_layer_norm, op_leaky_relu, op_linear, + op_log_softmax, op_max_pool2d, op_maximum, op_mean_dim, @@ -62,6 +63,7 @@ op_layer_norm, op_leaky_relu, op_linear, + 
op_log_softmax, op_max_pool2d, op_maximum, op_mean_dim, diff --git a/backends/samsung/builders/op_log_softmax.py b/backends/samsung/builders/op_log_softmax.py new file mode 100644 index 00000000000..f2d87601cbb --- /dev/null +++ b/backends/samsung/builders/op_log_softmax.py @@ -0,0 +1,39 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import cast, Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class LogSoftmaxVisitor(NodeVisitor): + target = "aten._log_softmax.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ): + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + axis = cast(int, node.args[1]) + meta_data = {"axis": axis} + + enn_graph.define_op(node.name, "LOGSOFTMAX", [input_id], [output_id], meta_data) diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py index 75c5f3e7a8f..dde01bc09c7 100644 --- a/backends/samsung/enn_preprocess.py +++ b/backends/samsung/enn_preprocess.py @@ -9,6 +9,7 @@ import executorch.backends.samsung.python.PyEnnWrapperAdaptor as PyEnnWrapper import torch +from executorch.backends.samsung._passes.conv1d_to_conv2d import Conv1dToConv2d from executorch.backends.samsung._passes.customized_constant_prop import ( ConstantPropPass, ) @@ -53,6 +54,7 @@ def preprocess( enn_preprocess_passes = PassManager( passes=[ ConstantPropPass(edge_program), + Conv1dToConv2d(edge_program), FuseBatchNormWithConvPass(edge_program), AddmmToLinearTransform(), ReplaceOpsWithScalar(), diff --git a/examples/samsung/aot_compiler.py b/examples/samsung/aot_compiler.py index 371949cd3e4..380e7f5b258 100644 --- a/examples/samsung/aot_compiler.py +++ b/examples/samsung/aot_compiler.py @@ -36,6 +36,7 @@ "edsr", "dl3", "vit", + "w2l", ] From 35b20727371771a00ce288136ca4c999dacf5426 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 9 Sep 2025 19:24:54 +0800 Subject: [PATCH 08/15] Support bert float model (finetune) Support the ops of bert float model (finetune) Co-authored-by: chong-chen --- backends/samsung/builders/__init__.py | 4 + .../samsung/builders/op_constant_pad_nd.py | 56 ++++ backends/samsung/builders/op_embedding.py | 41 +++ examples/samsung/aot_compiler.py | 20 +- .../samsung/scripts/mobilebert_finetune.py | 267 ++++++++++++++++++ examples/samsung/utils.py | 20 ++ 6 files changed, 389 insertions(+), 19 deletions(-) create mode 100644 backends/samsung/builders/op_constant_pad_nd.py create mode 100644 backends/samsung/builders/op_embedding.py create mode 100644 examples/samsung/scripts/mobilebert_finetune.py create mode 100644 examples/samsung/utils.py diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index 8f9b2d7d8c4..02a457fd06e 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -12,8 +12,10 @@ op_bmm, op_cat, op_clamp, + op_constant_pad_nd, op_conv2d, op_div, + op_embedding, op_expand_copy, op_gelu, op_getitem, @@ -54,7 +56,9 @@ op_cat, op_clamp, 
op_conv2d, + op_constant_pad_nd, op_div, + op_embedding, op_expand_copy, op_gelu, op_getitem, diff --git a/backends/samsung/builders/op_constant_pad_nd.py b/backends/samsung/builders/op_constant_pad_nd.py new file mode 100644 index 00000000000..cc7cdc5751b --- /dev/null +++ b/backends/samsung/builders/op_constant_pad_nd.py @@ -0,0 +1,56 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import cast, Dict, List + +import numpy as np + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class ConstantPadNDVisitor(NodeVisitor): + target = "aten.constant_pad_nd.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + # torch padding order starts from the last axis, change the order to fit samsung lite-core + paddings = np.reshape(cast(List[int], node.args[1]), (-1, 2))[::-1].astype( + np.uint32 + ) + in_shape = get_shape(input) + paddings = paddings.reshape(-1).tolist() + paddings = [0] * (2 * len(in_shape) - len(paddings)) + paddings + paddings = paddings[::2] + paddings[1::2] + + padding_value = node.args[2] + assert padding_value == 0.0, "Only Support pad constant 0 now." + # output + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + params = { + "explicit_padding": paddings, + "padding": "EXPLICIT", + "padding_type": "CONSTANT", + } + + enn_graph.define_op(node.name, "PAD", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_embedding.py b/backends/samsung/builders/op_embedding.py new file mode 100644 index 00000000000..f37c46a56d6 --- /dev/null +++ b/backends/samsung/builders/op_embedding.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
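+
+# Mapping sketch: aten.embedding(weight, ids) selects rows of the embedding
+# table along axis 0, i.e. out[i, j, :] == weight[ids[i, j], :].  The builder
+# below expresses this as an ENN GATHER op with axis=0 and the token ids fed
+# as indices.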
+ +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class EmbeddingVisitor(NodeVisitor): + target = "aten.embedding.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + weight_node = node.args[0] + weight_id = self.define_tensor(weight_node, enn_graph, vals_to_ids) + + input = node.args[1] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + params = {"axis": 0, "input_type": "indices"} + enn_graph.define_op( + node.name, "GATHER", [input_id, weight_id], [output_id], params + ) diff --git a/examples/samsung/aot_compiler.py b/examples/samsung/aot_compiler.py index 380e7f5b258..210f15293bb 100644 --- a/examples/samsung/aot_compiler.py +++ b/examples/samsung/aot_compiler.py @@ -5,11 +5,7 @@ # LICENSE file in the root directory of this source tree. import argparse -import collections import logging -import os - -import torch from executorch.backends.samsung.serialization.compile_options import ( gen_samsung_backend_compile_spec, @@ -17,6 +13,7 @@ from executorch.backends.samsung.utils.export_utils import ( to_edge_transform_and_lower_to_enn, ) +from executorch.examples.samsung.utils import save_tensors from executorch.exir import ExecutorchBackendConfig from executorch.extension.export_util.utils import save_pte_program @@ -40,21 +37,6 @@ ] -def save_tensors(tensors, prefix, artifact_dir): - if isinstance(tensors, tuple): - for index, output in enumerate(tensors): - save_path = prefix + "_" + str(index) + ".bin" - output.detach().numpy().tofile(os.path.join(artifact_dir, save_path)) - elif isinstance(tensors, torch.Tensor): - tensors.detach().numpy().tofile(os.path.join(artifact_dir, prefix + ".bin")) - elif isinstance(tensors, collections.OrderedDict): - for index, output in enumerate(tensors.values()): - save_path = prefix + "_" + str(index) + ".bin" - output.detach().numpy().tofile(os.path.join(artifact_dir, save_path)) - else: - logging.warning("Unsupported type (", type(tensors), ") skip saving tensor. ") - - if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/examples/samsung/scripts/mobilebert_finetune.py b/examples/samsung/scripts/mobilebert_finetune.py new file mode 100644 index 00000000000..78653142f0a --- /dev/null +++ b/examples/samsung/scripts/mobilebert_finetune.py @@ -0,0 +1,267 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import os +from pathlib import Path + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.utils.export_utils import ( + to_edge_transform_and_lower_to_enn, +) +from executorch.examples.samsung.utils import save_tensors +from executorch.exir import ExecutorchBackendConfig +from executorch.extension.export_util.utils import save_pte_program +from transformers import AutoTokenizer, MobileBertForSequenceClassification + + +# Output from pretrained model exceeds the representation scale of half-float. 
+# Finetune bert model on specific task and make output more reasonable for hardware. +# Here is an example. +class MobileBertFinetune: + def __init__(self): + self.tokenizer = self.load_tokenizer() + + def load_tokenizer(self): + return AutoTokenizer.from_pretrained("google/mobilebert-uncased") + + def get_example_inputs(self): + encoding = self.tokenizer.encode_plus( + "Hello, my dog is cute", + add_special_tokens=True, + max_length=128, + return_token_type_ids=False, + return_attention_mask=True, + truncation=True, + return_tensors="pt", + padding="max_length", + ) + + return ( + encoding["input_ids"], + encoding["attention_mask"].to(torch.float32), + ) + + def build_loader_from_dataset(self, dataset, batch_size, usage="train"): + """ + :param data: Provide dataset in pandas table type. The header names should be ['text', 'label'], + and label range from 0 (include) to total number of classification kinds (not include). + For example: + index text label + 0 despite its title , punch drunk love is never heavy handed 1 + 1 at once half baked and overheated 0 + 2 this is a shameless sham, ... 0 + ... + :param batch_size: Size of data fetch in one batch. + :param usage: The type of dataset which is used to build dataloader, like train, val. + :return: dataloader + """ + from torch.utils.data import ( + DataLoader, + RandomSampler, + SequentialSampler, + TensorDataset, + ) + + encoded_dataset = self.tokenizer.batch_encode_plus( + dataset.text.values.tolist(), + return_attention_mask=True, + truncation=True, + padding="max_length", + max_length=128, + return_tensors="pt", + ) + + labels = torch.tensor(dataset.label.values.tolist()) + + tensor_dataset = TensorDataset( + encoded_dataset["input_ids"], encoded_dataset["attention_mask"], labels + ) + data_loader = None + if usage == "train": + data_loader = DataLoader( + tensor_dataset, + sampler=RandomSampler(tensor_dataset), + batch_size=batch_size, + ) + elif usage == "val": + data_loader = DataLoader( + tensor_dataset, + sampler=SequentialSampler(tensor_dataset), + batch_size=batch_size, + drop_last=True, + ) + else: + raise NotImplementedError( + f"Unsupported `{usage}` dataset for building dataloader." + ) + + return data_loader + + def get_finetune_mobilebert(self, artifacts_dir): + # Pretrained bert's output ranges in a large scale. It is challenge for enn backend to support directly. + # Please finetune mobilebert on specific tasks, make sure that bert's output and hidden states are friendly + # to resource-constraint device. 
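+        # In short: fetch the SST-2 train/test TSVs, build DataLoaders, then
+        # either reuse finetuned weights cached in artifacts_dir (config.json
+        # and model.safetensors present) or finetune google/mobilebert-uncased
+        # for 5 epochs with AdamW (lr=1e-5) and a linear learning-rate
+        # schedule, and save the result back to artifacts_dir.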
+ from io import BytesIO + + import pandas as pd + import requests + + from tqdm import tqdm + from transformers import get_linear_schedule_with_warmup + + # sentiment classification + train_url = ( + "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/" + "refs/heads/master/data/SST2/train.tsv" + ) + content = requests.get(train_url, allow_redirects=True).content + train_data = pd.read_csv( + BytesIO(content), delimiter="\t", header=None, names=["text", "label"] + ) + labels_set = train_data.label.unique() + + train_data_loader = self.build_loader_from_dataset( + train_data, batch_size=64, usage="train" + ) + + val_url = ( + "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/" + "refs/heads/master/data/SST2/test.tsv" + ) + content = requests.get(val_url, allow_redirects=True).content + val_data = pd.read_csv( + BytesIO(content), delimiter="\t", header=None, names=["text", "label"] + ) + val_data_loader = self.build_loader_from_dataset( + val_data, batch_size=64, usage="val" + ) + + artifacts_dir = artifacts_dir if artifacts_dir is not None else "./mobilebert" + need_finetune = True + os.makedirs(artifacts_dir, exist_ok=True) + pretrained_required_files = ["config.json", "model.safetensors"] + path = Path(artifacts_dir) + if (path / pretrained_required_files[0]).exists() and ( + path / pretrained_required_files[1] + ).exists(): + need_finetune = False + + # get pre-trained mobilebert + model = MobileBertForSequenceClassification.from_pretrained( + "google/mobilebert-uncased" if need_finetune else artifacts_dir, + num_labels=len(labels_set), + return_dict=False, + ) + + if not need_finetune: + return model.eval(), val_data_loader + + num_epochs = 5 + num_train_steps = len(train_data_loader) * num_epochs + + optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5) + scheduler = get_linear_schedule_with_warmup( + optimizer, num_warmup_steps=0, num_training_steps=num_train_steps + ) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model.to(device) + for epoch in range(1, num_epochs + 1): + print(f"Epoch {epoch}") + model.train() + for batch in tqdm(train_data_loader): + texts, attention_mask, labels = batch + texts = texts.to(device) + labels = labels.to(device) + + loss = model(texts, attention_mask=attention_mask, labels=labels)[0] + # backward + optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + optimizer.step() + # update learning rate + scheduler.step() + model.to("cpu") + + model.save_pretrained(artifacts_dir) + + return model.eval(), val_data_loader + + def validate(self, model, val_data_loader): + model.eval() + total_loss = 0 + correct = 0 + total = 0 + with torch.no_grad(): + for batch in val_data_loader: + texts, attention_mask, labels = batch + + loss, output = model( + texts, attention_mask=attention_mask, labels=labels + ) + total_loss += loss.item() + predictions = torch.argmax(output, dim=1) + correct += (predictions == labels).sum().item() + total += labels.size(0) + + val_loss, val_accuracy = total_loss / len(val_data_loader), correct / total + print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "-c", + "--chipset", + required=True, + help="Samsung chipset, i.e. 
E9945, E9955, etc", + type=str, + ) + parser.add_argument( + "-a", + "--artifact", + help="path for storing generated artifacts by this example.", + default="./mobilebert", + type=str, + ) + parser.add_argument( + "--dump", + default=False, + action="store_true", + help=("Whether to dump all outputs. If not set, we only dump pte."), + ) + args = parser.parse_args() + # ensure the working directory exist. + os.makedirs(args.artifact, exist_ok=True) + + mobilebert_finetune = MobileBertFinetune() + model, val_dataset = mobilebert_finetune.get_finetune_mobilebert(args.artifact) + mobilebert_finetune.validate(model, val_dataset) + + example_inputs = mobilebert_finetune.get_example_inputs() + output = model(*example_inputs) + + compile_specs = [gen_samsung_backend_compile_spec(args.chipset)] + edge = to_edge_transform_and_lower_to_enn( + model, example_inputs, compile_specs=compile_specs + ) + model_name = "mobilebert_exynos_fp32" + exec_prog = edge.to_executorch( + config=ExecutorchBackendConfig(extract_delegate_segments=True) + ) + save_pte_program(exec_prog, model_name, args.artifact) + + if args.dump: + # Expect example inputs are tuple, including input ids and attn mask + save_tensors(example_inputs, prefix="float_input", artifact_dir=args.artifact) + save_tensors(output, prefix="float_output", artifact_dir=args.artifact) diff --git a/examples/samsung/utils.py b/examples/samsung/utils.py new file mode 100644 index 00000000000..4f2cc56086c --- /dev/null +++ b/examples/samsung/utils.py @@ -0,0 +1,20 @@ +import collections +import logging +import os + +import torch + + +def save_tensors(tensors, prefix, artifact_dir): + if isinstance(tensors, tuple): + for index, output in enumerate(tensors): + save_path = prefix + "_" + str(index) + ".bin" + output.detach().numpy().tofile(os.path.join(artifact_dir, save_path)) + elif isinstance(tensors, torch.Tensor): + tensors.detach().numpy().tofile(os.path.join(artifact_dir, prefix + ".bin")) + elif isinstance(tensors, collections.OrderedDict): + for index, output in enumerate(tensors.values()): + save_path = prefix + "_" + str(index) + ".bin" + output.detach().numpy().tofile(os.path.join(artifact_dir, save_path)) + else: + logging.warning("Unsupported type (", type(tensors), ") skip saving tensor. 
") From 5a909e15ebc45699d0af2a79cb0d80a4dd1e2253 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 9 Sep 2025 19:47:56 +0800 Subject: [PATCH 09/15] Support ops test Implement test for each op supported in builders Co-authored-by: chong-chen Co-authored-by: xz-linghu Co-authored-by: Jonghun Cha Co-authored-by: jingya-zhang --- backends/samsung/partition/enn_partitioner.py | 1 + backends/samsung/test/ops/test_batch_norm.py | 49 ++++++++++++++ backends/samsung/test/ops/test_bmm.py | 47 ++++++++++++++ backends/samsung/test/ops/test_cat.py | 48 ++++++++++++++ backends/samsung/test/ops/test_clamp.py | 50 ++++++++++++++ .../samsung/test/ops/test_constant_pad_nd.py | 43 ++++++++++++ backends/samsung/test/ops/test_div.py | 48 ++++++++++++++ backends/samsung/test/ops/test_embedding.py | 43 ++++++++++++ backends/samsung/test/ops/test_expand_copy.py | 44 +++++++++++++ backends/samsung/test/ops/test_gelu.py | 63 ++++++++++++++++++ backends/samsung/test/ops/test_leaky_relu.py | 45 +++++++++++++ backends/samsung/test/ops/test_linear.py | 48 ++++++++++++++ backends/samsung/test/ops/test_log_softmax.py | 44 +++++++++++++ backends/samsung/test/ops/test_mean_dim.py | 50 ++++++++++++++ backends/samsung/test/ops/test_minimum.py | 43 ++++++++++++ backends/samsung/test/ops/test_permute.py | 53 +++++++++++++++ .../samsung/test/ops/test_pixel_shuffle.py | 48 ++++++++++++++ backends/samsung/test/ops/test_relu.py | 65 +++++++++++++++++++ backends/samsung/test/ops/test_reshape.py | 44 +++++++++++++ backends/samsung/test/ops/test_rsqrt.py | 44 +++++++++++++ backends/samsung/test/ops/test_select.py | 51 +++++++++++++++ backends/samsung/test/ops/test_slice_copy.py | 44 +++++++++++++ backends/samsung/test/ops/test_softmax.py | 46 +++++++++++++ backends/samsung/test/ops/test_sqrt.py | 45 +++++++++++++ backends/samsung/test/ops/test_squeeze.py | 47 ++++++++++++++ backends/samsung/test/ops/test_sub.py | 62 ++++++++++++++++++ backends/samsung/test/ops/test_to_copy.py | 48 ++++++++++++++ backends/samsung/test/ops/test_unsqueeze.py | 50 ++++++++++++++ .../test/ops/test_upsample_bilinear2d.py | 53 +++++++++++++++ .../test/ops/test_upsample_nearest2d.py | 53 +++++++++++++++ .../samsung/test/tester/samsung_tester.py | 5 +- 31 files changed, 1421 insertions(+), 3 deletions(-) create mode 100644 backends/samsung/test/ops/test_batch_norm.py create mode 100644 backends/samsung/test/ops/test_bmm.py create mode 100644 backends/samsung/test/ops/test_cat.py create mode 100644 backends/samsung/test/ops/test_clamp.py create mode 100644 backends/samsung/test/ops/test_constant_pad_nd.py create mode 100644 backends/samsung/test/ops/test_div.py create mode 100644 backends/samsung/test/ops/test_embedding.py create mode 100644 backends/samsung/test/ops/test_expand_copy.py create mode 100644 backends/samsung/test/ops/test_gelu.py create mode 100644 backends/samsung/test/ops/test_leaky_relu.py create mode 100644 backends/samsung/test/ops/test_linear.py create mode 100644 backends/samsung/test/ops/test_log_softmax.py create mode 100644 backends/samsung/test/ops/test_mean_dim.py create mode 100644 backends/samsung/test/ops/test_minimum.py create mode 100644 backends/samsung/test/ops/test_permute.py create mode 100644 backends/samsung/test/ops/test_pixel_shuffle.py create mode 100644 backends/samsung/test/ops/test_relu.py create mode 100644 backends/samsung/test/ops/test_reshape.py create mode 100644 backends/samsung/test/ops/test_rsqrt.py create mode 100644 backends/samsung/test/ops/test_select.py create mode 100644 
backends/samsung/test/ops/test_slice_copy.py create mode 100644 backends/samsung/test/ops/test_softmax.py create mode 100644 backends/samsung/test/ops/test_sqrt.py create mode 100644 backends/samsung/test/ops/test_squeeze.py create mode 100644 backends/samsung/test/ops/test_sub.py create mode 100644 backends/samsung/test/ops/test_to_copy.py create mode 100644 backends/samsung/test/ops/test_unsqueeze.py create mode 100644 backends/samsung/test/ops/test_upsample_bilinear2d.py create mode 100644 backends/samsung/test/ops/test_upsample_nearest2d.py diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py index 6faeb8a44b2..952cb000429 100644 --- a/backends/samsung/partition/enn_partitioner.py +++ b/backends/samsung/partition/enn_partitioner.py @@ -128,5 +128,6 @@ def ops_to_not_decompose( torch.ops.aten.upsample_nearest2d.vec, torch.ops.aten.prelu.default, torch.ops.aten.layer_norm.default, + torch.ops.aten.pixel_shuffle.default, ] return (ops_not_to_decompose, None) diff --git a/backends/samsung/test/ops/test_batch_norm.py b/backends/samsung/test/ops/test_batch_norm.py new file mode 100644 index 00000000000..258205f62a6 --- /dev/null +++ b/backends/samsung/test/ops/test_batch_norm.py @@ -0,0 +1,49 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class BatchNorm(torch.nn.Module): + def __init__(self, num_features: int) -> None: + super().__init__() + self.num_features = num_features + self.bn = torch.nn.BatchNorm2d(num_features=self.num_features) + self.bn.weight.data.uniform_(-0.1, 0.1) + self.bn.bias.data.uniform_(-0.1, 0.1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.bn(x) + + +class TestBatchNorm(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not( + [ + "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" + ] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_batch_norm(self): + num_features = 16 + inputs = (torch.randn(4, num_features, 32, 32),) + self._test(BatchNorm(num_features), inputs) diff --git a/backends/samsung/test/ops/test_bmm.py b/backends/samsung/test/ops/test_bmm.py new file mode 100644 index 00000000000..8d04f83c79a --- /dev/null +++ b/backends/samsung/test/ops/test_bmm.py @@ -0,0 +1,47 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
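+# These op tests follow one pattern: export the module, lower it with the ENN
+# compile spec via to_edge_transform_and_lower, assert with check_not that the
+# corresponding edge-dialect op has been consumed by the delegate, and assert
+# with check_count that exactly one executorch_call_delegate call remains.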
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + +class BatchMatmul(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def get_example_inputs(self) -> tuple[torch.Tensor]: + input_1 = torch.randn(2, 16, 56) + input_2 = torch.randn(2, 56, 32) + return (input_1, input_2) + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return torch.bmm(x, y) + + + +class TestBatchMatmul(unittest.TestCase): + def _test(self, module: torch.nn.Module): + inputs = module.get_example_inputs() + tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + ( + tester.export() + .check_count({"torch.ops.aten.bmm.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_bmm_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + @unittest.skip("Temporarily disable it because TOT codes not well prepared.") + def test_fp32_bmm(self): + self._test(BatchMatmul()) diff --git a/backends/samsung/test/ops/test_cat.py b/backends/samsung/test/ops/test_cat.py new file mode 100644 index 00000000000..522ae4c8586 --- /dev/null +++ b/backends/samsung/test/ops/test_cat.py @@ -0,0 +1,48 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Concat(torch.nn.Module): + def __init__(self, axis) -> None: + super().__init__() + self.axis = axis + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return torch.cat((x, y), dim=self.axis) + + +class TestConcat(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec("E9955")] + ) + ( + tester.export() + .check_count({"torch.ops.aten.cat.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_concat_on_axis1(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Concat(axis=1), inputs) + + def test_fp32_concat_on_axis3(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Concat(axis=3), inputs) diff --git a/backends/samsung/test/ops/test_clamp.py b/backends/samsung/test/ops/test_clamp.py new file mode 100644 index 00000000000..773954863d8 --- /dev/null +++ b/backends/samsung/test/ops/test_clamp.py @@ -0,0 +1,50 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Clamp(torch.nn.Module): + def __init__( + self, + minimum=0.0, + maximum=0.0, + ) -> None: + super().__init__() + self.minimum = minimum + self.maximum = maximum + + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.clamp(x, self.minimum, self.maximum) + + +class TestClamp(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec("E9955")] + ) + ( + tester.export() + .check_count({"torch.ops.aten.clamp.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_clamp(self): + inputs = (torch.randn(1, 16, 8, 8),) + self._test(Clamp(minimum=0, maximum=2.), inputs) diff --git a/backends/samsung/test/ops/test_constant_pad_nd.py b/backends/samsung/test/ops/test_constant_pad_nd.py new file mode 100644 index 00000000000..c985fafb8ac --- /dev/null +++ b/backends/samsung/test/ops/test_constant_pad_nd.py @@ -0,0 +1,43 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class ConstantPadND(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + pad = (0, 0, 1, 1) + return torch.nn.functional.pad(x, pad, mode="constant", value=0) + + +class TestConstantPadND(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not( + ["executorch_exir_dialects_edge__ops_aten_constant_pad_nd_default"] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_constant_pad_nd(self): + inputs = (torch.randn(1, 6, 8, 16),) + self._test(ConstantPadND(), inputs) diff --git a/backends/samsung/test/ops/test_div.py b/backends/samsung/test/ops/test_div.py new file mode 100644 index 00000000000..031da0230a1 --- /dev/null +++ b/backends/samsung/test/ops/test_div.py @@ -0,0 +1,48 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Div(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return x / y + +class TestDiv(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.div.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_simple_div(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3) + self._test(Div(), inputs) + + def test_fp32_div_broadcast(self): + inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3) + self._test(Div(), inputs) diff --git a/backends/samsung/test/ops/test_embedding.py b/backends/samsung/test/ops/test_embedding.py new file mode 100644 index 00000000000..d45797bf8a8 --- /dev/null +++ b/backends/samsung/test/ops/test_embedding.py @@ -0,0 +1,43 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Embedding(torch.nn.Module): + def __init__(self, num_embeddings, embedding_dim=256) -> None: + super().__init__() + self.embedding = torch.nn.Embedding(num_embeddings, embedding_dim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.embedding(x) + + +class TestEmbedding(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + ( + tester.export() + .check_count({"torch.ops.aten.embedding.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_embedding_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_embedding(self): + num_embeddings = 2048 + inputs = (torch.randint(0, num_embeddings, (1, 64), dtype=torch.int32),) + self._test(Embedding(num_embeddings=num_embeddings), inputs) diff --git a/backends/samsung/test/ops/test_expand_copy.py b/backends/samsung/test/ops/test_expand_copy.py new file mode 100644 index 00000000000..5781904e5d7 --- /dev/null +++ b/backends/samsung/test/ops/test_expand_copy.py @@ -0,0 +1,44 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class ExpandCopy(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.expand(1, 16, -1, -1) + + +class TestExpand(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.expand.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_expand_copy_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_expand_copy(self): + inputs = (torch.randn(1, 1, 8, 8),) + self._test(ExpandCopy(), inputs) diff --git a/backends/samsung/test/ops/test_gelu.py b/backends/samsung/test/ops/test_gelu.py new file mode 100644 index 00000000000..41994728bd6 --- /dev/null +++ b/backends/samsung/test/ops/test_gelu.py @@ -0,0 +1,63 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class GELU(torch.nn.Module): + def __init__(self, with_conv=False) -> None: + super().__init__() + self.module = ( + torch.nn.Sequential( + torch.nn.Conv2d( + in_channels=3, + out_channels=16, + kernel_size=3, + stride=(2, 2), + padding=(1, 1), + dilation=(1, 1), + ).to(torch.float), + torch.nn.GELU(), + ) + if with_conv + else torch.nn.GELU() + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class TestGELU(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.gelu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_gelu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_single_gelu(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(GELU(with_conv=False), inputs) + + def test_fp32_conv_gelu(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(GELU(with_conv=True), inputs) diff --git a/backends/samsung/test/ops/test_leaky_relu.py b/backends/samsung/test/ops/test_leaky_relu.py new file mode 100644 index 00000000000..31f0ae7167e --- /dev/null +++ b/backends/samsung/test/ops/test_leaky_relu.py @@ -0,0 +1,45 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class LeakyReLU(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.module = torch.nn.LeakyReLU() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class TestLeakyReLU(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.leaky_relu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_leaky_relu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_leaky_relu(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(LeakyReLU(), inputs) diff --git a/backends/samsung/test/ops/test_linear.py b/backends/samsung/test/ops/test_linear.py new file mode 100644 index 00000000000..15ac3e75224 --- /dev/null +++ b/backends/samsung/test/ops/test_linear.py @@ -0,0 +1,48 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Linear(torch.nn.Module): + def __init__(self, in_features) -> None: + super().__init__() + self.module = torch.nn.Linear(in_features, 8) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + + +class TestLinear(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.linear.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_linear_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs() + ) + + def test_fp32_linear(self): + in_num_features = 24 + inputs = (torch.randn(128, in_num_features),) + self._test(Linear(in_num_features), inputs) \ No newline at end of file diff --git a/backends/samsung/test/ops/test_log_softmax.py b/backends/samsung/test/ops/test_log_softmax.py new file mode 100644 index 00000000000..0a6c46d2448 --- /dev/null +++ b/backends/samsung/test/ops/test_log_softmax.py @@ -0,0 +1,44 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class LogSoftmax(torch.nn.Module): + def __init__(self, dim) -> None: + super().__init__() + self.module = torch.nn.LogSoftmax(dim=dim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class TestLogSoftmax(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten__log_softmax_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_log_softmax(self): + inputs = (torch.randn(1, 16, 56, 56),) + self._test(LogSoftmax(dim=1), inputs) diff --git a/backends/samsung/test/ops/test_mean_dim.py b/backends/samsung/test/ops/test_mean_dim.py new file mode 100644 index 00000000000..e3c6ff64c25 --- /dev/null +++ b/backends/samsung/test/ops/test_mean_dim.py @@ -0,0 +1,50 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class MeanDim(torch.nn.Module): + def __init__(self, keep_dims=True) -> None: + super().__init__() + self.keep_dims = keep_dims + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.mean(x, dim=[2, 3], keepdim=self.keep_dims) + + +class TestMeanDim(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.mean.dim": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_mean_dim"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_mean_with_keep_dims(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(MeanDim(), inputs) + + def test_fp32_mean_without_keep_dims(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(MeanDim(keep_dims=False), inputs) \ No newline at end of file diff --git a/backends/samsung/test/ops/test_minimum.py b/backends/samsung/test/ops/test_minimum.py new file mode 100644 index 00000000000..4de45a2da67 --- /dev/null +++ b/backends/samsung/test/ops/test_minimum.py @@ -0,0 +1,43 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Minimum(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return torch.min(x, y) + + +class TestMinimum(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_minimum_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_minimum(self): + inputs = (torch.randn(1, 8, 16, 16), torch.randn(1, 8, 16, 16)) + self._test(Minimum(), inputs) \ No newline at end of file diff --git a/backends/samsung/test/ops/test_permute.py b/backends/samsung/test/ops/test_permute.py new file mode 100644 index 00000000000..5f7e7e7185c --- /dev/null +++ b/backends/samsung/test/ops/test_permute.py @@ -0,0 +1,53 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Permute(torch.nn.Module): + def __init__(self, order=None) -> None: + super().__init__() + self.order = order + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.permute(x, self.order) + + +class TestPermute(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.permute.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_permute_0231(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(Permute(order=[0, 2, 3, 1]), inputs) + + def test_fp32_permute_0312(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(Permute(order=[0, 3, 1, 2]), inputs) + + def test_fp32_permute_0321(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(Permute(order=[0, 3, 2, 1]), inputs) diff --git a/backends/samsung/test/ops/test_pixel_shuffle.py b/backends/samsung/test/ops/test_pixel_shuffle.py new file mode 100644 index 00000000000..42afbe38870 --- /dev/null +++ b/backends/samsung/test/ops/test_pixel_shuffle.py @@ -0,0 +1,48 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
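+# aten.pixel_shuffle is lowered by the ENN builder to DEPTH_TO_SPACE with
+# mode "CRD", turning (N, C*r*r, H, W) into (N, C, H*r, W*r); for the input
+# below, (1, 9, 4, 4) with upscale_factor=3 becomes (1, 1, 12, 12).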
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class PixelShuffle(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.module = torch.nn.PixelShuffle(upscale_factor=3) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class TestPixelShuffle(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.pixel_shuffle.default": 1}) + .to_edge_transform_and_lower() + .check_not( + ["executorch_exir_dialects_edge__ops_aten_pixel_shuffle_default"] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_pixel_shuffle(self): + inputs = (torch.randn(1, 9, 4, 4),) + self._test(PixelShuffle(), inputs) diff --git a/backends/samsung/test/ops/test_relu.py b/backends/samsung/test/ops/test_relu.py new file mode 100644 index 00000000000..1abaacadb92 --- /dev/null +++ b/backends/samsung/test/ops/test_relu.py @@ -0,0 +1,65 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class ReLU(torch.nn.Module): + def __init__(self, with_conv=False) -> None: + super().__init__() + self.module = ( + torch.nn.Sequential( + torch.nn.Conv2d( + in_channels=3, + out_channels=16, + kernel_size=3, + stride=(2, 2), + padding=(1, 1), + dilation=(1, 1), + ).to(torch.float), + torch.nn.ReLU(), + ) + if with_conv + else torch.nn.ReLU() + ) + + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class TestReLU(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.relu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + + def test_fp32_single_relu(self): + inputs = (torch.randn(1, 3, 56, 56),) + self._test(ReLU(with_conv=False), inputs) + + def test_fp32_conv_relu(self): + inputs = (torch.randn(1, 3, 56, 56),) + self._test(ReLU(with_conv=True), inputs) diff --git a/backends/samsung/test/ops/test_reshape.py b/backends/samsung/test/ops/test_reshape.py new file mode 100644 index 00000000000..c1ff23b969f --- /dev/null +++ b/backends/samsung/test/ops/test_reshape.py @@ -0,0 +1,44 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Reshape(torch.nn.Module): + def __init__(self, new_shape) -> None: + super().__init__() + self.new_shape = new_shape + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.reshape(*self.new_shape) + + +class TestReshape(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_view_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_reshape(self): + inputs = (torch.randn(1, 16, 2, 8),) + self._test(Reshape(new_shape=[1, 32, 8]), inputs) \ No newline at end of file diff --git a/backends/samsung/test/ops/test_rsqrt.py b/backends/samsung/test/ops/test_rsqrt.py new file mode 100644 index 00000000000..a76c66c16a9 --- /dev/null +++ b/backends/samsung/test/ops/test_rsqrt.py @@ -0,0 +1,44 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Rsqrt(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.rsqrt(x) + + +class TestRsqrt(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.rsqrt.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_rsqrt_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_rsqrt(self): + inputs = (torch.randn(16, 8).abs().add(1e-6), ) + self._test(Rsqrt(), inputs) diff --git a/backends/samsung/test/ops/test_select.py b/backends/samsung/test/ops/test_select.py new file mode 100644 index 00000000000..c938532fae6 --- /dev/null +++ b/backends/samsung/test/ops/test_select.py @@ -0,0 +1,51 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class SelectCopy(torch.nn.Module): + def __init__(self, axis, index) -> None: + super().__init__() + self.axis = axis + self.index = index + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.select(x, self.axis, self.index) + + +class TestSelectCopy(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.select.int": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_select_copy_int"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_select_on_axis1(self): + inputs = (torch.randn([1, 4, 16, 16]),) + self._test(SelectCopy(axis=1, index=0), inputs) + + def test_fp32_concat_on_axis3(self): + inputs = (torch.randn([1, 4, 16, 16]),) + self._test(SelectCopy(axis=3, index=6), inputs) \ No newline at end of file diff --git a/backends/samsung/test/ops/test_slice_copy.py b/backends/samsung/test/ops/test_slice_copy.py new file mode 100644 index 00000000000..5c3f527f556 --- /dev/null +++ b/backends/samsung/test/ops/test_slice_copy.py @@ -0,0 +1,44 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class SliceCopy(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x[:, :8, :, :8] + + +class TestSliceCopy(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.slice.Tensor": 2}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_slice_copy(self): + inputs = (torch.randn(1, 16, 16, 16),) + self._test(SliceCopy(), inputs) diff --git a/backends/samsung/test/ops/test_softmax.py b/backends/samsung/test/ops/test_softmax.py new file mode 100644 index 00000000000..26368dc3960 --- /dev/null +++ b/backends/samsung/test/ops/test_softmax.py @@ -0,0 +1,46 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class Softmax(torch.nn.Module): + def __init__(self, dim=0) -> None: + super().__init__() + self.dim = dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.softmax(x, dim=self.dim) + + +class TestSoftmax(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten__softmax_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs() + ) + + def test_fp32_softmax(self): + inputs = (torch.randn(1, 16, 8, 8),) + self._test(Softmax(dim=1), inputs) diff --git a/backends/samsung/test/ops/test_sqrt.py b/backends/samsung/test/ops/test_sqrt.py new file mode 100644 index 00000000000..68958a5d1ee --- /dev/null +++ b/backends/samsung/test/ops/test_sqrt.py @@ -0,0 +1,45 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class Sqrt(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.sqrt(x) + + +class TestSqrt(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.sqrt.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_sqrt_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_sqrt(self): + inputs = (torch.randn(16, 8).abs(),) + self._test(Sqrt(), inputs) diff --git a/backends/samsung/test/ops/test_squeeze.py b/backends/samsung/test/ops/test_squeeze.py new file mode 100644 index 00000000000..cd0aa526877 --- /dev/null +++ b/backends/samsung/test/ops/test_squeeze.py @@ -0,0 +1,47 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class Squeeze(torch.nn.Module): + def __init__(self, dims) -> None: + super().__init__() + self.dims = dims + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.squeeze(x, self.dims) + + +class TestSqueeze(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.squeeze.dims": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_squeeze_dims"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_squeeze(self): + inputs = (torch.randn(1, 2, 1, 3, 1),) + self._test(Squeeze(dims=[2, 4]), inputs) diff --git a/backends/samsung/test/ops/test_sub.py b/backends/samsung/test/ops/test_sub.py new file mode 100644 index 00000000000..1ee73c9dbbf --- /dev/null +++ b/backends/samsung/test/ops/test_sub.py @@ -0,0 +1,62 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Sub(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return x - y + + +class SubConstant(torch.nn.Module): + def __init__(self, constant) -> None: + super().__init__() + self.constant = constant + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x - self.constant + + +class TestSub(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.sub.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_sub_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_simple_sub(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Sub(), inputs) + + def test_fp32_const_sub(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(SubConstant(torch.randn(1, 3, 8, 8)), inputs) + + def test_fp32_sub_broadcast(self): + inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Sub(), inputs) diff --git a/backends/samsung/test/ops/test_to_copy.py b/backends/samsung/test/ops/test_to_copy.py new file mode 100644 index 00000000000..954b4716f44 --- /dev/null +++ b/backends/samsung/test/ops/test_to_copy.py @@ -0,0 +1,48 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class ToCopy(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.to(torch.float16) + + +class TestToCopy(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not( + [ + "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default" + ] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_to_copy(self): + inputs = (torch.randn(1, 3, 56, 56),) + self._test(ToCopy(), inputs) diff --git a/backends/samsung/test/ops/test_unsqueeze.py b/backends/samsung/test/ops/test_unsqueeze.py new file mode 100644 index 00000000000..a7954fb18c7 --- /dev/null +++ b/backends/samsung/test/ops/test_unsqueeze.py @@ -0,0 +1,50 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class UnSqueeze(torch.nn.Module): + def __init__(self, axis) -> None: + super().__init__() + self.axis = axis + + def get_example_inputs(self) -> tuple[torch.Tensor]: + input_1 = torch.randn(2, 3, 1, 4) # input should be positive + return (input_1,) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.unsqueeze(x, dim=self.axis) + + +class TestSqueeze(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.unsqueeze.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_unsqueeze_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_unsqueeze(self): + inputs = (torch.randn(2, 3, 1, 4),) + self._test(UnSqueeze(axis=1), inputs) diff --git a/backends/samsung/test/ops/test_upsample_bilinear2d.py b/backends/samsung/test/ops/test_upsample_bilinear2d.py new file mode 100644 index 00000000000..e9c181cca38 --- /dev/null +++ b/backends/samsung/test/ops/test_upsample_bilinear2d.py @@ -0,0 +1,53 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class UpsampleBilinear2d(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output_shape = [32, 32] + return torch.nn.functional.interpolate( + x, + size=output_shape, + mode="bilinear", + align_corners=False, + ) + + +class TestUpsampleBilinear2d(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.upsample_bilinear2d.vec": 1}) + .to_edge_transform_and_lower() + .check_not( + ["executorch_exir_dialects_edge__ops_aten_upsample_bilinear2d_vec"] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_upsample_bilinear2d(self): + inputs = (torch.randn(1, 16, 16, 16),) + self._test(UpsampleBilinear2d(), inputs) diff --git a/backends/samsung/test/ops/test_upsample_nearest2d.py b/backends/samsung/test/ops/test_upsample_nearest2d.py new file mode 100644 index 00000000000..eb7243045c1 --- /dev/null +++ b/backends/samsung/test/ops/test_upsample_nearest2d.py @@ -0,0 +1,53 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + + +class UpsampleNearest2d(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output_shape = [32, 32] + return torch.nn.functional.interpolate( + x, + size=output_shape, + mode="nearest", + ) + + +class TestUpsampleNearest2d(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.upsample_nearest2d.vec": 1}) + .to_edge_transform_and_lower() + .check_not( + ["executorch_exir_dialects_edge__ops_aten_upsample_nearest2d_vec"] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_upsample_nearest2d(self): + inputs = (torch.randn(1, 4, 16, 16),) + self._test(UpsampleNearest2d(), inputs) diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py index b750fb26a96..1a595d5d77a 100644 --- a/backends/samsung/test/tester/samsung_tester.py +++ b/backends/samsung/test/tester/samsung_tester.py @@ -9,6 +9,7 @@ import executorch.backends.test.harness.stages as BaseStages import torch from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner +from executorch.backends.samsung.utils.export_utils import get_edge_compile_config from executorch.backends.test.harness import Tester as TesterBase from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower @@ -33,9 +34,7 @@ def __init__( ): compile_specs = compile_specs or [] self.partitioners = 
[EnnPartitioner(compile_specs=compile_specs)] - self.edge_compile_config = edge_compile_config or EdgeCompileConfig( - _skip_dim_order=True, _check_ir_validity=False - ) + self.edge_compile_config = edge_compile_config or get_edge_compile_config() self.edge_dialect_program = None def run( From 09d10d6e9b7c78352dac68359e3bd865c03808c5 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Tue, 9 Sep 2025 20:07:28 +0800 Subject: [PATCH 10/15] Add model test add more model tests to test workflow. Like mv3, dl3, vit, etc. --- .github/workflows/pull.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 7a1d0222231..67d1c049efc 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -892,7 +892,7 @@ jobs: source .ci/scripts/setup-samsung-linux-deps.sh # Test models serially - models="mv2 ic3 resnet18 resnet50" + models="mv2 ic3 resnet18 resnet50 mv3 ic4 dl3 edsr vit w2l" for model in $models; do python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955 done From 49e54c6c6516a8629d279c6463141c5292dd7cad Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Wed, 10 Sep 2025 10:11:54 +0000 Subject: [PATCH 11/15] fix lint errors Signed-off-by: jiseong.oh --- backends/samsung/test/ops/test_batch_norm.py | 10 ++++++---- backends/samsung/test/ops/test_bmm.py | 16 +++++++++------- backends/samsung/test/ops/test_cat.py | 12 ++++++------ backends/samsung/test/ops/test_clamp.py | 13 ++++++------- .../samsung/test/ops/test_constant_pad_nd.py | 12 +++++++----- backends/samsung/test/ops/test_div.py | 11 ++++++----- backends/samsung/test/ops/test_embedding.py | 14 ++++++++------ backends/samsung/test/ops/test_expand_copy.py | 13 +++++++------ backends/samsung/test/ops/test_gelu.py | 13 +++++++------ backends/samsung/test/ops/test_leaky_relu.py | 13 +++++++------ backends/samsung/test/ops/test_linear.py | 18 +++++++++--------- backends/samsung/test/ops/test_log_softmax.py | 11 ++++++----- backends/samsung/test/ops/test_mean_dim.py | 16 ++++++++-------- backends/samsung/test/ops/test_minimum.py | 13 +++++++------ backends/samsung/test/ops/test_permute.py | 13 +++++++------ .../samsung/test/ops/test_pixel_shuffle.py | 10 +++++----- backends/samsung/test/ops/test_relu.py | 15 +++++++-------- backends/samsung/test/ops/test_reshape.py | 13 +++++++------ backends/samsung/test/ops/test_rsqrt.py | 15 ++++++++------- backends/samsung/test/ops/test_select.py | 16 ++++++++-------- backends/samsung/test/ops/test_slice_copy.py | 13 +++++++------ backends/samsung/test/ops/test_softmax.py | 14 +++++++------- backends/samsung/test/ops/test_sqrt.py | 14 +++++++------- backends/samsung/test/ops/test_squeeze.py | 11 +++++------ backends/samsung/test/ops/test_sub.py | 10 +++++----- backends/samsung/test/ops/test_to_copy.py | 8 ++++---- backends/samsung/test/ops/test_unsqueeze.py | 10 +++++----- .../test/ops/test_upsample_bilinear2d.py | 10 +++++----- .../test/ops/test_upsample_nearest2d.py | 11 +++++------ 29 files changed, 191 insertions(+), 177 deletions(-) diff --git a/backends/samsung/test/ops/test_batch_norm.py b/backends/samsung/test/ops/test_batch_norm.py index 258205f62a6..7cb9db0e47c 100644 --- a/backends/samsung/test/ops/test_batch_norm.py +++ b/backends/samsung/test/ops/test_batch_norm.py @@ -30,14 +30,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestBatchNorm(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): - tester = SamsungTester(module, inputs, 
[gen_samsung_backend_compile_spec("E9955")]) + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec("E9955")] + ) ( tester.export() .to_edge_transform_and_lower() .check_not( - [ - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" - ] + [ + "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" + ] ) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() diff --git a/backends/samsung/test/ops/test_bmm.py b/backends/samsung/test/ops/test_bmm.py index 8d04f83c79a..91514a97968 100644 --- a/backends/samsung/test/ops/test_bmm.py +++ b/backends/samsung/test/ops/test_bmm.py @@ -15,6 +15,7 @@ ) from executorch.backends.samsung.test.tester import SamsungTester + class BatchMatmul(torch.nn.Module): def __init__(self) -> None: super().__init__() @@ -28,18 +29,19 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return torch.bmm(x, y) - class TestBatchMatmul(unittest.TestCase): def _test(self, module: torch.nn.Module): inputs = module.get_example_inputs() - tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec("E9955")] + ) ( tester.export() - .check_count({"torch.ops.aten.bmm.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_bmm_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.bmm.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_bmm_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) @unittest.skip("Temporarily disable it because TOT codes not well prepared.") diff --git a/backends/samsung/test/ops/test_cat.py b/backends/samsung/test/ops/test_cat.py index 522ae4c8586..f744f9ca882 100644 --- a/backends/samsung/test/ops/test_cat.py +++ b/backends/samsung/test/ops/test_cat.py @@ -28,15 +28,15 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: class TestConcat(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec("E9955")] ) ( tester.export() - .check_count({"torch.ops.aten.cat.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.cat.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_concat_on_axis1(self): diff --git a/backends/samsung/test/ops/test_clamp.py b/backends/samsung/test/ops/test_clamp.py index 773954863d8..00e3eb72690 100644 --- a/backends/samsung/test/ops/test_clamp.py +++ b/backends/samsung/test/ops/test_clamp.py @@ -26,7 +26,6 @@ def __init__( self.minimum = minimum self.maximum = maximum - def forward(self, x: torch.Tensor) -> torch.Tensor: return torch.clamp(x, self.minimum, self.maximum) @@ -38,13 +37,13 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.clamp.default": 1}) - .to_edge_transform_and_lower() - 
.check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.clamp.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_clamp(self): inputs = (torch.randn(1, 16, 8, 8),) - self._test(Clamp(minimum=0, maximum=2.), inputs) + self._test(Clamp(minimum=0, maximum=2.0), inputs) diff --git a/backends/samsung/test/ops/test_constant_pad_nd.py b/backends/samsung/test/ops/test_constant_pad_nd.py index c985fafb8ac..dae24abb7d7 100644 --- a/backends/samsung/test/ops/test_constant_pad_nd.py +++ b/backends/samsung/test/ops/test_constant_pad_nd.py @@ -27,15 +27,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestConstantPadND(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): - tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec("E9955")] + ) ( tester.export() - .to_edge_transform_and_lower() - .check_not( + .to_edge_transform_and_lower() + .check_not( ["executorch_exir_dialects_edge__ops_aten_constant_pad_nd_default"] ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_constant_pad_nd(self): diff --git a/backends/samsung/test/ops/test_div.py b/backends/samsung/test/ops/test_div.py index 031da0230a1..31384afd896 100644 --- a/backends/samsung/test/ops/test_div.py +++ b/backends/samsung/test/ops/test_div.py @@ -23,6 +23,7 @@ def __init__(self) -> None: def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x / y + class TestDiv(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( @@ -32,11 +33,11 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.div.Tensor": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.div.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_simple_div(self): diff --git a/backends/samsung/test/ops/test_embedding.py b/backends/samsung/test/ops/test_embedding.py index d45797bf8a8..fb6aaaf7766 100644 --- a/backends/samsung/test/ops/test_embedding.py +++ b/backends/samsung/test/ops/test_embedding.py @@ -27,14 +27,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestEmbedding(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): - tester = SamsungTester(module, inputs, [gen_samsung_backend_compile_spec("E9955")]) + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec("E9955")] + ) ( tester.export() - .check_count({"torch.ops.aten.embedding.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_embedding_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.embedding.default": 
1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_embedding_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_embedding(self): diff --git a/backends/samsung/test/ops/test_expand_copy.py b/backends/samsung/test/ops/test_expand_copy.py index 5781904e5d7..47df38be8e2 100644 --- a/backends/samsung/test/ops/test_expand_copy.py +++ b/backends/samsung/test/ops/test_expand_copy.py @@ -27,16 +27,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestExpand(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.expand.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_expand_copy_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.expand.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_expand_copy_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_expand_copy(self): diff --git a/backends/samsung/test/ops/test_gelu.py b/backends/samsung/test/ops/test_gelu.py index 41994728bd6..4e6f2d971ab 100644 --- a/backends/samsung/test/ops/test_gelu.py +++ b/backends/samsung/test/ops/test_gelu.py @@ -42,16 +42,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestGELU(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.gelu.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_gelu_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.gelu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_gelu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_single_gelu(self): diff --git a/backends/samsung/test/ops/test_leaky_relu.py b/backends/samsung/test/ops/test_leaky_relu.py index 31f0ae7167e..0af6ea0da90 100644 --- a/backends/samsung/test/ops/test_leaky_relu.py +++ b/backends/samsung/test/ops/test_leaky_relu.py @@ -28,16 +28,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestLeakyReLU(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.leaky_relu.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_leaky_relu_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.leaky_relu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_leaky_relu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_leaky_relu(self): diff --git a/backends/samsung/test/ops/test_linear.py b/backends/samsung/test/ops/test_linear.py index 15ac3e75224..f327464fc0c 
100644 --- a/backends/samsung/test/ops/test_linear.py +++ b/backends/samsung/test/ops/test_linear.py @@ -25,24 +25,24 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.module(x) - class TestLinear(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.linear.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_linear_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs() + .check_count({"torch.ops.aten.linear.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_linear_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs() ) def test_fp32_linear(self): in_num_features = 24 inputs = (torch.randn(128, in_num_features),) - self._test(Linear(in_num_features), inputs) \ No newline at end of file + self._test(Linear(in_num_features), inputs) diff --git a/backends/samsung/test/ops/test_log_softmax.py b/backends/samsung/test/ops/test_log_softmax.py index 0a6c46d2448..2e2b3ff0604 100644 --- a/backends/samsung/test/ops/test_log_softmax.py +++ b/backends/samsung/test/ops/test_log_softmax.py @@ -28,15 +28,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestLogSoftmax(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten__log_softmax_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten__log_softmax_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_log_softmax(self): diff --git a/backends/samsung/test/ops/test_mean_dim.py b/backends/samsung/test/ops/test_mean_dim.py index e3c6ff64c25..113e26c45b2 100644 --- a/backends/samsung/test/ops/test_mean_dim.py +++ b/backends/samsung/test/ops/test_mean_dim.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class MeanDim(torch.nn.Module): def __init__(self, keep_dims=True) -> None: super().__init__() @@ -29,16 +28,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestMeanDim(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.mean.dim": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_mean_dim"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.mean.dim": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_mean_dim"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_mean_with_keep_dims(self): @@ -47,4 +47,4 @@ def test_fp32_mean_with_keep_dims(self): def test_fp32_mean_without_keep_dims(self): inputs = (torch.randn(1, 3, 8, 8),) - 
self._test(MeanDim(keep_dims=False), inputs) \ No newline at end of file + self._test(MeanDim(keep_dims=False), inputs) diff --git a/backends/samsung/test/ops/test_minimum.py b/backends/samsung/test/ops/test_minimum.py index 4de45a2da67..de275cc4d46 100644 --- a/backends/samsung/test/ops/test_minimum.py +++ b/backends/samsung/test/ops/test_minimum.py @@ -27,17 +27,18 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: class TestMinimum(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_minimum_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_minimum_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_minimum(self): inputs = (torch.randn(1, 8, 16, 16), torch.randn(1, 8, 16, 16)) - self._test(Minimum(), inputs) \ No newline at end of file + self._test(Minimum(), inputs) diff --git a/backends/samsung/test/ops/test_permute.py b/backends/samsung/test/ops/test_permute.py index 5f7e7e7185c..3889c803e85 100644 --- a/backends/samsung/test/ops/test_permute.py +++ b/backends/samsung/test/ops/test_permute.py @@ -28,16 +28,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestPermute(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.permute.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.permute.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_permute_0231(self): diff --git a/backends/samsung/test/ops/test_pixel_shuffle.py b/backends/samsung/test/ops/test_pixel_shuffle.py index 42afbe38870..bc7a53ff592 100644 --- a/backends/samsung/test/ops/test_pixel_shuffle.py +++ b/backends/samsung/test/ops/test_pixel_shuffle.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class PixelShuffle(torch.nn.Module): def __init__(self) -> None: super().__init__() @@ -29,14 +28,15 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestPixelShuffle(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.pixel_shuffle.default": 1}) - .to_edge_transform_and_lower() - .check_not( + .check_count({"torch.ops.aten.pixel_shuffle.default": 1}) + .to_edge_transform_and_lower() + .check_not( ["executorch_exir_dialects_edge__ops_aten_pixel_shuffle_default"] ) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) diff --git a/backends/samsung/test/ops/test_relu.py b/backends/samsung/test/ops/test_relu.py index 1abaacadb92..386827109f0 100644 --- a/backends/samsung/test/ops/test_relu.py +++ 
b/backends/samsung/test/ops/test_relu.py @@ -35,7 +35,6 @@ def __init__(self, with_conv=False) -> None: else torch.nn.ReLU() ) - def forward(self, x: torch.Tensor) -> torch.Tensor: return self.module(x) @@ -43,19 +42,19 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestReLU(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.relu.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.relu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) - def test_fp32_single_relu(self): inputs = (torch.randn(1, 3, 56, 56),) self._test(ReLU(with_conv=False), inputs) diff --git a/backends/samsung/test/ops/test_reshape.py b/backends/samsung/test/ops/test_reshape.py index c1ff23b969f..8c89d946361 100644 --- a/backends/samsung/test/ops/test_reshape.py +++ b/backends/samsung/test/ops/test_reshape.py @@ -28,17 +28,18 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestReshape(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_view_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_view_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_reshape(self): inputs = (torch.randn(1, 16, 2, 8),) - self._test(Reshape(new_shape=[1, 32, 8]), inputs) \ No newline at end of file + self._test(Reshape(new_shape=[1, 32, 8]), inputs) diff --git a/backends/samsung/test/ops/test_rsqrt.py b/backends/samsung/test/ops/test_rsqrt.py index a76c66c16a9..4bf302c867f 100644 --- a/backends/samsung/test/ops/test_rsqrt.py +++ b/backends/samsung/test/ops/test_rsqrt.py @@ -27,18 +27,19 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestRsqrt(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.rsqrt.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_rsqrt_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.rsqrt.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_rsqrt_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_rsqrt(self): - inputs = (torch.randn(16, 8).abs().add(1e-6), ) + inputs = (torch.randn(16, 8).abs().add(1e-6),) self._test(Rsqrt(), inputs) diff --git a/backends/samsung/test/ops/test_select.py b/backends/samsung/test/ops/test_select.py index c938532fae6..dcb0667d036 100644 --- a/backends/samsung/test/ops/test_select.py +++ 
b/backends/samsung/test/ops/test_select.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class SelectCopy(torch.nn.Module): def __init__(self, axis, index) -> None: super().__init__() @@ -30,16 +29,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestSelectCopy(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.select.int": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_select_copy_int"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.select.int": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_select_copy_int"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_select_on_axis1(self): @@ -48,4 +48,4 @@ def test_fp32_select_on_axis1(self): def test_fp32_concat_on_axis3(self): inputs = (torch.randn([1, 4, 16, 16]),) - self._test(SelectCopy(axis=3, index=6), inputs) \ No newline at end of file + self._test(SelectCopy(axis=3, index=6), inputs) diff --git a/backends/samsung/test/ops/test_slice_copy.py b/backends/samsung/test/ops/test_slice_copy.py index 5c3f527f556..f31410b8a41 100644 --- a/backends/samsung/test/ops/test_slice_copy.py +++ b/backends/samsung/test/ops/test_slice_copy.py @@ -27,16 +27,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestSliceCopy(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.slice.Tensor": 2}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.slice.Tensor": 2}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_slice_copy(self): diff --git a/backends/samsung/test/ops/test_softmax.py b/backends/samsung/test/ops/test_softmax.py index 26368dc3960..a4c2f36acfc 100644 --- a/backends/samsung/test/ops/test_softmax.py +++ b/backends/samsung/test/ops/test_softmax.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class Softmax(torch.nn.Module): def __init__(self, dim=0) -> None: super().__init__() @@ -29,16 +28,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestSoftmax(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten__softmax_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten__softmax_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs() ) def 
test_fp32_softmax(self): diff --git a/backends/samsung/test/ops/test_sqrt.py b/backends/samsung/test/ops/test_sqrt.py index 68958a5d1ee..e1a084c3611 100644 --- a/backends/samsung/test/ops/test_sqrt.py +++ b/backends/samsung/test/ops/test_sqrt.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class Sqrt(torch.nn.Module): def __init__(self) -> None: super().__init__() @@ -28,16 +27,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestSqrt(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, + module, + inputs, [gen_samsung_backend_compile_spec("E9955")], ) ( tester.export() - .check_count({"torch.ops.aten.sqrt.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_sqrt_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.sqrt.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_sqrt_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_sqrt(self): diff --git a/backends/samsung/test/ops/test_squeeze.py b/backends/samsung/test/ops/test_squeeze.py index cd0aa526877..ab93758f203 100644 --- a/backends/samsung/test/ops/test_squeeze.py +++ b/backends/samsung/test/ops/test_squeeze.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class Squeeze(torch.nn.Module): def __init__(self, dims) -> None: super().__init__() @@ -35,11 +34,11 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.squeeze.dims": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_squeeze_dims"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.squeeze.dims": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_squeeze_dims"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_squeeze(self): diff --git a/backends/samsung/test/ops/test_sub.py b/backends/samsung/test/ops/test_sub.py index 1ee73c9dbbf..5541a52c80c 100644 --- a/backends/samsung/test/ops/test_sub.py +++ b/backends/samsung/test/ops/test_sub.py @@ -42,11 +42,11 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.sub.Tensor": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_sub_Tensor"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.sub.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_sub_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_simple_sub(self): diff --git a/backends/samsung/test/ops/test_to_copy.py b/backends/samsung/test/ops/test_to_copy.py index 954b4716f44..d6917c9403f 100644 --- a/backends/samsung/test/ops/test_to_copy.py +++ b/backends/samsung/test/ops/test_to_copy.py @@ -33,14 +33,14 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .to_edge_transform_and_lower() - .check_not( + .to_edge_transform_and_lower() + .check_not( [ "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default" ] ) - 
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_to_copy(self): diff --git a/backends/samsung/test/ops/test_unsqueeze.py b/backends/samsung/test/ops/test_unsqueeze.py index a7954fb18c7..543fa0bc282 100644 --- a/backends/samsung/test/ops/test_unsqueeze.py +++ b/backends/samsung/test/ops/test_unsqueeze.py @@ -38,11 +38,11 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.unsqueeze.default": 1}) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_unsqueeze_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.aten.unsqueeze.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_unsqueeze_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_unsqueeze(self): diff --git a/backends/samsung/test/ops/test_upsample_bilinear2d.py b/backends/samsung/test/ops/test_upsample_bilinear2d.py index e9c181cca38..37dcb28df83 100644 --- a/backends/samsung/test/ops/test_upsample_bilinear2d.py +++ b/backends/samsung/test/ops/test_upsample_bilinear2d.py @@ -39,13 +39,13 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.upsample_bilinear2d.vec": 1}) - .to_edge_transform_and_lower() - .check_not( + .check_count({"torch.ops.aten.upsample_bilinear2d.vec": 1}) + .to_edge_transform_and_lower() + .check_not( ["executorch_exir_dialects_edge__ops_aten_upsample_bilinear2d_vec"] ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_upsample_bilinear2d(self): diff --git a/backends/samsung/test/ops/test_upsample_nearest2d.py b/backends/samsung/test/ops/test_upsample_nearest2d.py index eb7243045c1..e027ab23337 100644 --- a/backends/samsung/test/ops/test_upsample_nearest2d.py +++ b/backends/samsung/test/ops/test_upsample_nearest2d.py @@ -16,7 +16,6 @@ from executorch.backends.samsung.test.tester import SamsungTester - class UpsampleNearest2d(torch.nn.Module): def __init__(self) -> None: super().__init__() @@ -39,13 +38,13 @@ def _test(self, module: torch.nn.Module, inputs): ) ( tester.export() - .check_count({"torch.ops.aten.upsample_nearest2d.vec": 1}) - .to_edge_transform_and_lower() - .check_not( + .check_count({"torch.ops.aten.upsample_nearest2d.vec": 1}) + .to_edge_transform_and_lower() + .check_not( ["executorch_exir_dialects_edge__ops_aten_upsample_nearest2d_vec"] ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() ) def test_fp32_upsample_nearest2d(self): From 5f1734e9072627ecfefbd49ee244e9103eda0d50 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Wed, 10 Sep 2025 10:13:33 +0000 Subject: [PATCH 12/15] enable bmm op test Signed-off-by: jiseong.oh --- backends/samsung/test/ops/test_bmm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backends/samsung/test/ops/test_bmm.py b/backends/samsung/test/ops/test_bmm.py index 91514a97968..7712a50a0c9 100644 --- a/backends/samsung/test/ops/test_bmm.py +++ b/backends/samsung/test/ops/test_bmm.py @@ -44,6 +44,5 @@ def _test(self, module: 
torch.nn.Module): .to_executorch() ) - @unittest.skip("Temporarily disable it because TOT codes not well prepared.") def test_fp32_bmm(self): self._test(BatchMatmul()) From 4ae638bce827a60c9aba0d0b757c0587c165b249 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Wed, 10 Sep 2025 11:46:12 +0000 Subject: [PATCH 13/15] fix lint issue for url-issue Signed-off-by: jiseong.oh --- examples/samsung/scripts/mobilebert_finetune.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/samsung/scripts/mobilebert_finetune.py b/examples/samsung/scripts/mobilebert_finetune.py index 78653142f0a..6df77f7c8d7 100644 --- a/examples/samsung/scripts/mobilebert_finetune.py +++ b/examples/samsung/scripts/mobilebert_finetune.py @@ -119,8 +119,7 @@ def get_finetune_mobilebert(self, artifacts_dir): # sentiment classification train_url = ( - "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/" - "refs/heads/master/data/SST2/train.tsv" + "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/refs/heads/master/data/SST2/train.tsv" ) content = requests.get(train_url, allow_redirects=True).content train_data = pd.read_csv( @@ -133,8 +132,7 @@ def get_finetune_mobilebert(self, artifacts_dir): ) val_url = ( - "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/" - "refs/heads/master/data/SST2/test.tsv" + "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/refs/heads/master/data/SST2/test.tsv" ) content = requests.get(val_url, allow_redirects=True).content val_data = pd.read_csv( @@ -224,7 +222,7 @@ def validate(self, model, val_data_loader): "-c", "--chipset", required=True, - help="Samsung chipset, i.e. E9945, E9955, etc", + help="Samsung chipset, i.e. E9955, etc", type=str, ) parser.add_argument( From 20780f2a23050ac89eeef4665986a913f69b4b4d Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Wed, 10 Sep 2025 22:40:04 +0000 Subject: [PATCH 14/15] fix lint error Signed-off-by: jiseong.oh --- examples/samsung/scripts/mobilebert_finetune.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/samsung/scripts/mobilebert_finetune.py b/examples/samsung/scripts/mobilebert_finetune.py index 6df77f7c8d7..e0c11df246a 100644 --- a/examples/samsung/scripts/mobilebert_finetune.py +++ b/examples/samsung/scripts/mobilebert_finetune.py @@ -118,9 +118,7 @@ def get_finetune_mobilebert(self, artifacts_dir): from transformers import get_linear_schedule_with_warmup # sentiment classification - train_url = ( - "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/refs/heads/master/data/SST2/train.tsv" - ) + train_url = "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/refs/heads/master/data/SST2/train.tsv" content = requests.get(train_url, allow_redirects=True).content train_data = pd.read_csv( BytesIO(content), delimiter="\t", header=None, names=["text", "label"] @@ -131,9 +129,7 @@ def get_finetune_mobilebert(self, artifacts_dir): train_data, batch_size=64, usage="train" ) - val_url = ( - "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/refs/heads/master/data/SST2/test.tsv" - ) + val_url = "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/refs/heads/master/data/SST2/test.tsv" content = requests.get(val_url, allow_redirects=True).content val_data = pd.read_csv( BytesIO(content), delimiter="\t", header=None, names=["text", "label"] From 3437e4ceb253035c954d56b330ac52ced600e4db Mon Sep 
17 00:00:00 2001
From: "jiseong.oh"
Date: Wed, 10 Sep 2025 23:31:40 +0000
Subject: [PATCH 15/15] apply review comments: remove debug print

Signed-off-by: jiseong.oh
---
 examples/samsung/aot_compiler.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/samsung/aot_compiler.py b/examples/samsung/aot_compiler.py
index 210f15293bb..771d74e0b45 100644
--- a/examples/samsung/aot_compiler.py
+++ b/examples/samsung/aot_compiler.py
@@ -74,8 +74,6 @@
     model = model.eval()
     outputs = model(*example_inputs)
 
-    print("start start ...")
-
     compile_specs = [gen_samsung_backend_compile_spec(args.chipset)]
     edge = to_edge_transform_and_lower_to_enn(
         model, example_inputs, compile_specs=compile_specs