Merged
Changes from 16 commits
9 changes: 0 additions & 9 deletions .ci/scripts/setup-samsung-linux-deps.sh
@@ -54,15 +54,6 @@ install_enn_backend() {
rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
ANDROID_NDK_VERSION=r27b

pushd .
cd /tmp
curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"

# Print the content for manual verification
ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
popd
# build Exynos backend
export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
bash backends/samsung/build.sh --build all
4 changes: 2 additions & 2 deletions .github/workflows/pull.yml
@@ -874,7 +874,7 @@ jobs:
contents: read
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-gcc9
docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
@@ -892,7 +892,7 @@
source .ci/scripts/setup-samsung-linux-deps.sh

# Test models serially
models="mv2 ic3 resnet18 resnet50"
models="mv2 ic3 resnet18 resnet50 mv3 ic4 dl3 edsr vit w2l"
for model in $models; do
python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
done
88 changes: 88 additions & 0 deletions backends/samsung/_passes/conv1d_to_conv2d.py
@@ -0,0 +1,88 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch._export.utils import get_param


class Conv1dToConv2d(ExportPass):

def __init__(self, edge_program: ExportedProgram):
super().__init__()
self.edge_program = edge_program

def call(self, graph_module: torch.fx.GraphModule):
graph = graph_module.graph
node_list = list(graph.nodes)
for node in node_list:
if node.op == "call_function":
if node.target == exir_ops.edge.aten.convolution.default:
stride = list(node.args[3])
if len(stride) != 1:
continue

# convert 3dim weight to 4dim
weight_node = node.args[1]
weight_3dim = get_param(self.edge_program, weight_node)
weight_4dim = torch.nn.Parameter(
data=weight_3dim.data.contiguous().unsqueeze(dim=-1),
requires_grad=False,
)
parameter_name = (
self.edge_program.graph_signature.inputs_to_parameters[
weight_node.name
]
)
self.edge_program.state_dict[parameter_name] = weight_4dim
weight_node.meta["val"] = weight_node.meta["val"].data.unsqueeze(
dim=-1
)

# Extend stride, padding, and dilation
node.args = (
node.args[0],
node.args[1],
node.args[2],
node.args[3] + [1], # stride
node.args[4] + [0], # padding
node.args[5] + [1], # dilation
node.args[6],
node.args[7],
node.args[8],
)

# unsqueeze -> conv2d -> squeeze
with graph.inserting_before(node):
input_node = node.args[0]
unsqueeze_before = graph.create_node(
"call_function", exir_ops.edge.aten.unsqueeze_copy.default
)
unsqueeze_before.args = (
input_node,
-1,
)
node.replace_input_with(input_node, unsqueeze_before)

with graph.inserting_after(node):
squeeze_after = graph.create_node(
"call_function", exir_ops.edge.aten.squeeze_copy.dims
)
squeeze_after.args = (
node,
[-1],
)
original_users = [
user for user in node.users if user != squeeze_after
]
for user in original_users:
user.replace_input_with(node, squeeze_after)

graph_module.recompile()
graph_module = super().call(graph_module).graph_module
return PassResult(graph_module, True)
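For context, the rewrite above relies on the standard equivalence between a 1-D convolution and a 2-D convolution over a singleton trailing dimension. A minimal PyTorch sketch of that equivalence (illustrative only, not part of the pass):

```python
import torch
import torch.nn.functional as F

# conv1d(x, w) == squeeze(conv2d(unsqueeze(x, -1), unsqueeze(w, -1)), -1)
x = torch.randn(1, 3, 16)   # (N, C_in, L)
w = torch.randn(8, 3, 5)    # (C_out, C_in, K)

out_1d = F.conv1d(x, w, stride=1, padding=0, dilation=1)
out_2d = F.conv2d(
    x.unsqueeze(-1),        # (N, C_in, L, 1)
    w.unsqueeze(-1),        # (C_out, C_in, K, 1)
    stride=(1, 1),
    padding=(0, 0),
    dilation=(1, 1),
).squeeze(-1)

assert torch.allclose(out_1d, out_2d, atol=1e-5)
```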
40 changes: 40 additions & 0 deletions backends/samsung/_passes/customized_constant_prop.py
@@ -0,0 +1,40 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import executorch.exir.passes.constant_prop_pass as constant_prop_module
from executorch.exir import ExportedProgram
from executorch.exir.pass_base import ExportPass, PassResult
from executorch.exir.passes.constant_prop_pass import constant_prop_pass
from torch.fx import GraphModule


class _constant_prop_context:
def __init__(self):
self.backup = constant_prop_module._DEFAULT_SKIP_TARGETS

def __enter__(self):
constant_prop_module._DEFAULT_SKIP_TARGETS = (
constant_prop_module._DEFAULT_SKIP_TARGETS_NO_QUANT
)

def __exit__(self, exc_type, exc_val, exc_tb):
constant_prop_module._DEFAULT_SKIP_TARGETS = self.backup


class ConstantPropPass(ExportPass):
"""
The official constant_prop_pass will not fold Q-DQ pairs, but we also need to
fold quantized constant tensors, not just non-quantized ones.
"""

def __init__(self, edge_program: ExportedProgram):
super().__init__()
self.edge_program = edge_program

def call(self, graph_module: GraphModule):
with _constant_prop_context():
_ = constant_prop_pass(self.edge_program)
return PassResult(graph_module, True)
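As a rough illustration in plain PyTorch (not the ExecuTorch constant-prop machinery), folding a quantize/dequantize chain over a constant simply means precomputing the dequantized value once so the lowered graph keeps only the folded constant:

```python
import torch

# Constant weight with a quantize -> dequantize chain applied to it.
w = torch.randn(8, 4)
q = torch.quantize_per_tensor(w, scale=0.02, zero_point=0, dtype=torch.qint8)

# Constant folding precomputes the dequantized value; at runtime the graph
# only needs this tensor, not the Q/DQ ops.
folded = q.dequantize()
print(folded.shape)  # torch.Size([8, 4])
```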
46 changes: 46 additions & 0 deletions backends/samsung/_passes/replace_scalar_ops.py
@@ -0,0 +1,46 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict, Tuple

import torch
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass
from torch._export.pass_base import Argument
from torch._export.pass_infra.node_metadata import NodeMetadata
from torch._export.pass_infra.proxy_value import ProxyValue


class ReplaceOpsWithScalar(ExportPass):
    # Replace binary ops that take a scalar operand with their tensor variants.
    # Mapping listed below.
_ops_with_scalar = {
exir_ops.edge.aten.add.Scalar: exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.sub.Scalar: exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.div.Scalar: exir_ops.edge.aten.div.Tensor,
exir_ops.edge.aten.mul.Scalar: exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.pow.Tensor_Scalar: exir_ops.edge.aten.pow.Tensor_Tensor,
}

def __init__(self):
super(ReplaceOpsWithScalar, self).__init__()

def call_operator(
self,
op,
args: Tuple[Argument, ...],
kwargs: Dict[str, Argument],
meta: NodeMetadata,
) -> ProxyValue:
if op not in self._ops_with_scalar:
return super().call_operator(op, args, kwargs, meta)

return super().call_operator(
op=self._ops_with_scalar.get(op, op),
args=(args[0], torch.tensor(args[1])),
kwargs=kwargs,
meta=meta,
)
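The effect of the mapping is just promoting the scalar operand to a 0-dim tensor; in eager PyTorch terms (illustrative, not the edge dialect itself):

```python
import torch

x = torch.randn(2, 3)

# add.Scalar-style call ...
y_scalar = torch.add(x, 2.0)
# ... is numerically equivalent to the add.Tensor-style call the pass emits.
y_tensor = torch.add(x, torch.tensor(2.0))

assert torch.allclose(y_scalar, y_tensor)
```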
44 changes: 44 additions & 0 deletions backends/samsung/builders/__init__.py
@@ -9,39 +9,83 @@
op_add,
op_avg_pool2d,
op_batch_norm,
op_bmm,
op_cat,
op_clamp,
op_constant_pad_nd,
op_conv2d,
op_div,
op_embedding,
op_expand_copy,
op_gelu,
op_getitem,
op_hardswish,
op_hardtanh,
op_layer_norm,
op_leaky_relu,
op_linear,
op_log_softmax,
op_max_pool2d,
op_maximum,
op_mean_dim,
op_minimum,
op_mul,
op_permute,
op_pixel_shuffle,
op_relu,
op_reshape,
op_rsqrt,
op_select,
op_slice_copy,
op_softmax,
op_sqrt,
op_squeeze,
op_sub,
op_to_copy,
op_unsqueeze,
op_upsample_bilinear2d,
op_upsample_nearest2d,
)

__all__ = [
node_visitor,
op_add,
op_avg_pool2d,
op_batch_norm,
op_bmm,
op_cat,
op_clamp,
op_conv2d,
op_constant_pad_nd,
op_div,
op_embedding,
op_expand_copy,
op_gelu,
op_getitem,
op_hardswish,
op_hardtanh,
op_layer_norm,
op_leaky_relu,
op_linear,
op_log_softmax,
op_max_pool2d,
op_maximum,
op_mean_dim,
op_minimum,
op_mul,
op_permute,
op_pixel_shuffle,
op_relu,
op_reshape,
op_rsqrt,
op_select,
op_slice_copy,
op_softmax,
op_sqrt,
op_squeeze,
op_sub,
op_to_copy,
op_unsqueeze,
op_upsample_bilinear2d,
op_upsample_nearest2d,
]
40 changes: 40 additions & 0 deletions backends/samsung/builders/op_bmm.py
@@ -0,0 +1,40 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict

import torch
from executorch.backends.samsung.builders.node_visitor import (
NodeVisitor,
register_node_visitor,
)
from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph


@register_node_visitor
class BMMVisitor(NodeVisitor):
target = "aten.bmm.default"

def __init__(self, *args) -> None:
super().__init__(*args)

def define_node(
self,
node: torch.fx.Node,
enn_graph: EnnGraph,
vals_to_ids: Dict[torch.Tensor, int],
) -> None:
input1 = node.args[0]
input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids)
input2 = node.args[1]
input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids)

# output
output_id = self.define_tensor(node, enn_graph, vals_to_ids)

enn_graph.define_op(
node.name, "BATCH_MATMUL", [input_id_1, input_id_2], [output_id]
)
Review comment on lines +30 to +40 (Contributor):
It seems like there's a lot of boilerplate in these visitor definitions. You could package up a few helper subclasses (UnaryOpVisitor, BinaryOpVisitor, etc.) that get the operator name ("BATCH_MATMUL", etc.) from a class property, similar to the existing target property. To accommodate the ops with params, the helper subclass could call self.get_params() (default implementation that returns None) and pass the result to define_op when it isn't None.
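A rough sketch of that suggestion, reusing only the API already shown in this PR (NodeVisitor, register_node_visitor, define_tensor, define_op); the names BinaryOpVisitor, enn_op_type, and get_params are hypothetical:

```python
from typing import Dict

import torch
from executorch.backends.samsung.builders.node_visitor import (
    NodeVisitor,
    register_node_visitor,
)
from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph


class BinaryOpVisitor(NodeVisitor):
    # ENN operator name, set by each concrete subclass.
    enn_op_type: str = ""

    def get_params(self, node: torch.fx.Node):
        # Default: op takes no params; subclasses override when needed.
        return None

    def define_node(
        self,
        node: torch.fx.Node,
        enn_graph: EnnGraph,
        vals_to_ids: Dict[torch.Tensor, int],
    ) -> None:
        input_ids = [
            self.define_tensor(arg, enn_graph, vals_to_ids) for arg in node.args[:2]
        ]
        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
        params = self.get_params(node)
        if params is None:
            enn_graph.define_op(node.name, self.enn_op_type, input_ids, [output_id])
        else:
            enn_graph.define_op(
                node.name, self.enn_op_type, input_ids, [output_id], params
            )


@register_node_visitor
class BMMVisitor(BinaryOpVisitor):
    target = "aten.bmm.default"
    enn_op_type = "BATCH_MATMUL"
```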

56 changes: 56 additions & 0 deletions backends/samsung/builders/op_constant_pad_nd.py
@@ -0,0 +1,56 @@
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import cast, Dict, List

import numpy as np

import torch
from executorch.backends.samsung.builders.node_visitor import (
NodeVisitor,
register_node_visitor,
)
from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
from executorch.backends.transforms import get_shape


@register_node_visitor
class ConstantPadNDVisitor(NodeVisitor):
target = "aten.constant_pad_nd.default"

def __init__(self, *args) -> None:
super().__init__(*args)
Review comment on lines +24 to +25 (@swolchok, Contributor, Sep 11, 2025):

I think __init__ definitions like this can just be omitted


def define_node(
self,
node: torch.fx.Node,
enn_graph: EnnGraph,
vals_to_ids: Dict[torch.Tensor, int],
) -> None:
input = node.args[0]
input_id = self.define_tensor(input, enn_graph, vals_to_ids)

        # Torch padding order starts from the last axis; reorder it to match
        # the layout Samsung Lite-Core expects.
paddings = np.reshape(cast(List[int], node.args[1]), (-1, 2))[::-1].astype(
np.uint32
)
in_shape = get_shape(input)
paddings = paddings.reshape(-1).tolist()
paddings = [0] * (2 * len(in_shape) - len(paddings)) + paddings
paddings = paddings[::2] + paddings[1::2]

padding_value = node.args[2]
        assert padding_value == 0.0, "Only padding constant 0 is supported now."
# output
output_id = self.define_tensor(node, enn_graph, vals_to_ids)

params = {
"explicit_padding": paddings,
"padding": "EXPLICIT",
"padding_type": "CONSTANT",
}

enn_graph.define_op(node.name, "PAD", [input_id], [output_id], params)
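For reference, a small standalone numpy trace of the padding reordering in define_node above (example values for a 4-D NCHW input; not part of the PR):

```python
import numpy as np

# Torch-style pads (w_left, w_right, h_top, h_bottom) = (1, 2, 3, 4)
# for an input of shape (N, C, H, W).
in_shape = (1, 3, 8, 8)
torch_pads = [1, 2, 3, 4]

p = np.reshape(torch_pads, (-1, 2))[::-1]      # [[3, 4], [1, 2]]: H pair first
p = p.reshape(-1).tolist()                     # [3, 4, 1, 2]
p = [0] * (2 * len(in_shape) - len(p)) + p     # [0, 0, 0, 0, 3, 4, 1, 2]
explicit_padding = p[::2] + p[1::2]            # all begins, then all ends (N, C, H, W)
print(explicit_padding)                        # [0, 0, 3, 1, 0, 0, 4, 2]
```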