
Commit 0e8c14c

Update
[ghstack-poisoned]
2 parents: 84d6476 + 7bc06d1

38 files changed (+681 −191 lines)

backends/arm/operator_support/to_copy_support.py

Lines changed: 1 addition & 0 deletions
@@ -125,6 +125,7 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
         # Check dim_order (to_dim_order_copy)
         if "dim_order" in node.kwargs:
             dim_order = node.kwargs["dim_order"]
+            # pyre-ignore[6]
             if dim_order != list(range(len(dim_order))):
                 logger.info(
                     f"Argument {dim_order=} is not supported for "

backends/cadence/aot/compiler.py

Lines changed: 5 additions & 1 deletion
@@ -33,6 +33,7 @@
     ExecutorchProgramManager,
     to_edge,
 )
+from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import PassResult
 from executorch.exir.passes import ToOutVarPass
 from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
@@ -186,14 +187,17 @@ def export_to_edge(
     edge_prog_manager = to_edge(
         expo_program,
         compile_config=EdgeCompileConfig(
-            _skip_dim_order=True,
             # Allow specific non-core aten ops in the IR.
             _core_aten_ops_exception_list=[
                 torch.ops.aten._native_batch_norm_legit_functional.default,
                 torch.ops.aten.linear.default,
                 torch.ops.aten.linalg_vector_norm.default,
                 torch.ops.aten.unfold.default,
                 torch.ops.aten.angle.default,
+                # Cadence replaced to_dim_order_copy with _to_copy for performance;
+                # skip the _to_copy op to get around the dim order check.
+                # Remove this entry once Cadence supports dim order.
                 exir_ops.edge.aten._to_copy.default,
             ],
         ),
         constant_methods=constant_methods,
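Taken together, this hunk drops `_skip_dim_order=True` (so `to_edge` keeps the dim-order representation) and instead exempts `aten._to_copy` from the core ATen check. A minimal sketch of that configuration pattern follows; the module and example inputs are illustrative and not part of this commit:

import torch
from executorch.exir import EdgeCompileConfig, to_edge
from executorch.exir.dialects._ops import ops as exir_ops

class CastToFloat(torch.nn.Module):
    def forward(self, x):
        return x.to(torch.float32)  # lowers to a copy/cast op in the edge IR

exported = torch.export.export(CastToFloat(), (torch.ones(2, 2, dtype=torch.int32),))
edge_manager = to_edge(
    exported,
    compile_config=EdgeCompileConfig(
        # No _skip_dim_order=True here, so dim-order ops stay enabled;
        # _to_copy is allowed through explicitly instead.
        _core_aten_ops_exception_list=[
            exir_ops.edge.aten._to_copy.default,
        ],
    ),
)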

backends/cadence/aot/replace_ops.py

Lines changed: 73 additions & 0 deletions
@@ -11,6 +11,7 @@
 
 # pyre-unsafe
 
+import copy
 import math
 from operator import neg
 from typing import cast, Dict, Iterable, Sequence, Set, Tuple
@@ -35,7 +36,12 @@
 from executorch.backends.cadence.aot.utils import get_edge_overload_packet
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket
+from executorch.exir.dim_order_utils import get_memory_format
 from executorch.exir.pass_base import ExportPass, NodeMetadata, PassResult, ProxyValue
+from executorch.exir.passes.dim_order_ops_registry import (
+    DimOrderOpsMap,
+    MemoryFormatOpsMap,
+)
 from torch._subclasses import FakeTensor
 from torch.fx.node import Argument
 
@@ -1799,6 +1805,72 @@ def call_operator(
         )
 
 
+@register_cadence_pass(CadencePassAttribute(opt_level=0))
+class ReplaceToDimOrderCopyWithToCopyPass(ExportPass):
+    """
+    dim_order_ops::to_dim_order_copy is not supported, so this is an opt_level=0 pass.
+    If the dim order is sequential, we don't need the extra work with strides and
+    can just use to_copy.
+    """
+
+    def call_operator(
+        self,
+        op,
+        args: Tuple[Argument, ...],
+        kwargs: Dict[str, Argument],
+        meta: NodeMetadata,
+    ) -> ProxyValue:
+        if op not in DimOrderOpsMap:
+            return super().call_operator(op, args, kwargs, meta)
+
+        # Build new kwargs: dim_order is replaced by memory_format for the new op.
+        nkwargs = dict(copy.deepcopy(kwargs))  # orig kwargs are immutable
+
+        ndim = None
+
+        # We can always get the shape, assuming the rank is specialized.
+        # pyre-ignore[16]: `None` has no attribute `to_tensor`
+        if isinstance(args[0], ProxyValue) and args[0].is_tensor():
+            # pyre-ignore[16]: `None` has no attribute `to_tensor`
+            ndim = args[0].to_tensor().dim()
+        elif isinstance(args[0], torch.Tensor):
+            # pyre-ignore[16]: `None` has no attribute `dim`
+            ndim = args[0].dim()
+        elif isinstance(args[0], torch.fx.immutable_collections.immutable_list):
+            # pyre-ignore[6]: Incompatible parameter type
+            ndim = len(args[0])
+        else:
+            assert 0, f"Expecting a Tensor or a ProxyValue but got {type(args[0])}"
+
+        # Get the "to" memory format for the EdgeOp.
+        contiguous_dim_order = list(range(ndim))
+        dim_order = nkwargs.pop("dim_order", None)
+
+        # Cadence only supports the contiguous memory format.
+        assert (
+            dim_order is None
+            # pyre-ignore[6]: Incompatible parameter type
+            or len(dim_order) == 0
+            or dim_order == contiguous_dim_order
+        ), "Expected dim order in contiguous or preserve memory format, but got {}".format(
+            dim_order
+        )
+
+        # Bring back the memory format.
+        # pyre-ignore[6]: Incompatible parameter type
+        nkwargs["memory_format"] = get_memory_format(dim_order)
+
+        memory_format_op = MemoryFormatOpsMap[op]
+
+        return super().call_operator(
+            memory_format_op,
+            args,
+            nkwargs,
+            meta,
+        )
+
+
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
 class ReplaceFullLikeWithFullPass(ExportPass):
     """
@@ -2108,4 +2180,5 @@ class CadenceReplaceOpsInGraph:
     ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
     ReplaceAtenAvgPoolWithJarvisAvgPoolPass,
     ReplaceAtenLinalgVectorNormWithCadenceLinalgVectorNormPass,
+    ReplaceToDimOrderCopyWithToCopyPass,
 ]
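In effect, the new pass rewrites only the keyword arguments: it pops a (contiguous) `dim_order`, validates it, and substitutes the equivalent `memory_format` before re-dispatching through the non-dim-order op from `MemoryFormatOpsMap`. Below is a minimal sketch of that kwarg rewrite in isolation; `_memory_format_for` is an illustrative stand-in for `get_memory_format`, not the ExecuTorch helper itself:

import copy
import torch

def _memory_format_for(dim_order):
    # Illustrative stand-in: the pass's assert already rejects every
    # non-contiguous dim order, so only this branch matters here.
    if dim_order is None or len(dim_order) == 0 or list(dim_order) == list(range(len(dim_order))):
        return torch.contiguous_format
    raise AssertionError(f"unsupported dim order: {dim_order}")

def rewrite_to_copy_kwargs(kwargs, ndim):
    # Mirrors the body of ReplaceToDimOrderCopyWithToCopyPass.call_operator:
    # copy the kwargs, drop dim_order, validate it, and add memory_format.
    nkwargs = dict(copy.deepcopy(kwargs))
    dim_order = nkwargs.pop("dim_order", None)
    contiguous_dim_order = list(range(ndim))
    assert dim_order is None or len(dim_order) == 0 or list(dim_order) == contiguous_dim_order
    nkwargs["memory_format"] = _memory_format_for(dim_order)
    return nkwargs

print(rewrite_to_copy_kwargs({"dtype": torch.float32, "dim_order": [0, 1, 2, 3]}, ndim=4))
# -> {'dtype': torch.float32, 'memory_format': torch.contiguous_format}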

backends/cadence/build_cadence_fusionG3.sh

Lines changed: 3 additions & 3 deletions
@@ -21,7 +21,7 @@ STEPWISE_BUILD=false
 
 if $STEPWISE_BUILD; then
   echo "Building ExecuTorch"
-  cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
@@ -37,7 +37,7 @@ if $STEPWISE_BUILD; then
     -Bcmake-out .
 
   echo "Building any Cadence-specific binaries on top"
-  cmake -DBUCK2="$BUCK" \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
    -DCMAKE_TOOLCHAIN_FILE=/home/zonglinpeng/ws/zonglinpeng/executorch/backends/cadence/cadence.cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
@@ -61,7 +61,7 @@ if $STEPWISE_BUILD; then
 else
   echo "Building Cadence toolchain with ExecuTorch packages"
   cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
-  cmake -DBUCK2="$BUCK" \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
    -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
    -DHAVE_SYS_STAT_H=ON \
    -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \

backends/cadence/build_cadence_hifi4.sh

Lines changed: 3 additions & 3 deletions
@@ -21,7 +21,7 @@ STEPWISE_BUILD=false
 
 if $STEPWISE_BUILD; then
   echo "Building ExecuTorch"
-  cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
@@ -36,7 +36,7 @@ if $STEPWISE_BUILD; then
     -Bcmake-out .
 
   echo "Building any Cadence-specific binaries on top"
-  cmake -DBUCK2="$BUCK" \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
    -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
@@ -60,7 +60,7 @@ if $STEPWISE_BUILD; then
 else
   echo "Building Cadence toolchain with ExecuTorch packages"
   cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
-  cmake -DBUCK2="$BUCK" \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
    -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
    -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \

backends/cadence/build_cadence_runner.sh

Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@ main() {
   cd "${EXECUTORCH_ROOT}"
 
   rm -rf cmake-out
-  cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_DEVTOOLS=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
@@ -33,7 +33,7 @@ main() {
   local build_dir="cmake-out/${example_dir}"
   local cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
   rm -rf ${build_dir}
-  cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
+  CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_CADENCE_CPU_RUNNER=ON \
     -DEXECUTORCH_ENABLE_LOGGING=ON \

backends/cadence/fusion_g3/operators/op_exp.cpp

Lines changed: 4 additions & 4 deletions
@@ -49,9 +49,9 @@ Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       out);
 #endif
 
-  if (out.scalar_type() == ScalarType::Float) {
-    float* const out_data = out.mutable_data_ptr<float>();
-    const float* const in_data = in.const_data_ptr<float>();
+  if (in.scalar_type() == ScalarType::Float) {
+    float* __restrict__ out_data = out.mutable_data_ptr<float>();
+    const float* __restrict__ in_data = in.const_data_ptr<float>();
 
     XT_KERNEL_CHECK(
         ctx, out, xa_nn_elm_exp_f32_f32, out_data, in_data, out.numel());
@@ -66,4 +66,4 @@ Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
 } // namespace native
 } // namespace G3
 } // namespace impl
-} // namespace cadence
+} // namespace cadence

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ void main() {
       div_by_x % out_limits.y,
       div_by_x / out_limits.y);
 
-  if (any(greaterThanEqual(pos, out_limits))) {
+  if (pos.z >= out_limits.z) {
     return;
   }

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ void main() {
   pos.y *= BATCH_SIZE_Y;
 
   // do not process if top pixel does not fit within the output range
-  if (any(greaterThanEqual(pos, out_limits))) {
+  if (pos.z >= out_limits.z) {
     return;
   }

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ void main() {
       div_by_x % out_limits.y,
       div_by_x / out_limits.y);
 
-  if (any(greaterThanEqual(pos, out_limits))) {
+  if (pos.z >= out_limits.z) {
    return;
  }
