Skip to content

Commit 13de990

Browse files
committed
Add iDMA support for softmax over last dimension within the softmax operator
- Added iDMA support within the softmax operator in vision/operators directory. - Modified op_add, op_embedding and op_view_copy based on comments from the last PR. - Created a build script as an example of how to build and run "add" on Vision 130. - Replaced IVP_PACKVNRN_2X64W with IVP_PACKVRNRN_2X64W. (Instruction is also available on VP6)
1 parent 627971d commit 13de990

File tree

13 files changed

+343
-92
lines changed

13 files changed

+343
-92
lines changed

backends/cadence/aot/functions_vision.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
- op: add.out
2626
kernels:
2727
- arg_meta: null
28-
kernel_name: torch::executor::add_out
28+
kernel_name: cadence::impl::vision::add_out
2929

3030
- op: bmm.out
3131
kernels:
@@ -55,7 +55,7 @@
5555
- op: embedding.out
5656
kernels:
5757
- arg_meta: null
58-
kernel_name: torch::executor::embedding_out
58+
kernel_name: cadence::impl::vision::embedding_out
5959

6060
- op: empty.out
6161
kernels:
@@ -70,7 +70,7 @@
7070
- op: full.out
7171
kernels:
7272
- arg_meta: null
73-
kernel_name: torch::executor::full_out
73+
kernel_name: cadence::impl::vision::full_out
7474

7575
- op: gelu.out
7676
kernels:
@@ -135,7 +135,7 @@
135135
- op: view_copy.out
136136
kernels:
137137
- arg_meta: null
138-
kernel_name: torch::executor::view_copy_out
138+
kernel_name: cadence::impl::vision::view_copy_out
139139

140140
- op: where.self_out
141141
kernels:
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -euo pipefail
9+
10+
unset CMAKE_PREFIX_PATH
11+
unset XTENSA_CORE
12+
export XTENSA_CORE=XRC_Vision_130_AO
13+
git submodule sync
14+
git submodule update --init --recursive
15+
./install_requirements.sh
16+
./install_executorch.sh
17+
18+
rm -rf cmake-out
19+
20+
STEPWISE_BUILD=false
21+
22+
if $STEPWISE_BUILD; then
23+
echo "Building ExecuTorch"
24+
CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
25+
-DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
26+
-DCMAKE_BUILD_TYPE=Release \
27+
-DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
28+
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
29+
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
30+
-DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
31+
-DEXECUTORCH_BUILD_CPUINFO=OFF \
32+
-DEXECUTORCH_ENABLE_LOGGING=ON \
33+
-DEXECUTORCH_USE_DL=OFF \
34+
-DEXECUTORCH_BUILD_CADENCE=OFF \
35+
-Bcmake-out .
36+
37+
echo "Building any Cadence-specific binaries on top"
38+
CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
39+
-DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
40+
-DCMAKE_INSTALL_PREFIX=cmake-out \
41+
-DCMAKE_BUILD_TYPE=Release \
42+
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
43+
-DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
44+
-DEXECUTORCH_BUILD_CADENCE=ON \
45+
-DEXECUTORCH_ENABLE_LOGGING=ON \
46+
-DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
47+
-DEXECUTORCH_USE_DL=OFF \
48+
-DEXECUTORCH_BUILD_PORTABLE_OPS=ON \
49+
-DEXECUTORCH_BUILD_KERNELS_LLM=OFF \
50+
-DPYTHON_EXECUTABLE=python3 \
51+
-DEXECUTORCH_VISION_OPT=ON \
52+
-DHAVE_FNMATCH_H=OFF \
53+
-Bcmake-out/backends/cadence \
54+
backends/cadence
55+
cmake --build cmake-out/backends/cadence -j8
56+
else
57+
echo "Building Cadence toolchain with ExecuTorch packages"
58+
cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
59+
CXXFLAGS="-fno-exceptions -fno-rtti" cmake -DBUCK2="$BUCK" \
60+
-DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
61+
-DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
62+
-DCMAKE_INSTALL_PREFIX=cmake-out \
63+
-DCMAKE_BUILD_TYPE=Release \
64+
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
65+
-DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
66+
-DEXECUTORCH_BUILD_CPUINFO=OFF \
67+
-DEXECUTORCH_BUILD_CADENCE=ON \
68+
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
69+
-DEXECUTORCH_ENABLE_LOGGING=ON \
70+
-DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
71+
-DEXECUTORCH_USE_DL=OFF \
72+
-DEXECUTORCH_BUILD_PORTABLE_OPS=ON \
73+
-DEXECUTORCH_BUILD_KERNELS_LLM=OFF \
74+
-DPYTHON_EXECUTABLE=python3 \
75+
-DEXECUTORCH_VISION_OPT=ON \
76+
-DHAVE_FNMATCH_H=OFF \
77+
-Bcmake-out
78+
cmake --build cmake-out --target install --config Release -j8
79+
fi
80+
81+
echo "Run simple model to verify cmake build"
82+
python3 -m examples.portable.scripts.export --model_name="add"
83+
xt-run --turbo cmake-out/executor_runner --model_path=add.pte

backends/cadence/vision/kernels/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,5 @@ target_include_directories(
2727
${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/include_private
2828
${_common_include_directories}
2929
)
30+
31+
target_link_libraries(cadence_kernels PRIVATE idma)

backends/cadence/vision/operators/op_add.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,19 @@
1111
#include <executorch/runtime/kernel/kernel_includes.h>
1212
#include <executorch/runtime/platform/assert.h>
1313

14-
namespace torch {
15-
namespace executor {
14+
using executorch::aten::ScalarType;
15+
using executorch::aten::Tensor;
16+
using executorch::aten::Scalar;
17+
using executorch::runtime::KernelRuntimeContext;
18+
using executorch::runtime::promoteTypes;
19+
using torch::executor::Error;
20+
using torch::executor::apply_binary_elementwise_fn;
21+
using executorch::runtime::canCast;
22+
using torch::executor::native::utils::extract_scalar;
23+
24+
namespace cadence {
25+
namespace impl {
26+
namespace vision {
1627
namespace native {
1728

1829
Tensor& add_out(
@@ -23,6 +34,8 @@ Tensor& add_out(
2334
Tensor& out) {
2435
(void)ctx;
2536

37+
using namespace torch::executor::native::utils;
38+
2639
ScalarType a_type = a.scalar_type();
2740
ScalarType b_type = b.scalar_type();
2841
ScalarType common_type = promoteTypes(a_type, b_type);
@@ -39,7 +52,7 @@ Tensor& add_out(
3952
using CTYPE_IN = float;
4053
using CTYPE_OUT = float;
4154
CTYPE_IN alpha_val;
42-
ET_EXTRACT_SCALAR(alpha, alpha_val);
55+
ET_CHECK_MSG(extract_scalar(alpha, &alpha_val), "Could not be extracted: wrong type or out of range");
4356

4457
apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
4558
[alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) {
@@ -57,5 +70,6 @@ Tensor& add_out(
5770
}
5871

5972
} // namespace native
60-
} // namespace executor
61-
} // namespace torch
73+
} // namespace vision
74+
} // namespace impl
75+
} // namespace cadence

backends/cadence/vision/operators/op_embedding.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88

99
#include <executorch/runtime/kernel/kernel_includes.h>
1010

11-
namespace torch {
12-
namespace executor {
13-
namespace native {
14-
1511
using executorch::aten::Tensor;
1612
using executorch::runtime::KernelRuntimeContext;
1713

14+
namespace cadence {
15+
namespace impl {
16+
namespace vision {
17+
namespace native {
18+
1819
void embedding_out(
1920
KernelRuntimeContext& ctx,
2021
const Tensor& weight,
@@ -37,5 +38,6 @@ void embedding_out(
3738
}
3839

3940
} // namespace native
40-
} // namespace executor
41-
} // namespace torch
41+
} // namespace vision
42+
} // namespace impl
43+
} // namespace cadence

backends/cadence/vision/operators/op_full.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,19 @@
99
#include <executorch/kernels/portable/cpu/scalar_utils.h>
1010
#include <executorch/runtime/kernel/kernel_includes.h>
1111

12-
namespace torch {
13-
namespace executor {
14-
namespace native {
15-
1612
using executorch::aten::ScalarType;
1713
using executorch::aten::Tensor;
14+
using executorch::aten::Scalar;
15+
using executorch::aten::IntArrayRef;
16+
using executorch::runtime::KernelRuntimeContext;
17+
using torch::executor::Error;
18+
using torch::executor::native::utils::get_scalar_dtype;
19+
using torch::executor::native::utils::extract_scalar;
20+
21+
namespace cadence {
22+
namespace impl {
23+
namespace vision {
24+
namespace native {
1825

1926
Tensor& full_out(
2027
KernelRuntimeContext& ctx,
@@ -23,15 +30,15 @@ Tensor& full_out(
2330
Tensor& out) {
2431
(void)ctx;
2532

26-
ScalarType val_type = utils::get_scalar_dtype(fill_value);
33+
ScalarType val_type = get_scalar_dtype(fill_value);
2734
ScalarType out_type = out.scalar_type();
2835

2936
Error err = resize_tensor(out, sizes);
3037
ET_CHECK_MSG(err == Error::Ok, "Could not resize out");
3138

3239
ET_SWITCH_REAL_TYPES_AND(Bool, val_type, ctx, "full", CTYPE_VAL, [&] {
3340
CTYPE_VAL val;
34-
ET_EXTRACT_SCALAR(fill_value, val);
41+
ET_CHECK_MSG(extract_scalar(fill_value, &val), "Could not be extracted: wrong type or out of range");
3542

3643
ET_SWITCH_REAL_TYPES_AND(Bool, out_type, ctx, "full", CTYPE_OUT, [&] {
3744
CTYPE_OUT val_casted = static_cast<CTYPE_OUT>(val);
@@ -46,5 +53,6 @@ Tensor& full_out(
4653
}
4754

4855
} // namespace native
49-
} // namespace executor
50-
} // namespace torch
56+
} // namespace vision
57+
} // namespace impl
58+
} // namespace cadence

0 commit comments

Comments
 (0)