Skip to content

Commit 6c5efe7

Browse files
committed
Add: Initial backends/cadence/vision module scaffold with optimized softmax (non-iDMA)
1 parent: 910918e · commit: 6c5efe7

38 files changed

+4704
-0
lines changed

backends/cadence/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,18 @@ if(EXECUTORCH_NNLIB_OPT)
8484
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
8585
elseif(EXECUTORCH_FUSION_G3_OPT)
8686
set(TARGET_DIR fusion_g3)
87+
<<<<<<< HEAD
8788
add_subdirectory(
8889
${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
8990
${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
9091
)
92+
=======
93+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
94+
${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
95+
elseif(EXECUTORCH_VISION_OPT)
96+
set(TARGET_DIR vision)
97+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
98+
>>>>>>> 992b83e373 ( Add: Initial backends/cadence/vision module scaffold with optimized softmax (non-iDMA))
9199
else()
92100
set(TARGET_DIR reference)
93101
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This yaml file contains operators that are also defined by the ATen library.
# For lean mode:
# - Codegen'd target `executorch_generated_lib` will be reading all the information
#   from this file, including operator schema and kernel metadata.
# - Selective build target `codegen:executorch_defined_ops` now is selecting all the
#   operators in this file, by dumping all the op names into `selected_operators.yaml`.
#
# See the README.md file in executorch/kernels/portable for a description of the syntax used
# by this file.

# aten ops
- op: _to_copy.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::to_copy_out

- op: _softmax.out
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::_softmax_out

- op: add.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::add_out

- op: bmm.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::bmm_out

- op: cat.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::cat_out

- op: clone.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::clone_out

- op: div.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::div_out

- op: div.out_mode
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::div_out_mode

- op: embedding.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::embedding_out

- op: empty.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::empty_out

- op: expand_copy.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::expand_copy_out

- op: full.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::full_out

- op: gelu.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::gelu_out

- op: hardtanh.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::hardtanh_out

- op: max_pool2d_with_indices.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::max_pool2d_with_indices_out

- op: mean.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::mean_dim_out

- op: mul.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::mul_out

- op: mul.Scalar_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::mul_scalar_out

- op: permute_copy.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::permute_copy_out

- op: rsqrt.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::rsqrt_out

- op: sigmoid.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::sigmoid_out

- op: slice_copy.Tensor_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::slice_copy_Tensor_out

- op: split_with_sizes_copy.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::split_with_sizes_copy_out

- op: sub.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::sub_out

- op: view_copy.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::view_copy_out

- op: where.self_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::where_out

- op: transpose_copy.int_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::transpose_copy_int_out

- op: eq.Scalar_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::eq_scalar_out

- op: logical_not.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::logical_not_out

- op: any.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::any_out

- op: native_group_norm.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::native_group_norm_out

- op: sum.IntList_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::sum_dim_out

- op: select_copy.int_out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::select_copy_int_out

# custom ops
- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantize_per_tensor_out

- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::dequantize_per_tensor_out

- func: cadence::quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_conv_out

- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_layer_norm_out

- func: cadence::quantized_layer_norm.per_tensor_out(Tensor input, float in_scale, int in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_layer_norm_per_tensor_out

- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_linear_out

- func: cadence::quantized_relu.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_relu_out

- func: cadence::quantized_relu.per_tensor_out(Tensor X, int X_zero_point, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_relu_per_tensor_out

- func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_matmul_out

- func: cadence::quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_linear_per_tensor_out

- func: cadence::im2row.out(Tensor input, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, Tensor in_zero_point, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::im2row_out

- func: cadence::im2row.per_tensor_out(Tensor input, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, int in_zero_point, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::im2row_per_tensor_out

- func: cadence::quantized_conv.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_conv_per_tensor_out

- func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_fully_connected_out

- func: cadence::quantized_fully_connected.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::quantized_fully_connected_per_tensor_out

- func: cadence::requantize.out(Tensor input, Tensor in_scale, Tensor in_zero_point, Tensor out_scale, Tensor out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::requantize_out

- func: cadence::requantize.per_tensor_out(Tensor input, float in_scale, int in_zero_point, float out_scale, int out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
  kernels:
    - arg_meta: null
      kernel_name: cadence::impl::vision::requantize_per_tensor_out
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# lint_cmake: -linelength

# Optimized vision kernels plus the third-party library sources
# (softmax, transpose, and the float tables they rely on).
add_library(
  cadence_kernels
  kernels.cpp
  ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/library/api/tensor_transposef.c
  ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/library/api/vsoftmaxf.c
  ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/library/tables/expf_tbl.c
  ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/library/tables/nanf_tbl.c
  ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/library/tables/inff_tbl.c
)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories
    ${EXECUTORCH_ROOT}/..
    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
)

target_include_directories(
  cadence_kernels
  PUBLIC .
         ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/include
         ${EXECUTORCH_ROOT}/backends/cadence/vision/third-party/include_private
         ${_common_include_directories}
)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
# Buck build definitions for this directory; the actual targets are
# declared in the sibling targets.bzl.
load("targets.bzl", "define_common_targets")

oncall("odai_jarvis")

define_common_targets()

0 commit comments

Comments (0)