Commit f3bc1e6

Update on "[ET-VK] Using a single GPU buffer for all tensor uniforms."
This diff changes the Tensor class to store all uniforms in a single uniform buffer. The entities stored in uniforms, i.e. sizes, strides, numel, and logical limits, now live in one buffer, and their offsets are stored as unsigned ints in the Tensor class. Other changes include:

- a new ctor for the ParamsBuffer class that allocates a given size without a data ptr;
- an offset input for the Buffer::data function;
- an offset parameter for the BufferBindInfo ctor, so an additional offset can be supplied when binding a buffer.

Differential Revision: [D65841750](https://our.internmc.facebook.com/intern/diff/D65841750/)

[ghstack-poisoned]
2 parents 9db69e5 + 7265606 commit f3bc1e6
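To make the layout concrete, here is a minimal sketch, in Python, of the packing scheme the commit message describes: the per-tensor metadata fields share one buffer, and each field's byte offset is what the Tensor class records as an unsigned int. The field names, field sizes, and the alignment constant are illustrative assumptions, not the actual ET-VK implementation.

# Sketch only: field sizes and the alignment value are assumptions; the real
# code lives in the C++ Tensor/ParamsBuffer/BufferBindInfo classes.
FIELD_SIZES = {
    "sizes": 16,           # e.g. an ivec4
    "strides": 16,         # e.g. an ivec4
    "numel": 4,            # a single int
    "logical_limits": 16,  # e.g. an ivec3 padded out
}
MIN_UBO_ALIGNMENT = 64  # assumed minUniformBufferOffsetAlignment

def pack_offsets(field_sizes, alignment=MIN_UBO_ALIGNMENT):
    """Assign each field an aligned byte offset inside one shared buffer."""
    offsets = {}
    cursor = 0
    for name, size in field_sizes.items():
        offsets[name] = cursor  # stored on the Tensor as a uint
        # Round up so the next field starts on an aligned boundary, since a
        # uniform buffer can only be bound at aligned offsets.
        cursor = ((cursor + size + alignment - 1) // alignment) * alignment
    return offsets, cursor  # per-field offsets, total buffer size to allocate

offsets, total_size = pack_offsets(FIELD_SIZES)
# Binding then passes (buffer, offsets["strides"]) instead of a separate
# buffer per field, which is what the new BufferBindInfo offset enables.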

File tree

51 files changed: +535 -357 lines changed

CMakeLists.txt

Lines changed: 9 additions & 3 deletions
@@ -200,8 +200,6 @@ option(EXECUTORCH_BUILD_EXTENSION_TENSOR "Build the Tensor extension" OFF)
 
 option(EXECUTORCH_BUILD_EXTENSION_TRAINING "Build the training extension" OFF)
 
-option(EXECUTORCH_BUILD_GTESTS "Build googletest based test binaries" OFF)
-
 option(EXECUTORCH_BUILD_MPS "Build the MPS backend" OFF)
 
 option(EXECUTORCH_BUILD_NEURON "Build the backends/mediatek directory" OFF)
@@ -216,6 +214,8 @@ option(EXECUTORCH_BUILD_KERNELS_QUANTIZED "Build the quantized kernels" OFF)
 
 option(EXECUTORCH_BUILD_DEVTOOLS "Build the ExecuTorch Developer Tools")
 
+option(EXECUTORCH_BUILD_TESTS "Build CMake-based unit tests" OFF)
+
 option(EXECUTORCH_NNLIB_OPT "Build Cadence backend Hifi nnlib kernel" OFF)
 
 option(EXECUTORCH_CADENCE_CPU_RUNNER "Build Cadence backend CPU runner" OFF)
@@ -330,6 +330,10 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
   )
 endif()
 
+if(EXECUTORCH_BUILD_TESTS)
+  include(CTest)
+endif()
+
 if(NOT PYTHON_EXECUTABLE)
   resolve_python_executable()
 endif()
@@ -625,7 +629,7 @@ cmake_dependent_option(
 )
 
 # Add googletest if any test targets should be built
-if(EXECUTORCH_BUILD_GTESTS)
+if(BUILD_TESTING)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
 endif()
 
@@ -829,5 +833,7 @@ if(EXECUTORCH_BUILD_VULKAN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
 endif()
 
+include(Test.cmake)
+
 # Print all summary
 executorch_print_configuration_summary()
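In effect, EXECUTORCH_BUILD_TESTS replaces EXECUTORCH_BUILD_GTESTS: turning the new flag on pulls in CMake's standard CTest module, which defines BUILD_TESTING (ON by default), and BUILD_TESTING in turn gates both the googletest checkout above and the per-directory test targets registered in the new Test.cmake below.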

Test.cmake

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+#
+# A helper CMake file to trigger C++ unit tests.
+#
+
+if(BUILD_TESTING)
+  # This contains the list of tests which are always built
+  add_subdirectory(extension/evalue_util/test)
+  add_subdirectory(extension/kernel_util/test)
+  add_subdirectory(extension/memory_allocator/test)
+  add_subdirectory(extension/parallel/test)
+  add_subdirectory(extension/pytree/test)
+  add_subdirectory(kernels/portable/cpu/util/test)
+  add_subdirectory(kernels/prim_ops/test)
+  add_subdirectory(kernels/test)
+  add_subdirectory(runtime/core/exec_aten/testing_util/test)
+  add_subdirectory(runtime/core/exec_aten/util/test)
+  add_subdirectory(runtime/core/portable_type/test)
+  add_subdirectory(runtime/core/test)
+  add_subdirectory(runtime/executor/test)
+  add_subdirectory(runtime/kernel/test)
+  add_subdirectory(runtime/platform/test)
+  add_subdirectory(test/utils)
+endif()
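Assuming the per-directory CMakeLists register their binaries with add_test (or gtest_discover_tests), these suites can then be driven with a plain ctest run from the build directory once the flag is enabled.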
backends/cadence/build_cadence_fusion_g3.sh (new file)

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -euo pipefail
+
+unset CMAKE_PREFIX_PATH
+unset XTENSA_CORE
+export XTENSA_CORE=FCV_FG3GP
+git submodule sync
+git submodule update --init
+./install_requirements.sh
+
+rm -rf cmake-out
+
+STEPWISE_BUILD=false
+
+if $STEPWISE_BUILD; then
+  echo "Building ExecuTorch"
+  cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+      -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
+      -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
+      -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
+      -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
+      -DEXECUTORCH_BUILD_CPUINFO=OFF \
+      -DEXECUTORCH_ENABLE_LOGGING=ON \
+      -DEXECUTORCH_USE_DL=OFF \
+      -DEXECUTORCH_BUILD_CADENCE=OFF \
+      -DFLATC_EXECUTABLE="$(which flatc)" \
+      -DHAVE_FNMATCH_H=OFF \
+      -Bcmake-out .
+
+  echo "Building any Cadence-specific binaries on top"
+  cmake -DBUCK2="$BUCK" \
+      -DCMAKE_TOOLCHAIN_FILE=/home/zonglinpeng/ws/zonglinpeng/executorch/backends/cadence/cadence.cmake \
+      -DCMAKE_INSTALL_PREFIX=cmake-out \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
+      -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
+      -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
+      -DEXECUTORCH_BUILD_CADENCE=ON \
+      -DFLATC_EXECUTABLE="$(which flatc)" \
+      -DEXECUTORCH_ENABLE_LOGGING=ON \
+      -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
+      -DEXECUTORCH_USE_DL=OFF \
+      -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
+      -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
+      -DPYTHON_EXECUTABLE=python3 \
+      -DEXECUTORCH_FUSION_G3_OPT=ON \
+      -DEXECUTORCH_BUILD_GFLAGS=ON \
+      -DHAVE_FNMATCH_H=OFF \
+      -Bcmake-out/backends/cadence \
+      backends/cadence
+  cmake --build cmake-out/backends/cadence -j8
+else
+  echo "Building Cadence toolchain with ExecuTorch packages"
+  cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
+  cmake -DBUCK2="$BUCK" \
+      -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
+      -DHAVE_SYS_STAT_H=ON \
+      -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
+      -DCMAKE_INSTALL_PREFIX=cmake-out \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
+      -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
+      -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
+      -DEXECUTORCH_BUILD_CPUINFO=OFF \
+      -DEXECUTORCH_BUILD_FLATC=OFF \
+      -DEXECUTORCH_BUILD_CADENCE=ON \
+      -DFLATC_EXECUTABLE="$(which flatc)" \
+      -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
+      -DEXECUTORCH_ENABLE_LOGGING=ON \
+      -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
+      -DEXECUTORCH_USE_DL=OFF \
+      -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
+      -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
+      -DPYTHON_EXECUTABLE=python3 \
+      -DEXECUTORCH_FUSION_G3_OPT=ON \
+      -DHAVE_FNMATCH_H=OFF \
+      -Bcmake-out
+  cmake --build cmake-out --target install --config Release -j8
+fi
+
+echo "Run simple model to verify cmake build"
+python3 -m examples.portable.scripts.export --model_name="add"
+xt-run --turbo cmake-out/executor_runner --model_path=add.pte

backends/cadence/build_cadence_xtensa.sh renamed to backends/cadence/build_cadence_hifi4.sh

Lines changed: 4 additions & 2 deletions
@@ -8,6 +8,8 @@
 set -euo pipefail
 
 unset CMAKE_PREFIX_PATH
+unset XTENSA_CORE
+export XTENSA_CORE=nxp_rt600_RI23_11_newlib
 git submodule sync
 git submodule update --init
 ./install_requirements.sh
@@ -53,7 +55,7 @@ if $STEPWISE_BUILD; then
       -DHAVE_FNMATCH_H=OFF \
       -Bcmake-out/backends/cadence \
      backends/cadence
-  cmake --build cmake-out/backends/cadence -j16
+  cmake --build cmake-out/backends/cadence -j8
 else
   echo "Building Cadence toolchain with ExecuTorch packages"
   cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
@@ -79,7 +81,7 @@ else
       -DEXECUTORCH_NNLIB_OPT=ON \
       -DHAVE_FNMATCH_H=OFF \
       -Bcmake-out
-  cmake --build cmake-out --target install --config Release -j16
+  cmake --build cmake-out --target install --config Release -j8
 fi
 
 echo "Run simple model to verify cmake build"

backends/cadence/hifi/operators/op_mean.cpp

Lines changed: 1 addition & 2 deletions
@@ -145,8 +145,7 @@ Tensor& mean_dim_out(
   ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
     ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
       CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-      const size_t num =
-          torch::executor::exeget_reduced_dim_product(in, dim_list);
+      const size_t num = torch::executor::get_reduced_dim_product(in, dim_list);
       for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
         CTYPE_OUT sum = 0;
         if (in.numel() > 0) {
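The deleted lines carried a garbled helper name (exeget_reduced_dim_product, with a stray "exe" pasted into the identifier), which would not compile; the replacement restores the call to torch::executor::get_reduced_dim_product on a single line.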

backends/qualcomm/builders/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -14,6 +14,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_cos,
     op_depth_to_space,
     op_dequantize,
     op_div,
@@ -43,6 +44,7 @@
     op_rsqrt,
     op_select_copy,
     op_sigmoid,
+    op_sin,
     op_skip_ops,
     op_slice_copy,
     op_softmax,
@@ -71,6 +73,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_cos,
     op_depth_to_space,
     op_dequantize,
     op_div,
@@ -100,6 +103,7 @@
     op_rsqrt,
     op_select_copy,
     op_sigmoid,
+    op_sin,
     op_skip_ops,
     op_slice_copy,
     op_softmax,
backends/qualcomm/builders/op_cos.py (new file)

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpElementWiseCos, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Cos(NodeVisitor):
+    target = ["aten.cos.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        input_node = node.args[0]
+        input_tensor = self.get_tensor(input_node, node)
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+
+        output_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+
+        cos_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpElementWiseCos.op_name,
+        )
+        cos_op.AddInputTensors([input_tensor_wrapper])
+        cos_op.AddOutputTensors([output_tensor_wrapper])
+
+        return cos_op
backends/qualcomm/builders/op_sin.py (new file)

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpElementWiseSin, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Sin(NodeVisitor):
+    target = ["aten.sin.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        input_node = node.args[0]
+        input_tensor = self.get_tensor(input_node, node)
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+
+        output_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+
+        sin_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpElementWiseSin.op_name,
+        )
+        sin_op.AddInputTensors([input_tensor_wrapper])
+        sin_op.AddOutputTensors([output_tensor_wrapper])
+
+        return sin_op

backends/qualcomm/builders/qnn_constants.py

Lines changed: 10 additions & 0 deletions
@@ -85,6 +85,11 @@ class OpElementWiseCeil:
     op_name = "ElementWiseCeil"
 
 
+@dataclass(init=False, frozen=True)
+class OpElementWiseCos:
+    op_name: str = "ElementWiseCos"
+
+
 @dataclass(init=False, frozen=True)
 class OpElementWiseDivide:
     op_name: str = "ElementWiseDivide"
@@ -113,6 +118,11 @@ class OpElementWiseRsqrt:
     op_name: str = "ElementWiseRsqrt"
 
 
+@dataclass(init=False, frozen=True)
+class OpElementWiseSin:
+    op_name: str = "ElementWiseSin"
+
+
 @dataclass(init=False, frozen=True)
 class OpElementWiseSubtract:
     op_name = "ElementWiseSubtract"

backends/qualcomm/quantizer/annotators.py

Lines changed: 10 additions & 0 deletions
@@ -271,6 +271,16 @@ def annotate_relu(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_single_in_single_out(node, quantization_config)
 
 
+@register_annotator([torch.ops.aten.cos.default])
+def annotate_cos(node: Node, quantization_config: QuantizationConfig) -> None:
+    annotate_single_in_single_out(node, quantization_config)
+
+
+@register_annotator([torch.ops.aten.sin.default])
+def annotate_sin(node: Node, quantization_config: QuantizationConfig) -> None:
+    annotate_single_in_single_out(node, quantization_config)
+
+
 @register_annotator([torch.ops.aten.tanh.default])
 def annotate_tanh(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_single_in_single_out(node, quantization_config)
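Taken together, the new node visitors, QNN op constants, and quantizer annotators let graphs containing aten.cos.default and aten.sin.default be annotated and lowered to QNN's ElementWiseCos/ElementWiseSin. A minimal sketch of a model that would exercise the new paths (standard torch.export only; Qualcomm's quantizer and partitioner entry points are omitted):

import torch

class SinCos(torch.nn.Module):
    def forward(self, x):
        # Exporting this produces aten.sin.default and aten.cos.default
        # nodes, the exact targets the new builders and annotators match.
        return torch.sin(x) + torch.cos(x)

example_inputs = (torch.randn(1, 16),)
exported = torch.export.export(SinCos(), example_inputs)
print(exported.graph_module.graph)  # shows the aten.sin/aten.cos call_function nodes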
