Skip to content

Commit 9105c8f

Browse files
committed
Update on "add attention_sink.py"
This PR adds `KVCacheWithAttentionSink`, which is required for `AttentionSink`. It keeps the first `sink_size` tokens as attention sinks and maintains a sliding window with `window_size` for new tokens. Note: I am trying to implement and verify `AttentionSink` in eager mode first, so the current implementation may still have some errors or performance issues. For example, it does not support the case when dynamic shape is disabled. I will leave these problems to be resolved when we are ready to deploy `AttentionSink` to edge. Differential Revision: [D65235798](https://our.internmc.facebook.com/intern/diff/D65235798/) [ghstack-poisoned]
2 parents 349af4f + 0afa4e1 commit 9105c8f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+4039
-280
lines changed

.ci/scripts/test_llama.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ UPLOAD_DIR="${UPLOAD_DIR:-}"
5151
# Default PT2E_QUANTIZE to empty string if not set
5252
PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
5353

54+
# Default CMake Build Type to release mode
55+
CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
56+
5457
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
5558
echo "Expecting atleast 4 positional arguments"
5659
echo "Usage: [...]"
@@ -143,7 +146,7 @@ cmake_install_executorch_libraries() {
143146
rm -rf cmake-out
144147
retry cmake \
145148
-DCMAKE_INSTALL_PREFIX=cmake-out \
146-
-DCMAKE_BUILD_TYPE=Debug \
149+
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
147150
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
148151
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
149152
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
@@ -157,22 +160,22 @@ cmake_install_executorch_libraries() {
157160
-DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
158161
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
159162
-Bcmake-out .
160-
cmake --build cmake-out -j9 --target install --config Debug
163+
cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
161164
}
162165

163166
cmake_build_llama_runner() {
164167
echo "Building llama runner"
165168
dir="examples/models/llama"
166169
retry cmake \
167170
-DCMAKE_INSTALL_PREFIX=cmake-out \
168-
-DCMAKE_BUILD_TYPE=Debug \
171+
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
169172
-DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
170173
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
171174
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
172175
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
173176
-Bcmake-out/${dir} \
174177
${dir}
175-
cmake --build cmake-out/${dir} -j9 --config Debug
178+
cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"
176179

177180
}
178181

.ci/scripts/test_llava.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
set -exu
99
# shellcheck source=/dev/null
1010

11-
BUILD_TYPE=${1:-Debug}
1211
TARGET_OS=${2:-Native}
1312
BUILD_DIR=${3:-cmake-out}
13+
CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
1414

15-
echo "Building with BUILD_TYPE: $BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"
15+
echo "Building with CMAKE_BUILD_TYPE: $CMAKE_BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"
1616

1717
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
1818
PYTHON_EXECUTABLE=python3
@@ -32,7 +32,7 @@ if hash nproc &> /dev/null; then NPROC=$(nproc); fi
3232

3333
EXECUTORCH_COMMON_CMAKE_ARGS=" \
3434
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
35-
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
35+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
3636
-DEXECUTORCH_ENABLE_LOGGING=ON \
3737
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3838
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -49,7 +49,7 @@ cmake_install_executorch_libraries() {
4949
${EXECUTORCH_COMMON_CMAKE_ARGS} \
5050
-B${BUILD_DIR} .
5151

52-
cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
52+
cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
5353
}
5454

5555
cmake_install_executorch_libraries_for_android() {
@@ -59,14 +59,14 @@ cmake_install_executorch_libraries_for_android() {
5959
${EXECUTORCH_COMMON_CMAKE_ARGS} \
6060
-B${BUILD_DIR} .
6161

62-
cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
62+
cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
6363
}
6464

6565

6666
LLAVA_COMMON_CMAKE_ARGS=" \
6767
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
6868
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
69-
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
69+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
7070
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
7171
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
7272
-DEXECUTORCH_BUILD_XNNPACK=ON"
@@ -81,7 +81,7 @@ cmake_build_llava_runner() {
8181
-B${BUILD_DIR}/${dir} \
8282
${dir}
8383

84-
cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
84+
cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${CMAKE_BUILD_TYPE}
8585
}
8686

8787

@@ -98,7 +98,7 @@ cmake_build_llava_runner_for_android() {
9898
-B${BUILD_DIR}/${dir} \
9999
${dir}
100100

101-
cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
101+
cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${CMAKE_BUILD_TYPE}
102102
}
103103

104104
# only export the one without custom op for now since it's

.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,7 @@ on:
33
pull_request:
44
types: [closed]
55
branches:
6-
- 'gh/cccclai/[0-9]+/base'
7-
- 'gh/dbort/[0-9]+/base'
8-
- 'gh/dvorjackz/[0-9]+/base'
9-
- 'gh/guangy10/[0-9]+/base'
10-
- 'gh/helunwencser/[0-9]+/base'
11-
- 'gh/jorgep31415/[0-9]+/base'
12-
- 'gh/kimishpatel/[0-9]+/base'
13-
- 'gh/kirklandsign/[0-9]+/base'
14-
- 'gh/larryliu0820/[0-9]+/base'
15-
- 'gh/lucylq/[0-9]+/base'
16-
- 'gh/manuelcandales/[0-9]+/base'
17-
- 'gh/mcr229/[0-9]+/base'
18-
- 'gh/swolchok/[0-9]+/base'
19-
- 'gh/SS-JIA/[0-9]+/base'
20-
- 'gh/trivedivivek/[0-9]+/base'
6+
- 'gh/*/[0-9]+/base'
217

228
jobs:
239
ghstack_merge_to_main:

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ jobs:
290290
# ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
291291

292292
# # run e2e (export, tokenizer and runner)
293-
# PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release
293+
# PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh
294294

295295
test-qnn-model:
296296
name: test-qnn-model

CMakeLists.txt

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,22 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
682682
endif()
683683

684684
if(EXECUTORCH_BUILD_PYBIND)
685+
# Setup RPATH.
686+
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
687+
if(APPLE)
688+
set(CMAKE_MACOSX_RPATH ON)
689+
set(_rpath_portable_origin "@loader_path")
690+
else()
691+
set(_rpath_portable_origin $ORIGIN)
692+
endif(APPLE)
693+
# Use separate rpaths during build and install phases
694+
set(CMAKE_SKIP_BUILD_RPATH FALSE)
695+
# Don't use the install-rpath during the build phase
696+
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
697+
set(CMAKE_INSTALL_RPATH "${_rpath_portable_origin}")
698+
# Automatically add all linked folders that are NOT in the build directory to
699+
# the rpath (per library?)
700+
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
685701
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)
686702

687703
if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
@@ -765,46 +781,6 @@ if(EXECUTORCH_BUILD_PYBIND)
765781
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
766782
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
767783
target_link_libraries(portable_lib PRIVATE ${_dep_libs})
768-
if(APPLE)
769-
# pip wheels will need to be able to find the torch libraries. On Linux, the
770-
# .so has non-absolute dependencies on libs like "libtorch.so" without
771-
# paths; as long as we `import torch` first, those dependencies will work.
772-
# But Apple dylibs do not support non-absolute dependencies, so we need to
773-
# tell the loader where to look for its libraries. The LC_LOAD_DYLIB entries
774-
# for the torch libraries will look like "@rpath/libtorch.dylib", so we can
775-
# add an LC_RPATH entry to look in a directory relative to the installed
776-
# location of our _portable_lib.so file. To see these LC_* values, run
777-
# `otool -l _portable_lib*.so`.
778-
set_target_properties(
779-
portable_lib
780-
PROPERTIES # Assume that this library will be installed in
781-
# `site-packages/executorch/extension/pybindings`, and that
782-
# the torch libs are in `site-packages/torch/lib`.
783-
BUILD_RPATH "@loader_path/../../../torch/lib"
784-
INSTALL_RPATH "@loader_path/../../../torch/lib"
785-
# Assume <executorch> is the root `site-packages/executorch`
786-
# Need to add <executorch>/extension/llm/custom_ops for
787-
# libcustom_ops_aot_lib.dylib
788-
BUILD_RPATH "@loader_path/../../extension/llm/custom_ops"
789-
INSTALL_RPATH "@loader_path/../../extension/llm/custom_ops"
790-
# Need to add <executorch>/kernels/quantized for
791-
# libquantized_ops_aot_lib.dylib
792-
BUILD_RPATH "@loader_path/../../kernels/quantized"
793-
INSTALL_RPATH "@loader_path/../../kernels/quantized"
794-
)
795-
else()
796-
set_target_properties(
797-
portable_lib
798-
PROPERTIES
799-
# Assume <executorch> is the root `site-packages/executorch`
800-
# Need to add <executorch>/extension/llm/custom_ops for
801-
# libcustom_ops_aot_lib
802-
# Need to add <executorch>/kernels/quantized for
803-
# libquantized_ops_aot_lib
804-
BUILD_RPATH
805-
"$ORIGIN:$ORIGIN/../../extension/llm/custom_ops:$ORIGIN/../../kernels/quantized"
806-
)
807-
endif()
808784

809785
install(TARGETS portable_lib
810786
LIBRARY DESTINATION executorch/extension/pybindings

backends/arm/arm_backend.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
135135
self.quantize_io = quantize_io
136136
return self
137137

138-
def set_input_order(self, input_order: str = None) -> "ArmCompileSpecBuilder":
138+
def set_input_order(
139+
self, input_order: Optional[str] = None
140+
) -> "ArmCompileSpecBuilder":
139141
"""
140142
Reorder the inputs coming in. This may be required when inputs > 1.
141143
And while using the U55/U85 CompileSpec.

backends/arm/test/ops/test_avg_pool.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323

2424
test_data_suite = [
2525
# (test_name, test_data, [kernel_size, stride, padding])
26-
("zeros", torch.zeros(20, 16, 50, 32), [4, 2, 0]),
27-
("ones", torch.zeros(20, 16, 50, 32), [4, 2, 0]),
28-
("rand", torch.rand(20, 16, 50, 32), [4, 2, 0]),
29-
("randn", torch.randn(20, 16, 50, 32), [4, 2, 0]),
26+
("zeros", torch.zeros(1, 16, 50, 32), [4, 2, 0]),
27+
("ones", torch.zeros(1, 16, 50, 32), [4, 2, 0]),
28+
("rand", torch.rand(1, 16, 50, 32), [4, 2, 0]),
29+
("randn", torch.randn(1, 16, 50, 32), [4, 2, 0]),
3030
]
3131

3232

@@ -101,7 +101,7 @@ def _test_avgpool2d_tosa_ethos_BI_pipeline(
101101
test_data: Tuple[torch.tensor],
102102
):
103103
quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
104-
(
104+
tester = (
105105
ArmTester(
106106
module,
107107
example_inputs=test_data,
@@ -116,7 +116,10 @@ def _test_avgpool2d_tosa_ethos_BI_pipeline(
116116
.check_not(["executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"])
117117
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
118118
.to_executorch()
119+
.serialize()
119120
)
121+
if common.is_option_enabled("corstone300"):
122+
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
120123

121124
@parameterized.expand(test_data_suite)
122125
def test_avgpool2d_tosa_MI(

backends/arm/test/ops/test_bmm.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def forward(self, x, y):
4141
class BMMSingleInput(torch.nn.Module):
4242
test_parameters = [
4343
(torch.rand(20, 3, 3),),
44-
(torch.ones(2, 128, 128),),
44+
(torch.rand(2, 128, 128),),
4545
(10000 * torch.randn(4, 25, 25),),
4646
(5 + 5 * torch.randn(3, 64, 64),),
4747
]
@@ -96,7 +96,7 @@ def _test_bmm_ethosu_BI_pipeline(
9696
compile_spec: CompileSpec,
9797
test_data: Tuple[torch.Tensor, ...],
9898
):
99-
(
99+
tester = (
100100
ArmTester(
101101
module,
102102
example_inputs=test_data,
@@ -110,7 +110,10 @@ def _test_bmm_ethosu_BI_pipeline(
110110
.partition()
111111
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
112112
.to_executorch()
113+
.serialize()
113114
)
115+
if common.is_option_enabled("corstone300"):
116+
tester.run_method_and_compare_outputs(inputs=test_data, qtol=1)
114117

115118
@parameterized.expand(BMM.test_parameters)
116119
def test_bmm_tosa_MI(self, operand1: torch.Tensor, operand2: torch.Tensor):
@@ -143,9 +146,20 @@ def test_bmm_single_input_tosa_BI(self, operand1: torch.Tensor):
143146
self._test_bmm_tosa_BI_pipeline(self.BMMSingleInput(), test_data)
144147

145148
@parameterized.expand(BMM.test_parameters)
149+
@unittest.expectedFailure
146150
def test_bmm_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
147151
test_data = (operand1, operand2)
148-
self._test_bmm_tosa_BI_pipeline(self.BMM(), test_data)
152+
self._test_bmm_ethosu_BI_pipeline(
153+
self.BMM(), common.get_u55_compile_spec(), test_data
154+
)
155+
156+
@parameterized.expand(BMM.test_parameters)
157+
@common.expectedFailureOnFVP
158+
def test_bmm_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
159+
test_data = (operand1, operand2)
160+
self._test_bmm_ethosu_BI_pipeline(
161+
self.BMM(), common.get_u85_compile_spec(), test_data
162+
)
149163

150164
# Expected to fail with error: Warning, unsupported fusing of TOSA Rescale previous operator is of type: Memcpy
151165
@parameterized.expand(BMMSingleInput.test_parameters)
@@ -156,7 +170,9 @@ def test_bmm_single_input_u55_BI(self, operand1: torch.Tensor):
156170
self.BMMSingleInput(), common.get_u55_compile_spec(), test_data
157171
)
158172

173+
# Numerical issues on FVP, MLETORCH 534
159174
@parameterized.expand(BMMSingleInput.test_parameters)
175+
@common.expectedFailureOnFVP
160176
def test_bmm_single_input_u85_BI(self, operand1: torch.Tensor):
161177
test_data = (operand1,)
162178
self._test_bmm_ethosu_BI_pipeline(

backends/arm/test/ops/test_cat.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def _test_cat_ethosu_BI_pipeline(
9696
compile_spec: CompileSpec,
9797
test_data: Tuple[tuple[torch.Tensor, ...], int],
9898
):
99-
(
99+
tester = (
100100
ArmTester(
101101
module,
102102
example_inputs=test_data,
@@ -108,10 +108,14 @@ def _test_cat_ethosu_BI_pipeline(
108108
.check(["torch.ops.quantized_decomposed"])
109109
.to_edge()
110110
.partition()
111+
.dump_artifact()
111112
.check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"])
112113
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
113114
.to_executorch()
115+
.serialize()
114116
)
117+
if common.is_option_enabled("corstone300"):
118+
tester.run_method_and_compare_outputs(inputs=test_data)
115119

116120
@parameterized.expand(Cat.test_parameters)
117121
def test_cat_tosa_MI(self, operands: tuple[torch.Tensor, ...], dim: int):
@@ -129,14 +133,18 @@ def test_cat_tosa_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
129133
test_data = (operands, dim)
130134
self._test_cat_tosa_BI_pipeline(self.Cat(), test_data)
131135

136+
# Mismatch in provided number of inputs and model signature, MLETORCH 519
132137
@parameterized.expand(Cat.test_parameters)
138+
@common.expectedFailureOnFVP
133139
def test_cat_u55_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
134140
test_data = (operands, dim)
135141
self._test_cat_ethosu_BI_pipeline(
136142
self.Cat(), common.get_u55_compile_spec(), test_data
137143
)
138144

145+
# Mismatch in provided number of inputs and model signature, MLETORCH 519
139146
@parameterized.expand(Cat.test_parameters)
147+
@common.expectedFailureOnFVP
140148
def test_cat_u85_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
141149
test_data = (operands, dim)
142150
self._test_cat_ethosu_BI_pipeline(

backends/arm/test/ops/test_clone.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def _test_clone_tosa_ethos_pipeline(
8585
test_data: Tuple[torch.Tensor],
8686
):
8787
quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
88-
(
88+
tester = (
8989
ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
9090
.quantize(Quantize(quantizer, get_symmetric_quantization_config()))
9191
.export()
@@ -94,7 +94,10 @@ def _test_clone_tosa_ethos_pipeline(
9494
.partition()
9595
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
9696
.to_executorch()
97+
.serialize()
9798
)
99+
if common.is_option_enabled("corstone300"):
100+
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
98101

99102
def _test_clone_tosa_u55_pipeline(
100103
self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]

0 commit comments

Comments
 (0)