Skip to content

Commit 1c7a7d0

Browse files
author
morelos
committed
Update on "[ET-VK] double, short, and uint16 dtype runtime support"
Creating support for double, short, and uint16 dtypes in the quantization ops. Registering the short keyword since there's already support for it. Also changing the CPU implementation to support half. Differential Revision: [D75959063](https://our.internmc.facebook.com/intern/diff/D75959063/) [ghstack-poisoned]
2 parents 1621801 + f58f97f commit 1c7a7d0

File tree

237 files changed

+11053
-2264
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

237 files changed

+11053
-2264
lines changed

.ci/scripts/analyze_benchmark_stability.py

Lines changed: 1523 additions & 0 deletions
Large diffs are not rendered by default.

.ci/scripts/build_llama_android.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ build_llama_runner() {
4242
popd
4343
ANDROID_ABI=arm64-v8a
4444
cmake -DBUCK2="${BUCK2}" \
45+
-DBUILD_TESTING=OFF \
4546
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
4647
-DANDROID_ABI="${ANDROID_ABI}" \
4748
-DCMAKE_INSTALL_PREFIX=cmake-android-out \

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ cmake_build_llama_runner() {
169169
popd
170170
dir="examples/models/llama"
171171
retry cmake \
172+
-DBUILD_TESTING=OFF \
172173
-DCMAKE_INSTALL_PREFIX=cmake-out \
173174
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
174175
-Bcmake-out/${dir} \

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ cmake --build cmake-out -j16 --target install --config Release
4040

4141
# Install llama runner with torchao
4242
cmake -DPYTHON_EXECUTABLE=python \
43+
-DBUILD_TESTING=OFF \
4344
-DCMAKE_BUILD_TYPE=Release \
4445
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
4546
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ cmake_install_executorch_libraries_for_android() {
6464

6565

6666
LLAVA_COMMON_CMAKE_ARGS=" \
67+
-DBUILD_TESTING=OFF \
6768
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
6869
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
69-
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
70+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
7071
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
7172
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
7273
-DEXECUTORCH_BUILD_XNNPACK=ON"

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ jobs:
371371
size=${arr[4]}
372372
# threshold=48120 on devserver with gcc11.4
373373
# todo(lfq): update once binary size is below 50kb.
374-
threshold="51408"
374+
threshold="55584"
375375
if [[ "$size" -le "$threshold" ]]; then
376376
echo "Success $size <= $threshold"
377377
else
@@ -406,7 +406,7 @@ jobs:
406406
output=$(ls -la cmake-out/test/size_test)
407407
arr=($output)
408408
size=${arr[4]}
409-
threshold="47560"
409+
threshold="51728"
410410
if [[ "$size" -le "$threshold" ]]; then
411411
echo "Success $size <= $threshold"
412412
else

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ jobs:
262262
output=$(ls -la ${elf})
263263
arr=($output)
264264
size=${arr[4]}
265-
threshold="102400" # 100KiB
265+
threshold="103068" # ~100KiB
266266
echo "size: $size, threshold: $threshold"
267267
if [[ "$size" -le "$threshold" ]]; then
268268
echo "Success $size <= $threshold"

CMakeLists.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,13 @@ if(NOT PYTHON_EXECUTABLE)
7575
endif()
7676
announce_configured_options(PYTHON_EXECUTABLE)
7777

78+
if(NOT BUCK2)
79+
resolve_buck2()
80+
endif()
81+
announce_configured_options(BUCK2)
82+
7883
announce_configured_options(CMAKE_CXX_COMPILER_ID)
7984
announce_configured_options(CMAKE_TOOLCHAIN_FILE)
80-
announce_configured_options(BUCK2)
8185

8286
load_build_preset()
8387
include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
@@ -290,9 +294,6 @@ set(_common_include_directories
290294
#
291295

292296
if(NOT EXECUTORCH_SRCS_FILE)
293-
# Find or download buck2 binary.
294-
resolve_buck2()
295-
296297
# A file wasn't provided. Run a script to extract the source lists from the
297298
# buck2 build system and write them to a file we can include.
298299
#

backends/arm/_passes/annotate_channels_last_dim_order_pass.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@
3535
def _transpose_impl(*args, **kwargs):
3636
# Validate length of dim_order array
3737
dim = args[1]
38-
assert len(dim) in (4, 5)
38+
if len(dim) != 4 and len(dim) != 5:
39+
raise ValueError(
40+
f"Dim order length must be either 4 or 5, got {len(dim)}: {dim}"
41+
)
3942
# Pass-through in edge-IR
4043
return args[0]
4144

backends/arm/_passes/convert_split_to_slice.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@ def call(self, graph_module: torch.fx.GraphModule):
4141
dim = split_node.args[2] if len(split_node.args) > 2 else 0
4242
dim = (dim + rank) % rank
4343

44-
assert (
45-
sum(split_lengths) == shape[dim]
46-
), "Given split lengths don't sum up to the size of the dimension."
44+
# Validate that split lengths cover the entire dimension
45+
length_sum = sum(split_lengths)
46+
dim_size = shape[dim]
47+
if length_sum != dim_size:
48+
raise ValueError(
49+
f"Split sizes {split_lengths} sum to {length_sum}, "
50+
f"but dimension {dim} has size {dim_size}"
51+
)
4752

4853
# Convert split argument 'split_lengths' to slice arguments start and end.
4954
starts = [0] * len(split_lengths)

0 commit comments

Comments
 (0)