Skip to content

Commit c99c363

Browse files
committed
Update on "[ET-VK][ez] Enable dynamic shape support when using push constants"
## Changes * Call `encode_execute()` upon resize in `VulkanBackend.cpp` * Minor update to `DispatchNode` to store push constant data array as a persistent member of the class ## Motivation Passing in tensor metadata (i.e. sizes, strides) via push constants is typically more performant than passing them via a UBO (uniform buffer object). However, dynamic shapes currently do not work when push constants are used, because the tensor metadata contained in the push constants does not get updated. It appears that `vkCmdPushConstants` sets the push constants when encoding the command buffer; however, the push constants will not be updated if the command buffer is submitted for execution multiple times. Therefore, to update push constant values **the command buffer needs to be re-encoded**. ## Performance Impact This may add a small performance overhead (i.e. re-encoding the command buffer) when executing models with dynamic shapes. Models that do not trigger tensor resizing will not be impacted. However, I measured the impact on a llama 3.2 1B model and the impact of re-encoding a command buffer appears to be negligible. In any case, re-encoding the command buffer is a "necessary evil" when working with dynamic shapes; otherwise the tensor metadata seen by shaders may never get updated. Furthermore, re-encoding the command buffer can allow an opportunity to adjust global work group sizing to match current tensor sizes, which may have a huge performance impact when maximum tensor sizes far exceed what tensor sizes will realistically be during inference (one instance of this is for transformer models when the max sequence length is very long). Differential Revision: [D75686051](https://our.internmc.facebook.com/intern/diff/D75686051/) [ghstack-poisoned]
2 parents e4b080a + f2f869f commit c99c363

File tree

44 files changed

+1123
-242
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1123
-242
lines changed

.Package.swift/kernels_portable/dummy.swift

Whitespace-only changes.

.Package.swift/kernels_portable_debug/dummy.swift

Whitespace-only changes.

.ci/scripts/test_ios_ci.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@ say "Installing CoreML Backend Requirements"
4242

4343
./backends/apple/coreml/scripts/install_requirements.sh
4444

45-
say "Installing MPS Backend Requirements"
46-
47-
./backends/apple/mps/install_requirements.sh
48-
4945
say "Exporting Models"
5046

5147
python3 -m examples.portable.scripts.export --model_name="$MODEL_NAME" --segment_alignment=0x4000

.github/workflows/apple-perf.yml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,6 @@ jobs:
188188
backends/apple/coreml/scripts/install_requirements.sh
189189
fi
190190
191-
if [[ ${{ matrix.config }} == *"mps"* ]]; then
192-
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
193-
backends/apple/mps/install_requirements.sh
194-
fi
195-
196191
# Install requirements for export_llama
197192
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
198193
@@ -379,10 +374,6 @@ jobs:
379374
# Install CoreML Backend Requirements
380375
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
381376
backends/apple/coreml/scripts/install_requirements.sh
382-
383-
# Install MPS Backend Requirements
384-
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
385-
backends/apple/mps/install_requirements.sh
386377
echo "::endgroup::"
387378
388379
echo "::group::Build ExecuTorch iOS frameworks"

.github/workflows/apple.yml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ jobs:
154154
"backend_xnnpack"
155155
"kernels_custom"
156156
"kernels_optimized"
157-
"kernels_portable"
158157
"kernels_quantized"
159158
"threadpool"
160159
)
@@ -169,10 +168,6 @@ jobs:
169168
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
170169
backends/apple/coreml/scripts/install_requirements.sh
171170
172-
# Install MPS Backend Requirements
173-
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
174-
backends/apple/mps/install_requirements.sh
175-
176171
# Build iOS Frameworks
177172
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output scripts/build_apple_frameworks.sh
178173
@@ -307,10 +302,6 @@ jobs:
307302
# Install CoreML Backend Requirements
308303
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
309304
backends/apple/coreml/scripts/install_requirements.sh
310-
311-
# Install MPS Backend Requirements
312-
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
313-
backends/apple/mps/install_requirements.sh
314305
echo "::endgroup::"
315306
316307
echo "::group::Build ExecuTorch iOS frameworks"

.github/workflows/trunk.yml

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,7 @@ jobs:
414414
# Setup executorch
415415
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake
416416
417-
if [[ "${MODE}" == "mps" ]]; then
418-
# Install mps delegate
419-
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
420-
echo "Finishing installing mps."
421-
elif [[ "${MODE}" == "coreml" ]]; then
417+
if [[ "${MODE}" == "coreml" ]]; then
422418
# Install coreml delegate
423419
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
424420
echo "Finishing installing coreml."
@@ -504,8 +500,6 @@ jobs:
504500
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
505501
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
506502
echo "Finishing installing coreml."
507-
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
508-
echo "Finishing installing mps."
509503
510504
# Build and test coreml model
511505
MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)

CMakeLists.txt

Lines changed: 80 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,16 @@
3939
# ~~~
4040
# cmake-format -i CMakeLists.txt
4141
# ~~~
42-
# It should also be cmake-lint clean.
42+
# It should also be checked with a linter via
43+
# ~~~
44+
# cmake-lint CMakeLists.txt
45+
# ~~~
4346
#
4447

4548
cmake_minimum_required(VERSION 3.24)
4649
project(executorch)
4750

48-
# MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION --------------------------------------------------
51+
# MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION
4952

5053
include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
5154
include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
@@ -82,24 +85,25 @@ include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
8285
# Print all the configs that were called with announce_configured_options.
8386
print_configured_options()
8487

85-
# MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION ----------------------------------------------------
88+
# MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION
8689

8790
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
8891

89-
# Setup RPATH.
90-
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
92+
# Setup RPATH. See
93+
# https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
9194
# Use separate rpaths during build and install phases
9295
set(CMAKE_SKIP_BUILD_RPATH OFF)
9396
# Don't use the install-rpath during the build phase
9497
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
9598
# Automatically add all linked folders that are NOT in the build directory to
9699
# the rpath (per library?)
97-
# TODO: Doesn't work for us right now because we are not installing .so's into the
98-
# correct locations. For example we have libcustom_ops_aot_lib.so depending on
99-
# _portable_lib.so, which was eventually put under <site-packages>/executorch/extension/pybindings/
100-
# but this rpath is not automatically added because at build time it seems `portable_lib`
101-
# is being built under the same directory, so no extra rpath is being added. To
102-
# properly fix this we need to install `portable_lib` into the correct path.
100+
# TODO: Doesn't work for us right now because we are
101+
# not installing .so's into the correct locations. For example we have
102+
# libcustom_ops_aot_lib.so depending on _portable_lib.so, which was eventually
103+
# put under <site-packages>/executorch/extension/pybindings/ but this rpath is
104+
# not automatically added because at build time it seems `portable_lib` is being
105+
# built under the same directory, so no extra rpath is being added. To properly
106+
# fix this we need to install `portable_lib` into the correct path.
103107
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
104108
# ------------------------------ OPTIONS -------------------------------------
105109
# WARNING: Please don't add example specific options in this CMakeLists.txt.
@@ -143,6 +147,11 @@ else()
143147
set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${CMAKE_CXX_FLAGS_RELEASE}")
144148
endif()
145149

150+
if(EXECUTORCH_BUILD_TESTS)
151+
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
152+
include(CTest)
153+
endif()
154+
146155
add_subdirectory(third-party)
147156

148157
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
@@ -172,7 +181,7 @@ endif()
172181

173182
if(NOT DEFINED FXDIV_SOURCE_DIR)
174183
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
175-
${CMAKE_POSITION_INDEPENDENT_CODE}
184+
${CMAKE_POSITION_INDEPENDENT_CODE}
176185
)
177186
set(FXDIV_SOURCE_DIR "backends/xnnpack/third-party/FXdiv")
178187
add_subdirectory("${FXDIV_SOURCE_DIR}")
@@ -254,11 +263,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
254263
)
255264
endif()
256265

257-
if(EXECUTORCH_BUILD_TESTS)
258-
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
259-
include(CTest)
260-
endif()
261-
262266
# TODO(dbort): Fix these warnings and remove this flag.
263267
set(_common_compile_options -Wno-deprecated-declarations -fPIC)
264268

@@ -276,7 +280,10 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch")
276280
"fix for this restriction."
277281
)
278282
endif()
279-
set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type/c10)
283+
set(_common_include_directories
284+
${CMAKE_CURRENT_SOURCE_DIR}/..
285+
${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type/c10
286+
)
280287

281288
#
282289
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
@@ -310,9 +317,9 @@ endif()
310317
# Detect if an Android toolchain is set.
311318
if(CMAKE_TOOLCHAIN_FILE MATCHES ".*android\.toolchain\.cmake$")
312319
set(CMAKE_TOOLCHAIN_ANDROID ON)
313-
if(NOT ANDROID_PLATFORM)
314-
set(ANDROID_PLATFORM android-30)
315-
endif()
320+
if(NOT ANDROID_PLATFORM)
321+
set(ANDROID_PLATFORM android-30)
322+
endif()
316323
else()
317324
set(CMAKE_TOOLCHAIN_ANDROID OFF)
318325
endif()
@@ -334,7 +341,6 @@ if(EXECUTORCH_USE_CPP_CODE_COVERAGE)
334341
endif()
335342
endif()
336343

337-
338344
#
339345
# program_schema: Generated .h files from schema/*.fbs inputs
340346
#
@@ -376,7 +382,9 @@ endif()
376382
target_include_directories(
377383
executorch_core PUBLIC ${_common_include_directories}
378384
)
379-
target_compile_definitions(executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
385+
target_compile_definitions(
386+
executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS
387+
)
380388
target_compile_options(executorch_core PUBLIC ${_common_compile_options})
381389
if(MAX_KERNEL_NUM)
382390
target_compile_definitions(
@@ -386,9 +394,7 @@ endif()
386394

387395
if(EXECUTORCH_BUILD_PYBIND AND APPLE)
388396
# shared version
389-
add_library(
390-
executorch_core_shared SHARED ${_executorch_core__srcs}
391-
)
397+
add_library(executorch_core_shared SHARED ${_executorch_core__srcs})
392398
target_link_libraries(executorch_core_shared PRIVATE program_schema)
393399
if(DL_LIBRARY_EXISTS)
394400
# For dladdr()
@@ -397,7 +403,9 @@ if(EXECUTORCH_BUILD_PYBIND AND APPLE)
397403
target_include_directories(
398404
executorch_core_shared PUBLIC ${_common_include_directories}
399405
)
400-
target_compile_definitions(executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
406+
target_compile_definitions(
407+
executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS
408+
)
401409
target_compile_options(
402410
executorch_core_shared PUBLIC ${_common_compile_options}
403411
)
@@ -430,9 +438,8 @@ target_link_options_shared_lib(executorch)
430438
# operators necessary for the models that will run.
431439
#
432440
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
433-
# find pytorch lib here to make it available to all
434-
# sub-directories. Find it before including portable so that
435-
# optimized_portable_kernels can use it.
441+
# find pytorch lib here to make it available to all sub-directories. Find it
442+
# before including portable so that optimized_portable_kernels can use it.
436443
find_package_torch_headers()
437444
endif()
438445

@@ -458,24 +465,50 @@ endif()
458465

459466
# Install `executorch` library as well as `executorch-config.cmake` under
460467
# ${CMAKE_INSTALL_PREFIX}/
461-
install(DIRECTORY runtime/core/ DESTINATION include/executorch/runtime/core FILES_MATCHING PATTERN "*.h")
462-
install(DIRECTORY runtime/kernel/ DESTINATION include/executorch/runtime/kernel FILES_MATCHING PATTERN "*.h")
463-
install(DIRECTORY runtime/platform/ DESTINATION include/executorch/runtime/platform FILES_MATCHING PATTERN "*.h")
464-
install(DIRECTORY extension/kernel_util/ DESTINATION include/executorch/extension/kernel_util FILES_MATCHING PATTERN "*.h")
465-
install(DIRECTORY extension/tensor/ DESTINATION include/executorch/extension/tensor FILES_MATCHING PATTERN "*.h")
466-
install(DIRECTORY extension/threadpool/ DESTINATION include/executorch/extension/threadpool FILES_MATCHING PATTERN "*.h")
468+
install(
469+
DIRECTORY runtime/core/
470+
DESTINATION include/executorch/runtime/core
471+
FILES_MATCHING
472+
PATTERN "*.h"
473+
)
474+
install(
475+
DIRECTORY runtime/kernel/
476+
DESTINATION include/executorch/runtime/kernel
477+
FILES_MATCHING
478+
PATTERN "*.h"
479+
)
480+
install(
481+
DIRECTORY runtime/platform/
482+
DESTINATION include/executorch/runtime/platform
483+
FILES_MATCHING
484+
PATTERN "*.h"
485+
)
486+
install(
487+
DIRECTORY extension/kernel_util/
488+
DESTINATION include/executorch/extension/kernel_util
489+
FILES_MATCHING
490+
PATTERN "*.h"
491+
)
492+
install(
493+
DIRECTORY extension/tensor/
494+
DESTINATION include/executorch/extension/tensor
495+
FILES_MATCHING
496+
PATTERN "*.h"
497+
)
498+
install(
499+
DIRECTORY extension/threadpool/
500+
DESTINATION include/executorch/extension/threadpool
501+
FILES_MATCHING
502+
PATTERN "*.h"
503+
)
467504
install(
468505
TARGETS executorch executorch_core
469-
DESTINATION lib
470506
INCLUDES
471507
DESTINATION ${_common_include_directories}
472508
)
473-
install(FILES tools/cmake/executorch-config.cmake DESTINATION lib/cmake/ExecuTorch)
474-
475-
# Add googletest if any test targets should be built
476-
if(BUILD_TESTING)
477-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
478-
endif()
509+
install(FILES tools/cmake/executorch-config.cmake
510+
DESTINATION lib/cmake/ExecuTorch
511+
)
479512

480513
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
481514
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
@@ -613,17 +646,14 @@ if(EXECUTORCH_BUILD_PYBIND)
613646
endif()
614647

615648
if(EXECUTORCH_BUILD_XNNPACK)
616-
# need to explicitly specify XNNPACK and microkernels-prod
617-
# here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
649+
# need to explicitly specify XNNPACK and microkernels-prod here otherwise
650+
# uses XNNPACK and microkernel-prod symbols from libtorch_cpu
618651
list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod)
619652
endif()
620653

621654
# compile options for pybind
622-
set(_pybind_compile_options
623-
-Wno-deprecated-declarations
624-
-fPIC
625-
-frtti
626-
-fexceptions
655+
set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
656+
-fexceptions
627657
)
628658

629659
# util lib

0 commit comments

Comments
 (0)