Skip to content

Commit 2b3dec5

Browse files
committed
Update on "[ET-VK] 5/n Split dispatches between multiple command buffers. Track previously submitted command buffers in context and add function to execute all previous command buffers."
This diff stores command buffers that are submitted with `final_use` set to false. Storing these buffers is necessary for the `execute()` function. Since the `encode_execute()` function is typically called once but `execute()` can be called multiple times, the `submit_all_non_final_cmds` function is added so that all recorded command buffers with `final_use = False` can be submitted multiple times from `execute()`. #### Key Changes * Added a flag `execute_pending_first_submission` to the `ComputeGraph` class to track whether execute nodes have been freshly encoded and need to be submitted first. * Added a new function `submit_all_non_final_cmds` to the `Context` class, which submits all non-final command buffers to the GPU. * Modified the `submit_cmd_to_gpu` function to add the submitted command buffer to the `non_final_cmds_` list if it's not marked as final use. * Updated the `execute` function in `ComputeGraph` to submit all non-final command buffers before executing the graph. Differential Revision: [D78360038](https://our.internmc.facebook.com/intern/diff/D78360038/) [ghstack-poisoned]
2 parents 0c32833 + cc0bf11 commit 2b3dec5

File tree

78 files changed

+2099
-653
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+2099
-653
lines changed

.ci/scripts/test_phi_3_mini.sh

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,14 @@ NPROC=8
2222
if hash nproc &> /dev/null; then NPROC=$(nproc); fi
2323

2424
cmake_install_executorch_libraries() {
25-
cmake -DPYTHON_EXECUTABLE=python \
26-
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
27-
-DEXECUTORCH_ENABLE_LOGGING=1 \
28-
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
29-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
30-
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
31-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
32-
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
33-
-DEXECUTORCH_BUILD_XNNPACK=ON \
34-
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
35-
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
36-
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
37-
-B${BUILD_DIR} .
38-
39-
cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
25+
rm -rf cmake-out
26+
cmake --preset llm -DCMAKE_INSTALL_PREFIX=cmake-out -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
27+
cmake --build cmake-out -j16 --target install --config ${BUILD_TYPE}
4028
}
4129

4230
cmake_build_phi_3_mini() {
43-
cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
44-
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
31+
cmake -DCMAKE_PREFIX_PATH=${BUILD_DIR} \
4532
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
46-
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
47-
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
48-
-DEXECUTORCH_BUILD_XNNPACK=ON \
49-
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
5033
-B${BUILD_DIR}/${MODEL_DIR} \
5134
${MODEL_DIR}
5235

@@ -81,7 +64,7 @@ run_and_verify() {
8164
${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \
8265
--model_path=phi-3-mini.pte \
8366
--tokenizer_path=tokenizer.bin \
84-
--seq_len=128 \
67+
--seq_len=60 \
8568
--temperature=0 \
8669
--prompt="<|system|>
8770
You are a helpful assistant.<|end|>

.github/workflows/build-wheels-linux.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ on:
99
- examples/**/*
1010
- pyproject.toml
1111
- setup.py
12+
tags:
13+
- ciflow/binaries/*
1214
push:
1315
branches:
1416
- nightly

.github/workflows/build-wheels-macos.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ on:
99
- examples/**/*
1010
- pyproject.toml
1111
- setup.py
12+
tags:
13+
- ciflow/binaries/*
1214
push:
1315
branches:
1416
- nightly

.github/workflows/pull.yml

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ jobs:
603603
bash examples/models/phi-3-mini/install_requirements.sh
604604
605605
# run e2e (export, tokenizer and runner)
606-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh
606+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh Release
607607
608608
test-eval_llama-wikitext-linux:
609609
name: test-eval_llama-wikitext-linux
@@ -762,3 +762,66 @@ jobs:
762762
763763
# Test selective build
764764
PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh
765+
766+
unittest-nxp-neutron:
767+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
768+
permissions:
769+
id-token: write
770+
contents: read
771+
with:
772+
runner: linux.2xlarge
773+
docker-image: executorch-ubuntu-22.04-clang12
774+
submodules: 'recursive'
775+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
776+
timeout: 90
777+
script: |
778+
set -eux
779+
780+
# The generic Linux job chooses to use base env, not the one setup by the image
781+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
782+
conda activate "${CONDA_ENV}"
783+
784+
# Build and install Executorch
785+
PYTHON_EXECUTABLE=python \
786+
CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
787+
.ci/scripts/setup-linux.sh --build-tool "cmake"
788+
789+
# Install test requirements
790+
pip install -r backends/nxp/requirements-tests.txt
791+
792+
# Run pytest
793+
PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
794+
795+
# Run aot example:
796+
PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh
797+
798+
799+
nxp-build-test:
800+
name: nxp-build-test
801+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
802+
permissions:
803+
id-token: write
804+
contents: read
805+
with:
806+
runner: linux.2xlarge
807+
docker-image: executorch-ubuntu-22.04-arm-sdk
808+
submodules: 'recursive'
809+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
810+
timeout: 90
811+
script: |
812+
# The generic Linux job chooses to use base env, not the one setup by the image
813+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
814+
conda activate "${CONDA_ENV}"
815+
816+
# Build
817+
cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
818+
cmake --build cmake-out --target executorch_delegate_neutron --config Release
819+
820+
# Build check for the neutron backend library
821+
lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
822+
if [ -f $lib_neutron ]; then
823+
echo "Neutron backend library built."
824+
else
825+
echo "Neutron backend library not found!"
826+
exit 1
827+
fi

.github/workflows/trunk.yml

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -302,36 +302,6 @@ jobs:
302302
exit 1
303303
fi
304304
305-
nxp-build-test:
306-
name: nxp-build-test
307-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
308-
permissions:
309-
id-token: write
310-
contents: read
311-
with:
312-
runner: linux.2xlarge
313-
docker-image: executorch-ubuntu-22.04-arm-sdk
314-
submodules: 'recursive'
315-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
316-
timeout: 90
317-
script: |
318-
# The generic Linux job chooses to use base env, not the one setup by the image
319-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
320-
conda activate "${CONDA_ENV}"
321-
322-
# Build
323-
cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
324-
cmake --build cmake-out --target executorch_delegate_neutron --config Release
325-
326-
# Build check for the neutron backend library
327-
lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
328-
if [ -f $lib_neutron ]; then
329-
echo "Neutron backend library built."
330-
else
331-
echo "Neutron backend library not found!"
332-
exit 1
333-
fi
334-
335305
test-coreml-delegate:
336306
name: test-coreml-delegate
337307
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -771,32 +741,3 @@ jobs:
771741
build-mode: Release
772742
build-tool: cmake
773743
docker-image: executorch-ubuntu-22.04-clang12
774-
775-
unittest-nxp-neutron:
776-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
777-
permissions:
778-
id-token: write
779-
contents: read
780-
with:
781-
runner: linux.2xlarge
782-
docker-image: executorch-ubuntu-22.04-clang12
783-
submodules: 'recursive'
784-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
785-
timeout: 90
786-
script: |
787-
set -eux
788-
789-
# The generic Linux job chooses to use base env, not the one setup by the image
790-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
791-
conda activate "${CONDA_ENV}"
792-
793-
# Build and install Executorch
794-
PYTHON_EXECUTABLE=python \
795-
CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
796-
.ci/scripts/setup-linux.sh --build-tool "cmake"
797-
798-
# Install test requirements
799-
pip install -r backends/nxp/requirements-tests.txt
800-
801-
# Run pytest
802-
PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh

CMakeLists.txt

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,12 @@ set(CMAKE_SKIP_BUILD_RPATH OFF)
112112
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
113113
# Automatically add all linked folders that are NOT in the build directory to
114114
# the rpath (per library?)
115-
# TODO: Doesn't work for us right now because we are
116-
# not installing .so's into the correct locations. For example we have
117-
# libcustom_ops_aot_lib.so depending on _portable_lib.so, which was eventually
118-
# put under <site-packages>/executorch/extension/pybindings/ but this rpath is
119-
# not automatically added because at build time it seems `portable_lib` is being
115+
#
116+
# TODO: Doesn't work for us right now because we are not installing .so's into
117+
# the correct locations. For example we have libcustom_ops_aot_lib.so depending
118+
# on _portable_lib.so, which was eventually put under
119+
# <site-packages>/executorch/extension/pybindings/ but this rpath is not
120+
# automatically added because at build time it seems `portable_lib` is being
120121
# built under the same directory, so no extra rpath is being added. To properly
121122
# fix this we need to install `portable_lib` into the correct path.
122123
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
@@ -321,8 +322,9 @@ if(EXECUTORCH_USE_CPP_CODE_COVERAGE)
321322
" -fprofile-instr-generate -fcoverage-mapping"
322323
)
323324
else()
324-
message(FATAL_ERROR
325-
"Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported"
325+
message(
326+
FATAL_ERROR
327+
"Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported"
326328
)
327329
endif()
328330
endif()
@@ -633,8 +635,8 @@ if(EXECUTORCH_BUILD_PYBIND)
633635
endif()
634636

635637
if(EXECUTORCH_BUILD_XNNPACK)
636-
# need to explicitly specify XNNPACK and xnnpack-microkernels-prod here otherwise
637-
# uses XNNPACK and microkernel-prod symbols from libtorch_cpu
638+
# need to explicitly specify XNNPACK and xnnpack-microkernels-prod here
639+
# otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
638640
list(APPEND _dep_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
639641
endif()
640642

backends/apple/coreml/CMakeLists.txt

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,9 @@ if(APPLE)
104104
endif()
105105

106106
add_library(coreml_util ${UTIL_SOURCES})
107-
target_include_directories(coreml_util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util)
107+
target_include_directories(
108+
coreml_util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util
109+
)
108110
if(APPLE)
109111
target_link_libraries(coreml_util PRIVATE ${FOUNDATION_FRAMEWORK})
110112
endif()
@@ -119,21 +121,25 @@ install(
119121

120122
# CoreML inmemoryfs
121123

122-
set(
123-
INMEMORYFS_SOURCES
124-
runtime/inmemoryfs/inmemory_filesystem.cpp
125-
runtime/inmemoryfs/memory_buffer.cpp
126-
runtime/inmemoryfs/memory_stream.cpp
127-
runtime/inmemoryfs/reversed_memory_stream.cpp
124+
set(INMEMORYFS_SOURCES
125+
runtime/inmemoryfs/inmemory_filesystem.cpp
126+
runtime/inmemoryfs/memory_buffer.cpp runtime/inmemoryfs/memory_stream.cpp
127+
runtime/inmemoryfs/reversed_memory_stream.cpp
128128
)
129129
if(APPLE)
130-
list(APPEND INMEMORYFS_SOURCES runtime/inmemoryfs/inmemory_filesystem_utils.mm)
130+
list(APPEND INMEMORYFS_SOURCES
131+
runtime/inmemoryfs/inmemory_filesystem_utils.mm
132+
)
131133
endif()
132134

133135
add_library(coreml_inmemoryfs ${INMEMORYFS_SOURCES})
134-
target_include_directories(coreml_inmemoryfs PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inmemoryfs)
136+
target_include_directories(
137+
coreml_inmemoryfs PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inmemoryfs
138+
)
135139
if(APPLE)
136-
target_link_libraries(coreml_inmemoryfs PRIVATE coreml_util ${FOUNDATION_FRAMEWORK})
140+
target_link_libraries(
141+
coreml_inmemoryfs PRIVATE coreml_util ${FOUNDATION_FRAMEWORK}
142+
)
137143
endif()
138144
target_compile_options(coreml_inmemoryfs PUBLIC -fPIC)
139145

@@ -148,17 +154,12 @@ install(
148154

149155
if(EXECUTORCH_BUILD_PYBIND)
150156
pybind11_add_module(
151-
executorchcoreml
152-
SHARED
153-
runtime/inmemoryfs/inmemory_filesystem_py.cpp
154-
runtime/inmemoryfs/inmemory_filesystem_utils.cpp
157+
executorchcoreml SHARED runtime/inmemoryfs/inmemory_filesystem_py.cpp
158+
runtime/inmemoryfs/inmemory_filesystem_utils.cpp
155159
)
156160
target_link_libraries(
157-
executorchcoreml
158-
PRIVATE
159-
coreml_util
160-
coreml_inmemoryfs
161-
nlohmann_json::nlohmann_json
161+
executorchcoreml PRIVATE coreml_util coreml_inmemoryfs
162+
nlohmann_json::nlohmann_json
162163
)
163164
target_compile_options(executorchcoreml PUBLIC -fPIC)
164165
endif()
@@ -179,8 +180,12 @@ if(APPLE)
179180
coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/delegate
180181
)
181182
target_include_directories(coremldelegate PRIVATE ${PROJECT_SOURCE_DIR}/..)
182-
target_include_directories(coremldelegate PRIVATE ${PROJECT_SOURCE_DIR}/runtime/core/portable_type/c10)
183-
target_compile_definitions(coremldelegate PRIVATE C10_USING_CUSTOM_GENERATED_MACROS)
183+
target_include_directories(
184+
coremldelegate PRIVATE ${PROJECT_SOURCE_DIR}/runtime/core/portable_type/c10
185+
)
186+
target_compile_definitions(
187+
coremldelegate PRIVATE C10_USING_CUSTOM_GENERATED_MACROS
188+
)
184189

185190
if(EXECUTORCH_BUILD_DEVTOOLS)
186191
target_sources(coremldelegate PRIVATE ${SDK_SOURCES} ${PROTOBUF_SOURCES})
@@ -200,13 +205,9 @@ if(APPLE)
200205

201206
target_link_libraries(
202207
coremldelegate
203-
PUBLIC coreml_util
204-
coreml_inmemoryfs
205-
PRIVATE executorch_core
206-
${ACCELERATE_FRAMEWORK}
207-
${COREML_FRAMEWORK}
208-
${FOUNDATION_FRAMEWORK}
209-
${SQLITE_LIBRARY}
208+
PUBLIC coreml_util coreml_inmemoryfs
209+
PRIVATE executorch_core ${ACCELERATE_FRAMEWORK} ${COREML_FRAMEWORK}
210+
${FOUNDATION_FRAMEWORK} ${SQLITE_LIBRARY}
210211
)
211212

212213
target_link_options_shared_lib(coremldelegate)
@@ -218,13 +219,8 @@ if(APPLE)
218219
endif()
219220

220221
target_compile_options(
221-
coremldelegate
222-
PRIVATE
223-
-fobjc-arc
224-
-fno-exceptions
225-
-x objective-c++
226-
-Wno-null-character
227-
-Wno-receiver-expr
222+
coremldelegate PRIVATE -fobjc-arc -fno-exceptions -x objective-c++
223+
-Wno-null-character -Wno-receiver-expr
228224
)
229225

230226
if(EXECUTORCH_BUILD_DEVTOOLS)

backends/arm/test/tester/arm_tester.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,7 @@ def _get_dtype_distribution(
726726
if node.op == "placeholder":
727727
placeholder_dtypes.append(str(node.meta["val"].dtype))
728728
if node.op == "call_function":
729-
if "val" in node.meta:
729+
if "val" in node.meta and isinstance(node.meta["val"], torch.Tensor):
730730
dtype, _, _ = extract_tensor_meta(node.meta, tosa_spec)
731731
call_function_dtypes.append(ts.DTypeNames[dtype])
732732
return Counter(placeholder_dtypes), Counter(call_function_dtypes)

backends/mediatek/CMakeLists.txt

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,9 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/include)
2424
# targets
2525
add_library(neuron_backend SHARED)
2626
target_compile_options(neuron_backend PRIVATE "-frtti" "-fexceptions")
27-
target_link_libraries(neuron_backend
28-
PRIVATE
29-
executorch_core
30-
portable_ops_lib
31-
portable_kernels
32-
android
33-
log
27+
target_link_libraries(
28+
neuron_backend PRIVATE executorch_core portable_ops_lib portable_kernels
29+
android log
3430
)
3531
target_sources(
3632
neuron_backend

0 commit comments

Comments
 (0)