Commit 1c48c20
Update base for Update on "[ET-VK][ez] Fix Vulkan Validation layer errors due to consecutive command buffer encoding"
## Changes

* In `VulkanBackend.cpp`, do not call `encode_execute()` during model load if the model's compile spec specifies `requires_dynamic_shapes` as true.
* In test files, do not call `encode_execute()` if `propagate_resize()` is subsequently called.

## Motivation

Recently, it was discovered that a command buffer re-encode is required to update push constant values. This means that for dynamic shapes to work correctly, `encode_execute()` must be called after updating tensor sizes. As a result, `propagate_resize()` now calls `encode_execute()` internally.

This creates scenarios where `encode_execute()` is called once during model load, then again right before the first inference during `propagate_resize()`, without the command buffer actually being executed in between. Because the first command buffer never executed, the last-access information of image/buffer resources is inaccurate during the second encoding, which causes Validation layer errors like:

```
UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout(ERROR / SPEC): msgNum: 1303270965 - Validation Error: [ UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout ] Object 0: handle = 0x24086224ec0, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0x88d2b500000000e2, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x4dae5635 | vkQueueSubmit(): pSubmits[0].pCommandBuffers[0] command buffer VkCommandBuffer 0x24086224ec0[] expects VkImage 0x88d2b500000000e2[] (subresource: aspectMask VK_IMAGE_ASPECT_COLOR_BIT array layer 0, mip level 0) to be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL--instead, current layout is VK_IMAGE_LAYOUT_UNDEFINED.
    Objects: 2
        [0] 0x24086224ec0, type: 6, name: NULL
        [1] 0x88d2b500000000e2, type: 10, name: NULL
UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout(ERROR / SPEC): msgNum: 1303270965 - Validation Error: [ UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout ] Object 0: handle = 0x24086224ec0, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0x6caffc00000000e3, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x4dae5635 | vkQueueSubmit(): pSubmits[0].pCommandBuffers[0] command buffer VkCommandBuffer 0x24086224ec0[] expects VkImage 0x6caffc00000000e3[] (subresource: aspectMask VK_IMAGE_ASPECT_COLOR_BIT array layer 0, mip level 0) to be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL--instead, current layout is VK_IMAGE_LAYOUT_UNDEFINED.
    Objects: 2
        [0] 0x24086224ec0, type: 6, name: NULL
        [1] 0x6caffc00000000e3, type: 10, name: NULL
```

## Perf Impact

* Performance improvement for the first inference of dynamic-shape models when actual tensor sizes are much smaller than the maximum possible sizes.
* No impact for non-dynamic-shape models.

Differential Revision: [D76047203](https://our.internmc.facebook.com/intern/diff/D76047203/)

cc manuelcandales cbilgin

[ghstack-poisoned]
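To make the sequencing concrete, below is a minimal C++ sketch of the encode/execute flow the commit message describes. It is illustrative only, not the ExecuTorch source: the `ComputeGraph` struct, the `load_model`/`run_inference` wrappers, and the `requires_dynamic_shapes` member are hypothetical stand-ins; only the relationship between `encode_execute()` and `propagate_resize()` is taken from the commit message.

```cpp
#include <iostream>

// Hypothetical stand-in for the Vulkan delegate's compute graph; only the
// encode/resize/execute relationship mirrors the behavior described above.
struct ComputeGraph {
  bool requires_dynamic_shapes = false;

  // Records all compute commands into a fresh command buffer.
  void encode_execute() {
    std::cout << "encode command buffer\n";
  }

  // Updates tensor sizes, then re-encodes so updated push constant values
  // are captured. Per the commit message, propagate_resize() now calls
  // encode_execute() internally.
  void propagate_resize() {
    std::cout << "update tensor sizes\n";
    encode_execute();
  }

  // Submits the most recently encoded command buffer.
  void execute() {
    std::cout << "submit command buffer\n";
  }
};

// Model load: skip the eager encode for dynamic-shape models, since
// propagate_resize() will encode right before the first inference anyway.
// Encoding twice without executing in between leaves the resources'
// last-access tracking stale, which triggered the validation errors.
void load_model(ComputeGraph& graph) {
  if (!graph.requires_dynamic_shapes) {
    graph.encode_execute();
  }
}

void run_inference(ComputeGraph& graph) {
  if (graph.requires_dynamic_shapes) {
    graph.propagate_resize();  // exactly one encode before the first submit
  }
  graph.execute();
}

int main() {
  ComputeGraph graph;
  graph.requires_dynamic_shapes = true;
  load_model(graph);     // no encode here for dynamic shapes
  run_inference(graph);  // encode happens once, inside propagate_resize()
}
```

With the guard in place, a dynamic-shape model is encoded exactly once before its first submission, so image layout and last-access state never refer to a command buffer that was never executed.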
1 parent bfc6dfb commit 1c48c20

31 files changed: +522 −696 lines

.ci/scripts/build_llama_android.sh

Lines changed: 0 additions & 1 deletion

```diff
@@ -42,7 +42,6 @@ build_llama_runner() {
   popd
   ANDROID_ABI=arm64-v8a
   cmake -DBUCK2="${BUCK2}" \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
```

.ci/scripts/test_llama.sh

Lines changed: 0 additions & 1 deletion

```diff
@@ -169,7 +169,6 @@ cmake_build_llama_runner() {
   popd
   dir="examples/models/llama"
   retry cmake \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -Bcmake-out/${dir} \
```

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 0 additions & 1 deletion

```diff
@@ -40,7 +40,6 @@ cmake --build cmake-out -j16 --target install --config Release

 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
```

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 2 deletions

```diff
@@ -64,10 +64,9 @@ cmake_install_executorch_libraries_for_android() {


 LLAVA_COMMON_CMAKE_ARGS=" \
-    -DBUILD_TESTING=OFF \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON"
```

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 7 additions & 3 deletions

```diff
@@ -3069,8 +3069,10 @@ void test_to_copy() {

   EXPECT_EQ(data_in.size(), output_data.size());

+#ifdef VULKAN_DEBUG
   float mse_ex = 0.0f;
   float mse_vk = 0.0f;
+#endif

   // check results
   for (size_t i = 0; i < output_data.size(); ++i) {
@@ -3092,6 +3094,9 @@
         std::bitset<16>(*output_bits).to_string() + ")";

     std::cout << msg << std::endl;
+
+    mse_ex += std::pow(expected_output - input, 2);
+    mse_vk += std::pow(output - input, 2);
 #endif

     // Note: Torch executor half "rounds up" when converting to fp16 whereas
@@ -3113,13 +3118,12 @@
     EXPECT_TRUE(
         (*output_bits == *expected_bits) ||
         /*rounding error*/ ((*output_bits + 1u) == *expected_bits));
-    mse_ex += std::pow(expected_output - input, 2);
-    mse_vk += std::pow(output - input, 2);
   }

+#ifdef VULKAN_DEBUG
   mse_ex /= output_data.size();
   mse_vk /= output_data.size();
-#ifdef VULKAN_DEBUG
+
   std::cout << "========================================================="
             << std::endl;
   std::cout << "mse_ex = " << mse_ex << ", mse_vk = " << mse_vk << std::endl;
```

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 2 additions & 3 deletions

```diff
@@ -14,7 +14,6 @@

 using executorch::extension::llm::GenerationConfig;
 using executorch::extension::llm::Image;
-using executorch::extension::llm::TextLLMRunner;
 using executorch::runtime::Error;

 NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -24,15 +23,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
 @end

 @implementation LLaMARunner {
-  std::unique_ptr<TextLLMRunner> _runner;
+  std::unique_ptr<example::Runner> _runner;
 }

 - (instancetype)initWithModelPath:(NSString*)modelPath
                     tokenizerPath:(NSString*)tokenizerPath {
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::create_llama_runner(
+    _runner = example::Runner::create(
         modelPath.UTF8String, tokenizerPath.UTF8String);
   }
   return self;
```

examples/models/llama/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -220,6 +220,7 @@ endif()
 target_include_directories(
   llama_main
   PUBLIC ${_common_include_directories}
+         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
```

examples/models/llama/main.cpp

Lines changed: 2 additions & 7 deletions

```diff
@@ -81,13 +81,8 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
-      example::create_llama_runner(model_path, tokenizer_path, data_path);
-
-  if (runner == nullptr) {
-    ET_LOG(Error, "Failed to create llama runner");
-    return 1;
-  }
+  std::unique_ptr<example::Runner> runner =
+      example::Runner::create(model_path, tokenizer_path, data_path);

   if (warmup) {
     runner->warmup(prompt, /*max_new_tokens=*/seq_len);
```

examples/models/llama/runner/CMakeLists.txt

Lines changed: 12 additions & 9 deletions

```diff
@@ -52,20 +52,23 @@ else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()

-# For extension_llm_runner
-if(NOT TARGET extension_llm_runner)
-  add_subdirectory(
-    ${EXECUTORCH_ROOT}/extension/llm/runner
-    ${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
-  )
-endif()
-
 set(llama_runner_deps executorch_core extension_data_loader extension_module
-    extension_tensor extension_flat_tensor extension_llm_runner
+    extension_tensor extension_flat_tensor
 )

 target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})

+target_include_directories(
+  llama_runner
+  INTERFACE ${_common_include_directories}
+)
+
+# Include tokenizers dependency
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers
+  ${CMAKE_CURRENT_BINARY_DIR}/tokenizers
+)
 target_link_libraries(
   llama_runner PUBLIC tokenizers
 )
```
