diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 594c00854a2..ecdd4f6d2d4 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -507,7 +507,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { compute_graph->prepare(); compute_graph->prepare_pipelines(); - compute_graph->run_prepack(); + compute_graph->prepack(); // If dynamic shapes are not expected, then the command buffer only needs to // be encoded once. Otherwise, wait until the first inference to encode the diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index cafe2f5e502..e576dfae394 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -769,23 +769,7 @@ void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) { context_->flush(); } -void ComputeGraph::encode_prepack() { - for (std::unique_ptr& node : prepack_nodes_) { - node->encode(this); - } -} - -void ComputeGraph::prepack() const { - // Submit and execute the command buffer - vkapi::VulkanFence fence = context_->fences().get_fence(); - context_->submit_cmd_to_gpu(fence.get_submit_handle(), /*final_use = */ true); - fence.wait(); - context_->fences().return_fence(fence); - - context_->flush(); -} - -void ComputeGraph::run_prepack() { +void ComputeGraph::prepack() { int i = 0; bool submitted = false; const bool reduce_peak_memory = total_constant_nbytes_ > 500 * MB; diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index a8405bb312d..87f0d9c7584 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -846,14 +846,11 @@ class ComputeGraph final { staging_nbytes_in_cmd_ += staging_bytes; } - void encode_prepack(); - void prepack() const; - /* * Executes prepacking operations to transfer model weight data from the CPU * to GPU. */ - void run_prepack(); + void prepack(); // // Graph Execution diff --git a/backends/vulkan/test/op_tests/choose_qparams_test.cpp b/backends/vulkan/test/op_tests/choose_qparams_test.cpp index 75b7cbc8960..f45d4f82448 100644 --- a/backends/vulkan/test/op_tests/choose_qparams_test.cpp +++ b/backends/vulkan/test/op_tests/choose_qparams_test.cpp @@ -456,7 +456,7 @@ void test_vulkan_choose_qparams_tensor_impl( ValueRef staging_zero_point = graph.set_output_tensor(r_zero_point); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -676,7 +676,7 @@ void test_vulkan_choose_qparams_per_token_asymmetric_impl( ValueRef staging_zero_point = graph.set_output_tensor(r_zero_point); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp index b4c4ac274dc..91d49406fbb 100644 --- a/backends/vulkan/test/op_tests/dequantize_test.cpp +++ b/backends/vulkan/test/op_tests/dequantize_test.cpp @@ -1138,7 +1138,7 @@ void test_vulkan_dequantize_per_token_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -1670,7 +1670,6 @@ void test_vulkan_dequantize_per_channel_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); graph.prepack(); graph.encode_execute(); @@ -2345,7 +2344,6 @@ void test_vulkan_dequantize_per_tensor_tensor_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); graph.prepack(); graph.encode_execute(); diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp index 19e098eeee7..43c97071874 100644 --- a/backends/vulkan/test/op_tests/quantize_test.cpp +++ b/backends/vulkan/test/op_tests/quantize_test.cpp @@ -929,7 +929,7 @@ void test_vulkan_quantize_per_token_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -1412,7 +1412,6 @@ void test_vulkan_quantize_per_channel_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); graph.prepack(); graph.encode_execute(); @@ -2042,7 +2041,6 @@ void test_vulkan_quantize_per_tensor_tensor_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); graph.prepack(); graph.encode_execute(); diff --git a/backends/vulkan/test/op_tests/quantized_linear_test.cpp b/backends/vulkan/test/op_tests/quantized_linear_test.cpp index 0cd27ea07f3..26316344b0e 100644 --- a/backends/vulkan/test/op_tests/quantized_linear_test.cpp +++ b/backends/vulkan/test/op_tests/quantized_linear_test.cpp @@ -454,7 +454,7 @@ void test_vulkan_linear_qga4w_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -549,7 +549,7 @@ void test_vulkan_linear_qcs4w_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -683,7 +683,7 @@ void test_vulkan_linear_qta8a_qga4w_impl( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); diff --git a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp index eebbb89ab40..2955a54e5f3 100644 --- a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp +++ b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp @@ -112,7 +112,7 @@ void test_reference( ValueRef staging_xk_out = graph.set_output_tensor(r_xk_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); diff --git a/backends/vulkan/test/op_tests/sdpa_test.cpp b/backends/vulkan/test/op_tests/sdpa_test.cpp index 79b679674a5..9a3da49ddad 100644 --- a/backends/vulkan/test/op_tests/sdpa_test.cpp +++ b/backends/vulkan/test/op_tests/sdpa_test.cpp @@ -350,7 +350,7 @@ void test_vulkan_sdpa( ValueRef staging_out = graph.set_output_tensor(r_out); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); diff --git a/backends/vulkan/test/op_tests/utils/gen_computegraph.py b/backends/vulkan/test/op_tests/utils/gen_computegraph.py index 38a3ee93627..08eb3b61c36 100644 --- a/backends/vulkan/test/op_tests/utils/gen_computegraph.py +++ b/backends/vulkan/test/op_tests/utils/gen_computegraph.py @@ -681,7 +681,6 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str: graph_build += self.set_output(self.refs["out"], include_declarations) graph_build += f"{self.graph}{self.dot}prepare();\n" - graph_build += f"{self.graph}{self.dot}encode_prepack();\n" graph_build += f"{self.graph}{self.dot}prepack();\n" graph_build += f"{self.graph}{self.dot}encode_execute();\n" diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 17f197dfdeb..f3fed8b6622 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1435,7 +1435,6 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { graph.prepare(); - graph.encode_prepack(); graph.prepack(); graph.encode_execute(); @@ -2568,7 +2567,7 @@ void test_binary_op( out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -2641,7 +2640,7 @@ void test_mm( B, M, K, N, dtype, storage_type, memory_layout, mat2_data, prepack); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); for (int i = 1; i < 4; i++) { @@ -2722,7 +2721,7 @@ void test_mm_with_resize_reencode( B, M, K, N, dtype, storage_type, memory_layout, mat2_data, false); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -2800,7 +2799,7 @@ void test_max_pool2d( idx_ioval.staging = graph.set_output_tensor(idx_ioval.value); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -2879,7 +2878,7 @@ void test_grid_priors( out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); @@ -2983,7 +2982,7 @@ void test_transpose_view_mm( out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); for (int i = 1; i < 4; i++) { @@ -3049,7 +3048,7 @@ void test_to_copy() { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute(); graph.propagate_resize(); @@ -3236,7 +3235,7 @@ void test_dynamic_dispatch(int M, int N) { ComputeGraph graph = build_dynamic_dispatch_test_graph(M, N); graph.prepare(); - graph.encode_prepack(); + graph.prepack(); graph.encode_execute();