From c8f3078b60c18504597833bcca599a595cfec99a Mon Sep 17 00:00:00 2001 From: jorgep31415 Date: Tue, 21 Jan 2025 09:13:07 -0800 Subject: [PATCH 1/2] [ET-VK] Fix SDK `event_name` for inputs and outputs Pull Request resolved: https://github.com/pytorch/executorch/pull/7738 ## Issue In the ET-SDK, we assign an `event_name` to each operation. In ET-VK, we compose a unique `event_name` using the `node_id`. The `node_id` exists for every `OperatorCall` but not for input/output with `nchw_to_image`/`image_to_nchw`. Those cases collapse into `node_id == 0` which means all `nchw_to_image` had the same `event_name` and hence only one was stored. The same reasoning results in storage of only one `image_to_nchw`. ## Solution Ignore the serialized `node_id` and use the operation's `prepack_node`/`execute_node` vector index. TODO: Determine if we can remove the serialized `node_id`, or whether this should be fixed differently and still reference it. ghstack-source-id: 262309622 @exported-using-ghexport Differential Revision: [D68344534](https://our.internmc.facebook.com/intern/diff/D68344534/) --- backends/vulkan/runtime/VulkanBackend.cpp | 28 ++++++++--------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 51cb16764a3..2621dc69d3e 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -334,9 +334,6 @@ class GraphBuilder { } // Parse the operators - uint32_t last_prepack_node_ct = 0; - uint32_t last_execute_node_ct = 0; - for (OpCallPtr op_call : *(flatbuffer_->chain())) { std::string op_name = op_call->name()->str(); ET_CHECK_MSG(VK_HAS_OP(op_name), "Missing operator: %s", op_name.c_str()); @@ -351,22 +348,6 @@ class GraphBuilder { auto vkFn = VK_GET_OP_FN(op_name); vkFn(*compute_graph_, args); - if (compute_graph_->graphconfig().enable_querypool) { - for (uint32_t idx_prepack = last_prepack_node_ct; - idx_prepack < 
compute_graph_->prepack_nodes().size(); - idx_prepack++) { - compute_graph_->prepack_nodes()[idx_prepack]->set_node_id( - op_call->node_id()); - } - for (uint32_t idx_execute = last_execute_node_ct; - idx_execute < compute_graph_->execute_nodes().size(); - idx_execute++) { - compute_graph_->execute_nodes()[idx_execute]->set_node_id( - op_call->node_id()); - } - last_prepack_node_ct = compute_graph_->prepack_nodes().size(); - last_execute_node_ct = compute_graph_->execute_nodes().size(); - } } // Parse the outputs, which will be mostly tensors. For some reason, @@ -379,6 +360,15 @@ class GraphBuilder { compute_graph_->set_output_tensor(ref); } } + + if (compute_graph_->graphconfig().enable_querypool) { + for (uint32_t i = 0; i < compute_graph_->prepack_nodes().size(); ++i) { + compute_graph_->prepack_nodes()[i]->set_node_id(i); + } + for (uint32_t i = 0; i < compute_graph_->execute_nodes().size(); ++i) { + compute_graph_->execute_nodes()[i]->set_node_id(i); + } + } } }; From 8bc92e8836cde0815ab1c4251d3dbd17656b09e3 Mon Sep 17 00:00:00 2001 From: jorgep31415 Date: Tue, 21 Jan 2025 09:13:08 -0800 Subject: [PATCH 2/2] [ET-VK] Print op breakdown in tsv format Pull Request resolved: https://github.com/pytorch/executorch/pull/7740 Use boolean flag to switch between nicely-formatted space separator and spreadsheet-ready tab separator. Motivation is similar to https://github.com/pytorch/executorch/pull/7035 in facilitating copy-paste of results to a gsheet. 
ghstack-source-id: 262309623 Differential Revision: [D68345444](https://our.internmc.facebook.com/intern/diff/D68345444/) --- backends/vulkan/runtime/vk_api/QueryPool.cpp | 39 +++++++++++++++++++- backends/vulkan/runtime/vk_api/QueryPool.h | 7 +++- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/backends/vulkan/runtime/vk_api/QueryPool.cpp b/backends/vulkan/runtime/vk_api/QueryPool.cpp index 943911d19d0..6ebda7b7ef4 100644 --- a/backends/vulkan/runtime/vk_api/QueryPool.cpp +++ b/backends/vulkan/runtime/vk_api/QueryPool.cpp @@ -233,9 +233,44 @@ std::string QueryPool::generate_string_report() { return ss.str(); } -void QueryPool::print_results() { +std::string QueryPool::generate_tsv_string_report() { + std::lock_guard lock(mutex_); + + std::stringstream ss; + + ss << "Kernel Name\t"; + ss << "Global Workgroup Size\t"; + ss << "Local Workgroup Size\t"; + ss << "Duration (ns)\t"; + ss << std::endl; + + ss << "===========\t"; + ss << "=====================\t"; + ss << "====================\t"; + ss << "=============\t"; + ss << std::endl; + + for (ShaderDuration& entry : shader_durations_) { + std::chrono::duration exec_duration_ns( + entry.execution_duration_ns); + + ss << entry.kernel_name << "\t"; + ss << stringize(entry.global_workgroup_size) << "\t"; + ss << stringize(entry.local_workgroup_size) << "\t"; + ss << exec_duration_ns.count() << "\t"; + ss << std::endl; + } + + return ss.str(); +} + +void QueryPool::print_results(const bool tsv_format) { EARLY_RETURN_IF_UNINITIALIZED(); - std::cout << generate_string_report() << std::endl; + if (tsv_format) { + std::cout << generate_tsv_string_report() << std::endl; + } else { + std::cout << generate_string_report() << std::endl; + } } unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) { diff --git a/backends/vulkan/runtime/vk_api/QueryPool.h b/backends/vulkan/runtime/vk_api/QueryPool.h index fb7a8cd9e20..a94ea101760 100644 --- a/backends/vulkan/runtime/vk_api/QueryPool.h +++ 
b/backends/vulkan/runtime/vk_api/QueryPool.h @@ -99,14 +99,17 @@ class QueryPool final { std::vector> get_shader_timestamp_data(); - std::string generate_string_report(); - void print_results(); + void print_results(const bool tsv_format = false); unsigned long get_total_shader_ns(std::string kernel_name); unsigned long get_mean_shader_ns(std::string kernel_name); operator bool() const { return querypool_ != VK_NULL_HANDLE; } + + private: + std::string generate_string_report(); + std::string generate_tsv_string_report(); }; } // namespace vkapi