From b449ac4df4b315b7a4e3d17f90c20f273d4bc028 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Thu, 21 Nov 2024 11:28:35 -0800 Subject: [PATCH] [ET-VK] Replacing use of adaptive_work_group_size function by create_local_wg_size function. This diff replaces the use of the adaptive_work_group_size function with the create_local_wg_size function, which is better tuned for improving shader performance. Differential Revision: [D66308779](https://our.internmc.facebook.com/intern/diff/D66308779/) [ghstack-poisoned] --- backends/vulkan/runtime/graph/ops/impl/Copy.cpp | 2 +- backends/vulkan/runtime/graph/ops/impl/Linear.cpp | 2 +- backends/vulkan/runtime/graph/ops/impl/MatMul.cpp | 2 +- backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp | 2 +- backends/vulkan/runtime/graph/ops/impl/Pool.cpp | 4 ++-- backends/vulkan/runtime/graph/ops/impl/Repeat.cpp | 2 +- backends/vulkan/runtime/graph/ops/impl/Slice.cpp | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 15cfce2a014..b98b2c504d4 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -135,7 +135,7 @@ void add_copy_channel_offset_node( utils::safe_downcast(dim_at(in_sizes)), utils::safe_downcast(dim_at(in_sizes)), utils::safe_downcast(dst_last_z - dst_first_z + 1)}; - uvec3 local_size = adaptive_work_group_size(global_size); + uvec3 local_size = graph.create_local_wg_size(global_size); const struct Block final { ivec3 range; diff --git a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp index 1cba6de851c..966928b365e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp @@ -198,7 +198,7 @@ void add_addmm_optimized_node( } else { global_size = utils::divup_vec(global_size, {4, 4, 
1}); } - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); graph.execute_nodes().emplace_back(new DispatchNode( graph, diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp index a852a30d087..9a0d211176e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp @@ -213,7 +213,7 @@ void add_matmul_optimized_node( global_size = utils::divup_vec(global_size, {4, 4, 1}); } - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); graph.execute_nodes().emplace_back(new DispatchNode( graph, diff --git a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp index b1cc8c8084a..18f316a5265 100644 --- a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp @@ -89,7 +89,7 @@ void add_native_layer_norm_node( std::vector in_sizes = t_input->sizes(); utils::uvec3 global_size = t_mean->logical_limits(); - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); std::string kernel_name("native_layer_norm"); kernel_name.reserve(kShaderNameReserve); diff --git a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp index b7015d2b1a0..4e4e896415c 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp @@ -79,7 +79,7 @@ void add_max_pool2d_node( check_pool2d_args(*t_in, *t_out); utils::uvec3 global_size = t_out->logical_limits(); - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); std::string 
kernel_name("max_pool2d"); add_dtype_suffix(kernel_name, *t_out); @@ -154,7 +154,7 @@ void add_avg_pool2d_node( check_pool2d_args(*t_in, *t_out); utils::uvec3 global_size = t_out->logical_limits(); - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); std::string kernel_name("avg_pool2d"); add_dtype_suffix(kernel_name, *t_out); diff --git a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp index 00199ba7a80..a5ff9821063 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp @@ -90,7 +90,7 @@ void add_repeat_channel_node( // Channel packed global work ids running_range[2] = out_whcn_sizes[3] * utils::div_up_4(out_whcn_sizes[2]); utils::uvec3 global_size = utils::make_uvec3(running_range); - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); const struct Block final { utils::ivec4 out_sizes; diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp index 413cf40ea31..40603394660 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp @@ -126,7 +126,7 @@ void add_slice_tensor_copy_node( add_dtype_suffix(kernel_name, *t_out); utils::uvec3 global_size = t_out->logical_limits(); - utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = graph.create_local_wg_size(global_size); const struct Block final { int dim;