From c9e1b8489b151faa8d2ca4f8350b772e7fe41953 Mon Sep 17 00:00:00 2001 From: Abhishek Chandra Date: Thu, 6 Feb 2025 10:04:41 -0800 Subject: [PATCH] Optimized axis map (#7886) Summary: Add a flag to optimize the axis map layout to be in descending order of axis size. The default is still the same. Reviewed By: SS-JIA Differential Revision: D67692960 --- .../vulkan/runtime/api/containers/Tensor.cpp | 44 ++++++++++---- .../vulkan/runtime/api/containers/Tensor.h | 6 +- .../vulkan/runtime/graph/ComputeGraph.cpp | 57 +++++++++++++++---- backends/vulkan/runtime/graph/ComputeGraph.h | 18 ++++-- .../vulkan/runtime/graph/ops/impl/Staging.cpp | 6 +- .../vulkan/runtime/graph/ops/impl/Staging.h | 3 +- backends/vulkan/runtime/utils/StorageUtils.h | 9 +++ 7 files changed, 109 insertions(+), 34 deletions(-) diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp index 8c76c11532b..856ff4d618a 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.cpp +++ b/backends/vulkan/runtime/api/containers/Tensor.cpp @@ -7,6 +7,7 @@ */ #include +#include #include namespace vkcompute { @@ -99,12 +100,31 @@ std::vector calculate_strides( * * The axis mapping allows for permuted views of texture-backed tensors. */ -std::vector default_axis_map() { - // Currently, all compute shaders have an assumption that the channels dim is - // used to combine with the batch dim of a tensor. However, once dim mapping - // is integrated into the tensor indexing logic for each compute shader, we - // can be more flexible with mapping the batch dim to different texture axes - // in order to improve performance or memory footprint. 
+std::vector calculate_axis_map(
+    const std::vector& sizes,
+    utils::AxisMapLayout axis_map_layout) {
+  if (axis_map_layout == utils::AxisMapLayout::OPTIMIZED) {
+    std::vector axis_map(sizes.size() + 1);
+    std::iota(axis_map.begin(), axis_map.end() - 1, 0);
+
+    std::stable_sort(
+        axis_map.begin(), axis_map.end() - 1, [&sizes](size_t i1, size_t i2) {
+          return sizes[i1] > sizes[i2]; // descending order of axis size
+        });
+
+    assert(axis_map.size() > 0);
+    // Find the index of the channel dimension. NOTE(review): the check below compares an axis extent to 2, not an axis index; confirm intent.
+    for (size_t i = 0; i < axis_map.size() - 1; ++i) {
+      assert(sizes.size() > axis_map[i]);
+      if (sizes[axis_map[i]] == 2) {
+        axis_map.back() = i;
+        break;
+      }
+    }
+
+    return axis_map;
+  }
+  // default
   return {0, 1, 2, 2};
 }
 
@@ -439,13 +459,14 @@ vTensor::vTensor(
     const vkapi::ScalarType dtype,
     const utils::StorageType storage_type,
     const utils::GPUMemoryLayout memory_layout,
-    const bool allocate_memory)
+    const bool allocate_memory,
+    const utils::AxisMapLayout axis_map_layout)
     : dtype_(dtype),
       // Calculate tensor metadata
       sizes_(sizes.begin(), sizes.end()),
       packed_dim_(utils::to_packed_dim(memory_layout)),
       dim_order_(calculate_dim_order(sizes_.size(), packed_dim_)),
-      axis_map_(default_axis_map()),
+      axis_map_(calculate_axis_map(sizes_, axis_map_layout)),
       strides_(calculate_strides(sizes, dim_order_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
       unsqueezed_strides_{
@@ -484,13 +505,14 @@ vTensor::vTensor(
 vTensor::vTensor(
     Context* context,
     const vkapi::VulkanImage& image,
-    const utils::GPUMemoryLayout memory_layout)
+    const utils::GPUMemoryLayout memory_layout,
+    const utils::AxisMapLayout axis_map_layout)
     : dtype_(vkapi::element_scalartype(image.format())),
       // Calculate tensor metadata
       sizes_(calculate_sizes(image, memory_layout)),
       packed_dim_(utils::to_packed_dim(memory_layout)),
       dim_order_(),
-      axis_map_(default_axis_map()),
+      axis_map_(calculate_axis_map(sizes_, axis_map_layout)),
       strides_(),
       padded_sizes_(calculate_padded_sizes(sizes_, packed_dim_)),
       unsqueezed_strides_(),
@@ 
-547,7 +569,7 @@ vTensor::vTensor( sizes_(sizes.begin(), sizes.end()), packed_dim_(other.packed_dim_), dim_order_(dim_order.begin(), dim_order.end()), - axis_map_(default_axis_map()), + axis_map_(calculate_axis_map(sizes_, utils::kDefaultAxisMap)), strides_(calculate_strides(sizes_, dim_order_)), padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)}, unsqueezed_strides_{ diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h index 49d5fcd36a3..8238962ae31 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.h +++ b/backends/vulkan/runtime/api/containers/Tensor.h @@ -183,14 +183,16 @@ class vTensor final { const vkapi::ScalarType dtype, const utils::StorageType storage_type = utils::kTexture3D, const utils::GPUMemoryLayout memory_layout = utils::kChannelsPacked, - const bool allocate_memory = true); + const bool allocate_memory = true, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); vTensor(const vTensor& other) = delete; explicit vTensor( Context* context, const vkapi::VulkanImage& image, - const utils::GPUMemoryLayout memory_layout = utils::kChannelsPacked); + const utils::GPUMemoryLayout memory_layout = utils::kChannelsPacked, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * This constructor allows for the creation of a vTensor that references the diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index fbef45d8641..5109e198206 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -239,13 +239,20 @@ ValueRef ComputeGraph::add_tensor( const vkapi::ScalarType dtype, const utils::StorageType storage_type, const utils::GPUMemoryLayout memory_layout, - const int64_t shared_object_idx) { + const int64_t shared_object_idx, + const utils::AxisMapLayout axis_map_layout) { bool allocate_memory = shared_object_idx < 0; ValueRef 
idx(static_cast(values_.size())); check_no_active_value_ptrs(); values_.emplace_back(api::vTensor( - context(), sizes, dtype, storage_type, memory_layout, allocate_memory)); + context(), + sizes, + dtype, + storage_type, + memory_layout, + allocate_memory, + axis_map_layout)); if (!allocate_memory) { get_shared_object(shared_object_idx).add_user(this, idx); @@ -257,44 +264,70 @@ ValueRef ComputeGraph::add_tensor( const std::vector& sizes, const vkapi::ScalarType dtype, const utils::StorageType storage_type, - const int64_t shared_object_idx) { + const int64_t shared_object_idx, + const utils::AxisMapLayout axis_map_layout) { return add_tensor( sizes, dtype, storage_type, suggested_memory_layout(sizes), - shared_object_idx); + shared_object_idx, + axis_map_layout); } ValueRef ComputeGraph::add_tensor( const std::vector& sizes, const vkapi::ScalarType dtype, const utils::GPUMemoryLayout memory_layout, - const int64_t shared_object_idx) { + const int64_t shared_object_idx, + const utils::AxisMapLayout axis_map_layout) { return add_tensor( - sizes, dtype, suggested_storage_type(), memory_layout, shared_object_idx); + sizes, + dtype, + suggested_storage_type(), + memory_layout, + shared_object_idx, + axis_map_layout); } ValueRef ComputeGraph::add_tensor_like( const ValueRef idx, const utils::StorageType storage_type, - const utils::GPUMemoryLayout memory_layout) { - return add_tensor(sizes_of(idx), dtype_of(idx), storage_type, memory_layout); + const utils::GPUMemoryLayout memory_layout, + const utils::AxisMapLayout axis_map_layout) { + return add_tensor( + sizes_of(idx), + dtype_of(idx), + storage_type, + memory_layout, + -1, + axis_map_layout); } ValueRef ComputeGraph::add_tensor_like( const ValueRef idx, - const utils::GPUMemoryLayout memory_layout) { + const utils::GPUMemoryLayout memory_layout, + const utils::AxisMapLayout axis_map_layout) { return add_tensor( - sizes_of(idx), dtype_of(idx), storage_type_of(idx), memory_layout); + sizes_of(idx), + dtype_of(idx), + 
storage_type_of(idx), + memory_layout, + -1, + axis_map_layout); } ValueRef ComputeGraph::add_tensor( const std::vector& sizes, const vkapi::ScalarType dtype, - const int64_t shared_object_idx) { + const int64_t shared_object_idx, + const utils::AxisMapLayout axis_map_layout) { return add_tensor( - sizes, dtype, suggested_memory_layout(sizes), shared_object_idx); + sizes, + dtype, + suggested_memory_layout(sizes), + shared_object_idx, + axis_map_layout); } ValueRef ComputeGraph::add_tensor(const vkapi::VulkanImage& image) { diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 201278ac61b..3d46aa327b8 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -461,7 +461,8 @@ class ComputeGraph final { const vkapi::ScalarType dtype, const utils::StorageType storage_type, const utils::GPUMemoryLayout memory_layout, - const int64_t shared_object_idx = -1); + const int64_t shared_object_idx = -1, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Add a `api::vTensor` value to the graph with the specified properties. The @@ -471,7 +472,8 @@ class ComputeGraph final { const std::vector& sizes, const vkapi::ScalarType dtype, const utils::StorageType storage_type, - const int64_t shared_object_idx = -1); + const int64_t shared_object_idx = -1, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Add a `api::vTensor` value to the graph with the specified properties. The @@ -481,7 +483,8 @@ class ComputeGraph final { const std::vector& sizes, const vkapi::ScalarType dtype, const utils::GPUMemoryLayout memory_layout, - const int64_t shared_object_idx = -1); + const int64_t shared_object_idx = -1, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Add a `api::vTensor` value to the graph with the specified properties. 
The @@ -491,7 +494,8 @@ class ComputeGraph final { ValueRef add_tensor( const std::vector& sizes, const vkapi::ScalarType dtype, - const int64_t shared_object_idx = -1); + const int64_t shared_object_idx = -1, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Add a `api::vTensor` value to the graph with the specified image. @@ -504,7 +508,8 @@ class ComputeGraph final { ValueRef add_tensor_like( const ValueRef vref, const utils::StorageType storage_type, - const utils::GPUMemoryLayout memory_layout); + const utils::GPUMemoryLayout memory_layout, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Add a `api::vTensor` value to the graph with the properties of `vref`. The @@ -512,7 +517,8 @@ class ComputeGraph final { */ ValueRef add_tensor_like( const ValueRef vref, - const utils::GPUMemoryLayout memory_layout); + const utils::GPUMemoryLayout memory_layout, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Use the copy constructor of `api::vTensor` to create a "view" of the diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 80a1e706e83..959d3974b73 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -146,12 +146,14 @@ ValueRef prepack_standard( const ValueRef tensor_data, const utils::StorageType storage_type, const utils::GPUMemoryLayout layout, - const bool passthrough) { + const bool passthrough, + const utils::AxisMapLayout axis_map_layout) { if (passthrough && graph.val_is_tensor(tensor_data)) { return tensor_data; } VK_CHECK_COND(graph.val_is_tref(tensor_data)); - ValueRef tensor = graph.add_tensor_like(tensor_data, storage_type, layout); + ValueRef tensor = + graph.add_tensor_like(tensor_data, storage_type, layout, axis_map_layout); add_prepack_standard_node(graph, tensor_data, tensor); return tensor; } diff --git 
a/backends/vulkan/runtime/graph/ops/impl/Staging.h b/backends/vulkan/runtime/graph/ops/impl/Staging.h index add9162d85f..bc501d5d053 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.h +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.h @@ -48,7 +48,8 @@ ValueRef prepack_standard( const ValueRef tensor_data, const utils::StorageType storage_type, const utils::GPUMemoryLayout layout, - const bool passthrough = false); + const bool passthrough = false, + const utils::AxisMapLayout axis_map_layout = utils::kDefaultAxisMap); /* * Equivalent to `prepack_standard()` function, except the `storage_type` and diff --git a/backends/vulkan/runtime/utils/StorageUtils.h b/backends/vulkan/runtime/utils/StorageUtils.h index 28caf58bef2..20addf88c53 100644 --- a/backends/vulkan/runtime/utils/StorageUtils.h +++ b/backends/vulkan/runtime/utils/StorageUtils.h @@ -146,5 +146,14 @@ inline std::ostream& operator<<( return os; } +enum class AxisMapLayout : uint8_t { + DEFAULT = 0u, + OPTIMIZED = 1u, +}; + +constexpr AxisMapLayout kDefaultAxisMap = AxisMapLayout::DEFAULT; + +constexpr AxisMapLayout kOptimizedAxisMap = AxisMapLayout::OPTIMIZED; + } // namespace utils } // namespace vkcompute