From 012f7a3c1073ad6126f469f16900a59b832f993e Mon Sep 17 00:00:00 2001
From: Stephen Jia
Date: Thu, 5 Jun 2025 11:02:46 -0700
Subject: [PATCH] [ET-VK] Use shared pointer for vTensorStorage

## Changes

* In `vTensor`, store the `storage_` member using a shared pointer instead of a direct `vTensorStorage` object
* Remove the ability to construct a tensor view with a buffer offset

## Motivation

> * In `vTensor`, store the `storage_` member using a shared pointer instead of a direct `vTensorStorage` object

Previously, to support creating tensor views, I implemented copy constructors for `VulkanImage`, `VulkanBuffer`, and `vTensorStorage`. The idea was that the copied object instance would hold the same handles as the original, but would not own them (and therefore would not be responsible for destroying them).

The problem with this approach is that constructing pipeline barriers and image layout transitions correctly requires knowing how a resource was last accessed. Because tensor views were implemented as copies, accessing the original tensor did not update the last-access information stored in any of its copies; when a copy was subsequently accessed, the last-access information it held was therefore out of date. I originally tried to solve this with crude assumptions in the `transition()` function, but that solution was not robust enough. When using tensor views I discovered validation errors such as the following (repeated several times per run):

```
[ RUN ] VulkanComputeGraphOpsTest.test_transpose_with_mm
VUID-VkImageMemoryBarrier-oldLayout-01197(ERROR / SPEC): msgNum: 307231540 - Validation Error: [ VUID-VkImageMemoryBarrier-oldLayout-01197 ] Object 0: handle = 0x21eba07e160, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0x35643f0000000111, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x124ffb34 | vkCmdPipelineBarrier(): pImageMemoryBarriers[2].image (VkImage 0x35643f0000000111[]) cannot transition the layout of aspect=1, level=0, layer=0 from VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL when the previous known layout is VK_IMAGE_LAYOUT_GENERAL. The Vulkan spec states: If srcQueueFamilyIndex and dstQueueFamilyIndex define a queue family ownership transfer or oldLayout and newLayout define an image layout transition, oldLayout must be VK_IMAGE_LAYOUT_UNDEFINED or the current layout of the image subresources affected by the barrier (https://vulkan.lunarg.com/doc/view/1.3.296.0/windows/1.3-extensions/vkspec.html#VUID-VkImageMemoryBarrier-oldLayout-01197)
    Objects: 2
        [0] 0x21eba07e160, type: 6, name: NULL
        [1] 0x35643f0000000111, type: 10, name: NULL
UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout(ERROR / SPEC): msgNum: 1303270965 - Validation Error: [ UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout ] Object 0: handle = 0x21eba07e160, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0x35643f0000000111, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x4dae5635 | vkQueueSubmit(): pSubmits[0].pCommandBuffers[0] command buffer VkCommandBuffer 0x21eba07e160[] expects VkImage 0x35643f0000000111[] (subresource: aspectMask VK_IMAGE_ASPECT_COLOR_BIT array layer 0, mip level 0) to be in layout VK_IMAGE_LAYOUT_GENERAL--instead, current layout is VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL.
    Objects: 2
        [0] 0x21eba07e160, type: 6, name: NULL
        [1] 0x35643f0000000111, type: 10, name: NULL
[ OK ] VulkanComputeGraphOpsTest.test_transpose_with_mm (26 ms)
```

The simplest solution is to store the tensor storage object behind a shared pointer; that way, tensor views point to the same underlying `vTensorStorage` instance, and the last-access metadata stays consistent across all views of a tensor. (A minimal sketch of this ownership model is included at the end of this summary.)

> * Remove the ability to construct a tensor view with a buffer offset

This was removed because it turns out that buffers cannot be bound at arbitrary offsets; the offset is subject to device alignment limits such as `VkPhysicalDeviceLimits::minStorageBufferOffsetAlignment` (a small alignment sketch is also included at the end of this summary). The validation errors say it all:

```
[ RUN ] VulkanComputeGraphTest.test_simple_graph_with_view
VUID-VkWriteDescriptorSet-descriptorType-00328(ERROR / SPEC): msgNum: -368569266 - Validation Error: [ VUID-VkWriteDescriptorSet-descriptorType-00328 ] | MessageID = 0xea08144e | vkUpdateDescriptorSets(): pDescriptorWrites[1].pBufferInfo[0].offset (84) must be a multiple of device limit minStorageBufferOffsetAlignment 16 when descriptor type is VK_DESCRIPTOR_TYPE_STORAGE_BUFFER. The Vulkan spec states: If descriptorType is VK_DESCRIPTOR_TYPE_STORAGE_BUFFER or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, the offset member of each element of pBufferInfo must be a multiple of VkPhysicalDeviceLimits::minStorageBufferOffsetAlignment (https://vulkan.lunarg.com/doc/view/1.3.296.0/windows/1.3-extensions/vkspec.html#VUID-VkWriteDescriptorSet-descriptorType-00328)
VUID-VkDescriptorBufferInfo-range-00342(ERROR / SPEC): msgNum: -371195848 - Validation Error: [ VUID-VkDescriptorBufferInfo-range-00342 ] Object 0: handle = 0xee647e0000000009, type = VK_OBJECT_TYPE_BUFFER; | MessageID = 0xe9e00038 | vkUpdateDescriptorSets(): pDescriptorWrites[1].pBufferInfo[0].range (224) is larger than buffer size (224) + offset (84). The Vulkan spec states: If range is not equal to VK_WHOLE_SIZE, range must be less than or equal to the size of buffer minus offset (https://vulkan.lunarg.com/doc/view/1.3.296.0/windows/1.3-extensions/vkspec.html#VUID-VkDescriptorBufferInfo-range-00342)
    Objects: 1
        [0] 0xee647e0000000009, type: 9, name: NULL
VUID-VkBufferMemoryBarrier-size-01189(ERROR / SPEC): msgNum: -1238074894 - Validation Error: [ VUID-VkBufferMemoryBarrier-size-01189 ] Object 0: handle = 0x25d7d900870, type = VK_OBJECT_TYPE_COMMAND_BUFFER; | MessageID = 0xb63479f2 | vkCmdPipelineBarrier(): pBufferMemoryBarriers[0].size VkBuffer 0xee647e0000000009[] has offset 0x54 and size 0xe0 whose sum is greater than total size 0xe0. The Vulkan spec states: If size is not equal to VK_WHOLE_SIZE, size must be less than or equal to than the size of buffer minus offset (https://vulkan.lunarg.com/doc/view/1.3.296.0/windows/1.3-extensions/vkspec.html#VUID-VkBufferMemoryBarrier-size-01189)
    Objects: 1
        [0] 0x25d7d900870, type: 6, name: NULL
[ OK ] VulkanComputeGraphTest.test_simple_graph_with_view (8 ms)
```

## Impact

* One additional heap allocation upon tensor construction
* Increased pointer chasing when accessing `vTensor` members

I will validate that this change does not regress load/inference latency.
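For illustration, below is a minimal, self-contained sketch of the ownership model described above. The types (`Storage`, `Tensor`, `record_access`) are simplified stand-ins invented for this example, not the actual ExecuTorch Vulkan classes: views share one storage object through a `std::shared_ptr`, so last-access metadata recorded through any view is visible to the original tensor and to every other view.

```cpp
#include <cstdint>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical, simplified stand-in for vTensorStorage.
struct Storage {
  // Last recorded access, consulted when building the next pipeline barrier.
  struct LastAccess {
    uint32_t stage = 0;   // e.g. a pipeline stage flag
    uint32_t access = 0;  // e.g. read/write access flags
  } last_access;
};

// Hypothetical, simplified stand-in for vTensor.
class Tensor {
 public:
  // Owning constructor: allocates fresh storage.
  explicit Tensor(std::vector<int64_t> sizes)
      : sizes_(std::move(sizes)), storage_(std::make_shared<Storage>()) {}

  // View constructor: shares the same storage object, so any access
  // recorded through the view is also seen by the original (and vice versa).
  Tensor(const Tensor& other, std::vector<int64_t> sizes)
      : sizes_(std::move(sizes)), storage_(other.storage_) {}

  void record_access(uint32_t stage, uint32_t access) {
    storage_->last_access = {stage, access};
  }

  bool is_view_of(const Tensor& other) const {
    // Identity comparison of the shared storage replaces handle comparison.
    return storage_.get() == other.storage_.get();
  }

 private:
  std::vector<int64_t> sizes_;
  std::shared_ptr<Storage> storage_;
};

int main() {
  Tensor t({4, 4});
  Tensor view(t, {16});
  // An access recorded through the original is observed by the view,
  // because both point at the same Storage instance.
  t.record_access(/*stage=*/1, /*access=*/2);
  std::cout << std::boolalpha << view.is_view_of(t) << '\n';  // true
  return 0;
}
```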
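And a small sketch of the alignment constraint behind the second change: a storage-buffer binding offset must be a multiple of `VkPhysicalDeviceLimits::minStorageBufferOffsetAlignment` (16 on the device in the log above), so an element-derived offset such as 84 bytes is not generally bindable. `align_up` is a hypothetical helper written for this example; it is not part of this diff.

```cpp
#include <cstdint>
#include <iostream>

// Round `offset` up to the next multiple of `alignment` (a power of two).
static uint64_t align_up(uint64_t offset, uint64_t alignment) {
  return (offset + alignment - 1) & ~(alignment - 1);
}

int main() {
  // Value reported by the validation error above; on a real device it is
  // queried via VkPhysicalDeviceLimits::minStorageBufferOffsetAlignment.
  const uint64_t min_storage_buffer_offset_alignment = 16;

  const uint64_t requested_offset = 84;  // 21 float elements * 4 bytes, as in the failing test
  const uint64_t aligned_offset =
      align_up(requested_offset, min_storage_buffer_offset_alignment);

  // 84 is not a multiple of 16, so binding at that offset is invalid;
  // the nearest valid offset would be 96.
  std::cout << requested_offset % min_storage_buffer_offset_alignment << '\n';  // 4
  std::cout << aligned_offset << '\n';                                          // 96
  return 0;
}
```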
Differential Revision: [D76047204](https://our.internmc.facebook.com/intern/diff/D76047204/)

[ghstack-poisoned]
---
 .../vulkan/runtime/api/containers/Tensor.cpp  | 116 +++++-------------
 .../vulkan/runtime/api/containers/Tensor.h    |  40 ++----
 .../vulkan/runtime/graph/ComputeGraph.cpp     |  15 +--
 backends/vulkan/runtime/graph/ComputeGraph.h  |   3 +-
 .../vulkan/test/vulkan_compute_api_test.cpp   | 108 ----------------
 5 files changed, 44 insertions(+), 238 deletions(-)

diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index 62b53f9a76c..a85229b2b86 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -343,8 +343,7 @@ vTensorStorage::vTensorStorage(
           storage_type_,
           dtype,
           allocate_memory)),
-      last_access_{},
-      has_copies_{false} {}
+      last_access_{} {}
 
 vTensorStorage::vTensorStorage(
     Context* const context,
@@ -361,21 +360,6 @@ vTensorStorage::vTensorStorage(
       buffer_(vkapi::VulkanBuffer()),
       last_access_{} {}
 
-vTensorStorage::vTensorStorage(
-    vTensorStorage& other,
-    const int64_t buffer_offset)
-    : context_(other.context_),
-      storage_type_{other.storage_type_},
-      image_extents_(other.image_extents_),
-      buffer_length_{other.buffer_length_},
-      buffer_offset_{buffer_offset},
-      image_(other.image_),
-      buffer_(other.buffer_, buffer_offset),
-      last_access_{other.last_access_},
-      has_copies_{false} {
-  other.has_copies_ = true;
-}
-
 vTensorStorage::~vTensorStorage() {
   flush();
 }
@@ -397,21 +381,6 @@ void vTensorStorage::transition(
   vkapi::PipelineStageFlags prev_stage = last_access_.stage;
   vkapi::MemoryAccessFlags prev_access = last_access_.access;
 
-  // If the underlying resource is a copy of another tensor's resource the
-  // last_access may not be accurate, since the original storage may have been
-  // written to as part of the original tensor. Likewise, if the underlying
-  // resource has copies, then the resource may have been updated as part of the
-  // view tensors.
-  //
-  // If the resource is a copy, or has copies of it, then cowardly assume that
-  // it has previously been written to as part of a compute shader before the
-  // current access event so that the appropriate memory barriers may be
-  // inserted.
-  if (is_copy() || has_copies_) {
-    prev_stage = vkapi::PipelineStage::COMPUTE;
-    prev_access = vkapi::kWrite;
-  }
-
   const bool prev_written = (prev_access & vkapi::MemoryAccessType::WRITE) != 0;
 
   VkImageLayout cur_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -458,20 +427,6 @@ void vTensorStorage::transition(
   last_access_.access = cur_access;
 }
 
-bool vTensorStorage::is_copy() const {
-  if (storage_type_ == utils::kBuffer) {
-    return buffer_.is_copy();
-  }
-  return image_.is_copy();
-}
-
-bool vTensorStorage::is_copy_of(const vTensorStorage& other) const {
-  if (storage_type_ == utils::kBuffer) {
-    return buffer_.is_copy_of(other.buffer_);
-  }
-  return image_.is_copy_of(other.image_);
-}
-
 //
 // vTensor
 //
@@ -503,14 +458,14 @@ vTensor::vTensor(
       numel_uniform_offset_(kUniformOffsetUnset),
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Construct Tensor storage
-      storage_(
+      storage_(std::make_shared<vTensorStorage>(
           context,
           storage_type,
           axis_map_,
           packed_dim_,
           padded_sizes_,
           dtype_,
-          allocate_memory) {
+          allocate_memory)) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
       sizes_,
       unsqueezed_strides_,
@@ -519,7 +474,7 @@ vTensor::vTensor(
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 
-  set_logical_limits(storage_.image_extents_);
+  set_logical_limits(storage_->image_extents_);
 }
 
 // NOLINTNEXTLINE
@@ -546,13 +501,13 @@ vTensor::vTensor(
       numel_uniform_offset_(kUniformOffsetUnset),
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Construct Tensor storage
-      storage_(context, image) {
+      storage_(std::make_shared<vTensorStorage>(context, image)) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
       sizes_,
       {0, 0, 0, 0},
       {{0, 0, 0}},
       static_cast(utils::multiply_integers(sizes_))});
-  set_logical_limits(storage_.image_extents_);
+  set_logical_limits(storage_->image_extents_);
 }
 
 vTensor::vTensor(vTensor& other)
@@ -583,8 +538,7 @@ vTensor::vTensor(vTensor& other)
 vTensor::vTensor(
     vTensor& other,
     const std::vector<int64_t>& sizes,
-    const std::vector<int64_t>& dim_order,
-    const int64_t offset_numel)
+    const std::vector<int64_t>& dim_order)
     : dtype_(other.dtype_),
       // Copy tensor size metadata
       sizes_(sizes.begin(), sizes.end()),
@@ -604,7 +558,7 @@ vTensor::vTensor(
       numel_uniform_offset_(kUniformOffsetUnset),
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
-      storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+      storage_(other.storage_) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
       sizes_,
       unsqueezed_strides_,
@@ -613,10 +567,6 @@ vTensor::vTensor(
 
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
-  VK_CHECK_COND(
-      offset_numel + numel() <= other.numel(),
-      "Tensor alias cannot access more elements than available in the original"
-      "tensor");
 }
 
 uint32_t vTensor::UniformData::write_attribute(
@@ -647,31 +597,31 @@ uint32_t vTensor::UniformData::write_attribute(
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
-  storage_.transition(pipeline_barrier, stage, vkapi::MemoryAccessType::READ);
-  return storage_.image_;
+  storage_->transition(pipeline_barrier, stage, vkapi::MemoryAccessType::READ);
+  return storage_->image_;
 }
 
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage,
     const vkapi::MemoryAccessFlags access) & {
-  storage_.transition(pipeline_barrier, stage, access);
-  return storage_.image_;
+  storage_->transition(pipeline_barrier, stage, access);
+  return storage_->image_;
 }
 
 vkapi::VulkanBuffer& vTensor::buffer(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
-  storage_.transition(pipeline_barrier, stage, vkapi::MemoryAccessType::READ);
-  return storage_.buffer_;
+  storage_->transition(pipeline_barrier, stage, vkapi::MemoryAccessType::READ);
+  return storage_->buffer_;
 }
 
 vkapi::VulkanBuffer& vTensor::buffer(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage,
     const vkapi::MemoryAccessFlags access) & {
-  storage_.transition(pipeline_barrier, stage, access);
-  return storage_.buffer_;
+  storage_->transition(pipeline_barrier, stage, access);
+  return storage_->buffer_;
 }
 
 void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
@@ -695,10 +645,10 @@ utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
 
 const vkapi::BufferBindInfo vTensor::sizes_ubo() {
   const size_t size_per_ubo =
-      storage_.context_->adapter_ptr()->min_ubo_alignment();
+      storage_->context_->adapter_ptr()->min_ubo_alignment();
   const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
+    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_size, true);
   }
   if (sizes_uniform_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
@@ -714,10 +664,10 @@ const vkapi::BufferBindInfo vTensor::sizes_ubo() {
 
 const vkapi::BufferBindInfo vTensor::strides_ubo() {
   const size_t size_per_ubo =
-      storage_.context_->adapter_ptr()->min_ubo_alignment();
+      storage_->context_->adapter_ptr()->min_ubo_alignment();
   const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
+    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_size, true);
   }
   if (unsqueezed_strides_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
@@ -735,10 +685,10 @@ const vkapi::BufferBindInfo vTensor::strides_ubo() {
 
 const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
   const size_t size_per_ubo =
-      storage_.context_->adapter_ptr()->min_ubo_alignment();
+      storage_->context_->adapter_ptr()->min_ubo_alignment();
   const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
+    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_size, true);
   }
   if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
@@ -754,10 +704,10 @@ const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
 
 const vkapi::BufferBindInfo vTensor::numel_ubo() {
   const size_t size_per_ubo =
-      storage_.context_->adapter_ptr()->min_ubo_alignment();
+      storage_->context_->adapter_ptr()->min_ubo_alignment();
   const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
+    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_size, true);
   }
   if (numel_uniform_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
@@ -774,7 +724,7 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
 size_t vTensor::staging_buffer_numel() const {
   const bool is_int8 = dtype_ == vkapi::kChar;
   const bool int8_supported =
-      storage_.context_->adapter_ptr()->has_full_int8_buffers_support();
+      storage_->context_->adapter_ptr()->has_full_int8_buffers_support();
   if (is_int8 && !int8_supported) {
     return utils::align_up_4(numel());
   }
@@ -787,10 +737,10 @@ size_t vTensor::staging_buffer_numel() const {
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case utils::kBuffer:
-      return storage_.buffer_.get_memory_requirements();
+      return storage_->buffer_.get_memory_requirements();
    case utils::kTexture2D:
    case utils::kTexture3D:
-      return storage_.image_.get_memory_requirements();
+      return storage_->image_.get_memory_requirements();
   }
   return {};
 }
@@ -798,11 +748,11 @@ VkMemoryRequirements vTensor::get_memory_requirements() const {
 void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
   switch (storage_type()) {
    case utils::kBuffer:
-      storage_.buffer_.bind_allocation(allocation);
+      storage_->buffer_.bind_allocation(allocation);
       break;
    case utils::kTexture2D:
    case utils::kTexture3D:
-      storage_.image_.bind_allocation(allocation);
+      storage_->image_.bind_allocation(allocation);
       break;
   }
 }
@@ -845,11 +795,11 @@ void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
     utils::uvec3 virtual_extents =
         calculate_image_extents(padded_sizes_, axis_map_, packed_dim_);
 
-    bool valid_resize = virtual_extents[0] <= storage_.image_extents_[0];
+    bool valid_resize = virtual_extents[0] <= storage_->image_extents_[0];
     valid_resize =
-        valid_resize && virtual_extents[1] <= storage_.image_extents_[1];
+        valid_resize && virtual_extents[1] <= storage_->image_extents_[1];
     valid_resize =
-        valid_resize && virtual_extents[2] <= storage_.image_extents_[2];
+        valid_resize && virtual_extents[2] <= storage_->image_extents_[2];
 
     VK_CHECK_COND(
         valid_resize,
@@ -859,7 +809,7 @@ void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
     // new sizes of the tensor.
     int64_t numel = utils::multiply_integers(sizes);
     bool valid_resize =
-        numel + storage_.buffer_offset_ <= storage_.buffer_length_;
+        numel + storage_->buffer_offset_ <= storage_->buffer_length_;
     VK_CHECK_COND(
         valid_resize,
         "tensor sizes requires a larger buffer than the current one.");
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index d9cbadb46b9..850dc2d7fab 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -97,19 +97,8 @@ class vTensorStorage final {
 
   vTensorStorage(Context* const context, const vkapi::VulkanImage& image);
 
- protected:
-  /*
-   * This allows for creation of tensors that use the same underlying storage
-   * as another tensor. Note that this functionality is currently enabled for
-   * tensors that have buffer storage only. The created tensor will not have
-   * ownership of the underlying VkBuffer. This constructor is marked protected
-   * because this behaviour is unsafe, since the original tensor may be
-   * destroyed before the copy is destroyed.
-   */
-  vTensorStorage(vTensorStorage& other, const int64_t buffer_offset = 0);
 
- public:
-  // To discourage creating copies, the assignment operator is still deleted
+  vTensorStorage(vTensorStorage& other) = delete;
   vTensorStorage& operator=(const vTensorStorage& other) = delete;
 
   vTensorStorage(vTensorStorage&& other) = default;
@@ -136,8 +125,6 @@ class vTensorStorage final {
 
   // Last Access - used to insert memory barriers
   LastAccess last_access_;
-  // Indicates whether copies of this vTensorStorage have been made
-  bool has_copies_;
 
  private:
  // Registers underlying memory for cleanup
@@ -156,16 +143,6 @@ class vTensorStorage final {
   inline VkFormat texture_format() {
     return image_.format();
   }
-
-  /*
-   * Check if the underlying resource is a copy of another resource
-   */
-  bool is_copy() const;
-
-  /*
-   * Used for checking if this vTensorStorage is a copy of another instance
-   */
-  bool is_copy_of(const vTensorStorage& other) const;
 };
 
 class vTensor final {
@@ -222,8 +199,7 @@ class vTensor final {
   vTensor(
       vTensor& other,
       const std::vector<int64_t>& sizes,
-      const std::vector<int64_t>& dim_order,
-      const int64_t offset_numel = 0);
+      const std::vector<int64_t>& dim_order);
 
   // To discourage making copies, the copy assignment operator is still deleted
   vTensor& operator=(const vTensor& other) = delete;
@@ -358,7 +334,7 @@ class vTensor final {
   // impossible for a ubo to have an offset of 1.
   constexpr static uint32_t kUniformOffsetUnset = 1;
 
-  vTensorStorage storage_;
+  std::shared_ptr<vTensorStorage> storage_;
 
   std::shared_ptr<UniformData> uniform_data_;
 
@@ -368,7 +344,7 @@ class vTensor final {
    */
 
   inline vkapi::VulkanImage& image() const& {
-    return storage_.image_;
+    return storage_->image_;
   }
 
   vkapi::VulkanImage& image(
@@ -381,7 +357,7 @@ class vTensor final {
       const vkapi::MemoryAccessFlags) &;
 
   inline vkapi::VulkanBuffer& buffer() const& {
-    return storage_.buffer_;
+    return storage_->buffer_;
   }
 
   vkapi::VulkanBuffer& buffer(
@@ -398,11 +374,11 @@ class vTensor final {
    */
 
   inline utils::StorageType storage_type() const {
-    return storage_.storage_type_;
+    return storage_->storage_type_;
   }
 
   inline bool has_buffer_storage() const {
-    return storage_.storage_type_ == utils::kBuffer;
+    return storage_->storage_type_ == utils::kBuffer;
   }
 
  private:
@@ -623,7 +599,7 @@ class vTensor final {
    * Check if this vTensor instance is a view of another vTensor instance
   */
  inline bool is_view_of(const vTensor& other) const {
-    return storage_.is_copy_of(other.storage_);
+    return storage_.get() == other.storage_.get();
  }
 
  const std::shared_ptr<UniformData>& get_uniform_data() const {
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index 8e498d5f2d1..5dc26286682 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -378,27 +378,16 @@ ValueRef ComputeGraph::add_tensor_view(const ValueRef vref) {
   const vTensorPtr t = get_tensor(vref);
   ValueRef idx(static_cast(values_.size()));
   values_.emplace_back(api::vTensor(*t));
-  for (SharedObject& sobj : shared_objects_) {
-    if (sobj.has_user(vref)) {
-      sobj.add_user(this, idx);
-    }
-  }
   return idx;
 }
 
 ValueRef ComputeGraph::add_tensor_view(
     const ValueRef vref,
     const std::vector<int64_t>& sizes,
-    const std::vector<int64_t>& strides,
-    const size_t offset_numel) {
+    const std::vector<int64_t>& strides) {
   const vTensorPtr t = get_tensor(vref);
   ValueRef idx(static_cast(values_.size()));
-  values_.emplace_back(api::vTensor(*t, sizes, strides, offset_numel));
-  for (SharedObject& sobj : shared_objects_) {
-    if (sobj.has_user(vref)) {
-      sobj.add_user(this, idx);
-    }
-  }
+  values_.emplace_back(api::vTensor(*t, sizes, strides));
   return idx;
 }
 
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 90f89ea18d6..31514989dfc 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -573,8 +573,7 @@ class ComputeGraph final {
   ValueRef add_tensor_view(
       const ValueRef vref,
       const std::vector<int64_t>& sizes,
-      const std::vector<int64_t>& dim_order,
-      const size_t offset_numel = 0);
+      const std::vector<int64_t>& dim_order);
 
   /*
    * Add a `TensorRef` value to the graph with the specific properties. A
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 60b5ccb1a80..05cec2d2257 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -821,58 +821,6 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) {
   }
 }
 
-TEST_F(VulkanComputeAPITest, tensor_no_copy_slice_test) {
-  constexpr int L = 31;
-
-  // S{N} refers to slice {N}
-  constexpr int L_S1 = 17;
-  constexpr int O_S1 = 5;
-
-  constexpr int L_S2 = 7;
-  constexpr int O_S2 = 3;
-
-  std::vector<int64_t> dim_order = {0};
-
-  std::vector<int64_t> t_sizes = {L};
-  std::vector<int64_t> s1_sizes = {L_S1};
-  std::vector<int64_t> s2_sizes = {L_S2};
-
-  vTensor orig = CREATE_FLOAT_BUFFER(t_sizes, /*allocate_memory=*/true);
-
-  fill_vtensor(orig, 0);
-
-  vTensor s1 = vTensor(orig, s1_sizes, dim_order, O_S1);
-  vTensor s2 = vTensor(s1, s2_sizes, dim_order, O_S2);
-
-  record_scalar_add_buffer(api::context(), s1, 4.5f);
-  record_scalar_add_buffer(api::context(), s2, 7.5f);
-
-  std::vector<float> orig_data(orig.staging_buffer_numel());
-  extract_vtensor(orig, orig_data);
-
-  int id = 0;
-  while (id < O_S1) {
-    EXPECT_TRUE(orig_data[id] == 0);
-    ++id;
-  }
-  while (id < O_S1 + O_S2) {
-    EXPECT_TRUE(orig_data[id] == 4.5);
-    ++id;
-  }
-  while (id < O_S1 + O_S2 + L_S2) {
-    EXPECT_TRUE(orig_data[id] == 12);
-    ++id;
-  }
-  while (id < O_S1 + L_S1) {
-    EXPECT_TRUE(orig_data[id] == 4.5);
-    ++id;
-  }
-  while (id < L) {
-    EXPECT_TRUE(orig_data[id] == 0);
-    ++id;
-  }
-}
-
 TEST_F(VulkanComputeAPITest, texture_deferred_allocation_test) {
   // This test is the same as texture_add_sanity_check, except that the tensor
   // memory is allocated in a deferred fashion
@@ -1303,62 +1251,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_buffer) {
   }
 }
 
-TEST(VulkanComputeGraphTest, test_simple_graph_with_view) {
-  constexpr int W = 7;
-  constexpr int H = 7;
-  // slice height
-  constexpr int S_H = 2;
-  // slice offset
-  constexpr int S_O = 3;
-
-  GraphConfig config;
-  config.set_storage_type_override(utils::kBuffer);
-  ComputeGraph graph(config);
-
-  std::vector<int64_t> dim_order = {0, 1};
-
-  std::vector<int64_t> orig_sizes = {H, W};
-  std::vector<int64_t> slice_sizes = {S_H, W};
-  const int offset = S_O * W;
-
-  // Build graph
-
-  IOValueRef orig = graph.add_input_tensor(orig_sizes, vkapi::kFloat);
-  ValueRef slice =
-      graph.add_tensor_view(orig.value, slice_sizes, dim_order, offset);
-
-  EXPECT_TRUE(graph.val_is_view_of(slice, orig.value));
-
-  IOValueRef out = {};
-
-  out.value = graph.add_tensor(slice_sizes, vkapi::kFloat);
-
-  auto opFn = VK_GET_OP_FN("aten.abs.default");
-  opFn(graph, {slice, out.value, kDummyValueRef, kDummyValueRef});
-
-  out.staging = graph.set_output_tensor(out.value);
-
-  graph.prepare();
-  graph.encode_execute();
-
-  // Run graph
-
-  for (float i = 5.0f; i < 30.0f; i += 10.0f) {
-    float start_val = -130 + i;
-
-    fill_vtensor(graph, orig, start_val, true);
-
-    graph.execute();
-
-    EXTRACT_TENSOR(out);
-
-    for (size_t i = 0; i < graph.get_tensor(out.value)->numel(); ++i) {
-      const float expected_val = std::abs(start_val) - float(offset) - i;
-      CHECK_VALUE(data_out, i, expected_val);
-    }
-  }
-}
-
 TEST(VulkanComputeGraphTest, test_graph_view_of_view) {
   GraphConfig config;
   config.set_storage_type_override(utils::kTexture3D);