diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index a3d9bd4aa34..6f7167c54fb 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -897,6 +897,16 @@ VkMemoryRequirements vTensor::get_memory_requirements() const {
   return {};
 }
 
+bool vTensor::memory_is_bound() const {
+  switch (storage_type()) {
+    case utils::kBuffer:
+      return storage_->buffer_.has_memory();
+    case utils::kTexture2D:
+    case utils::kTexture3D:
+      return storage_->image_.has_memory();
+  }
+}
+
 void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
   switch (storage_type()) {
     case utils::kBuffer:
@@ -909,6 +919,18 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
   }
 }
 
+void vTensor::acquire_allocation(vkapi::Allocation&& allocation) {
+  switch (storage_type()) {
+    case utils::kBuffer:
+      storage_->buffer_.acquire_allocation(std::move(allocation));
+      break;
+    case utils::kTexture2D:
+    case utils::kTexture3D:
+      storage_->image_.acquire_allocation(std::move(allocation));
+      break;
+  }
+}
+
 void vTensor::update_metadata() {
   numel_ = utils::multiply_integers(sizes_);
   strides_ = calculate_strides(sizes_, dim_order_);
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index 0e1a1526d88..bcca956e5ea 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -560,6 +560,12 @@ class vTensor final {
    */
  VmaAllocationCreateInfo get_allocation_create_info() const;
 
+  /*
+   * Checks if the tensor's underlying buffer or image resource is bound to a
+   * memory allocation.
+   */
+  bool memory_is_bound() const;
+
   /*
    * Return the VkMemoryRequirements of the underlying resource
    */
@@ -570,6 +576,11 @@ class vTensor final {
    */
   void bind_allocation(const vkapi::Allocation& allocation);
 
+  /*
+   * Binds and acquires an rvalue memory allocation.
+   */
+  void acquire_allocation(vkapi::Allocation&& allocation);
+
  private:
   /*
    * Assuming sizes, dim order, or axis mapping was modified, recompute all
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index d57ba2b11d7..fff530d57cb 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -356,8 +356,6 @@ ValueRef ComputeGraph::add_tensor(
     const utils::GPUMemoryLayout memory_layout,
     const int64_t shared_object_idx,
     const utils::AxisMapLayout axis_map_layout) {
-  bool allocate_memory = shared_object_idx < 0;
-
   ValueRef idx(static_cast<int>(values_.size()));
   check_no_active_value_ptrs();
   values_.emplace_back(api::vTensor(
@@ -366,10 +364,10 @@ ValueRef ComputeGraph::add_tensor(
       dtype,
       storage_type,
       memory_layout,
-      allocate_memory,
+      false,
       axis_map_layout));
-  if (!allocate_memory) {
+  if (shared_object_idx >= 0) {
     get_shared_object(shared_object_idx).add_user(this, idx);
   }
   return idx;
 }
@@ -626,6 +624,17 @@ SharedObject& ComputeGraph::get_shared_object(const int64_t idx) {
   return shared_objects_.at(idx);
 }
 
+void ComputeGraph::create_dedicated_allocation_for(const ValueRef idx) {
+  vTensorPtr tensor = get_tensor(idx);
+  if (!tensor->memory_is_bound()) {
+    VmaAllocationCreateInfo alloc_create_info =
+        context()->adapter_ptr()->vma().gpuonly_resource_create_info();
+    tensor->acquire_allocation(
+        context()->adapter_ptr()->vma().create_allocation(
+            tensor->get_memory_requirements(), alloc_create_info));
+  }
+}
+
 void ComputeGraph::update_descriptor_counts(
     const vkapi::ShaderInfo& shader_info,
     bool execute) {
@@ -852,11 +861,6 @@ void ComputeGraph::prepare() {
   }
 
   execute_threshold_node_count_ = count_threshold;
-
-  for (SharedObject& shared_object : shared_objects_) {
-    shared_object.allocate(this);
-    shared_object.bind_users(this);
-  }
 }
 
 void ComputeGraph::prepare_pipelines() {
@@ -952,6 +956,18 @@ void ComputeGraph::prepack() {
   submit_current_cmd_and_wait(/*final_use=*/true);
   context_->flush();
   staging_nbytes_in_cmd_ = 0;
+
+  // Initialize allocations for intermediate tensors
+  for (SharedObject& shared_object : shared_objects_) {
+    shared_object.allocate(this);
+    shared_object.bind_users(this);
+  }
+  // Make sure all remaining tensors have allocations
+  for (int i = 0; i < values_.size(); i++) {
+    if (values_.at(i).isTensor()) {
+      create_dedicated_allocation_for(i);
+    }
+  }
 }
 
 void ComputeGraph::execute() {
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index f594571f9a7..7686aa65025 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -827,6 +827,13 @@ class ComputeGraph final {
 
   SharedObject& get_shared_object(const int64_t idx);
 
+  /*
+   * Creates a dedicated memory allocation for a vTensor value and has the
+   * tensor acquire the allocation object. If the tensor is already bound to a
+   * memory allocation, this function is a no-op.
+   */
+  void create_dedicated_allocation_for(const ValueRef idx);
+
   //
   // Graph Preparation
   //
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
index 03df92292f8..62e1dc86f43 100644
--- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
@@ -97,6 +97,10 @@ void PrepackNode::encode(ComputeGraph* graph) {
   }
 
   {
+    // If the vTensor is not yet bound to a memory allocation, create a new one
+    // and acquire it.
+    graph->create_dedicated_allocation_for(packed_);
+
     vkapi::PipelineBarrier pipeline_barrier{};
     vkapi::DescriptorSet descriptor_set = context->get_descriptor_set(
         shader_, local_workgroup_size_, spec_vars_, push_constants_offset);
diff --git a/backends/vulkan/runtime/vk_api/memory/Buffer.cpp b/backends/vulkan/runtime/vk_api/memory/Buffer.cpp
index 4f58e07b146..f10e40abdbb 100644
--- a/backends/vulkan/runtime/vk_api/memory/Buffer.cpp
+++ b/backends/vulkan/runtime/vk_api/memory/Buffer.cpp
@@ -20,6 +20,7 @@ VulkanBuffer::VulkanBuffer()
       allocator_(VK_NULL_HANDLE),
       memory_{},
       owns_memory_(false),
+      memory_bundled_(false),
       is_copy_(false),
       handle_(VK_NULL_HANDLE) {}
 
@@ -33,6 +34,7 @@ VulkanBuffer::VulkanBuffer(
       allocator_(vma_allocator),
       memory_{},
       owns_memory_(allocate_memory),
+      memory_bundled_(allocate_memory),
       is_copy_(false),
       handle_(VK_NULL_HANDLE) {
   // If the buffer size is 0, allocate a buffer with a size of 1 byte. This is
@@ -77,6 +79,7 @@ VulkanBuffer::VulkanBuffer(
       allocator_(other.allocator_),
       memory_(other.memory_),
       owns_memory_(false),
+      memory_bundled_(false),
       is_copy_(true),
       handle_(other.handle_) {
   // TODO: set the offset and range appropriately
@@ -91,6 +94,7 @@ VulkanBuffer::VulkanBuffer(VulkanBuffer&& other) noexcept
       allocator_(other.allocator_),
       memory_(std::move(other.memory_)),
       owns_memory_(other.owns_memory_),
+      memory_bundled_(other.memory_bundled_),
       is_copy_(other.is_copy_),
       handle_(other.handle_) {
   other.handle_ = VK_NULL_HANDLE;
@@ -99,16 +103,19 @@ VulkanBuffer& VulkanBuffer::operator=(VulkanBuffer&& other) noexcept {
   VkBuffer tmp_buffer = handle_;
   bool tmp_owns_memory = owns_memory_;
+  bool tmp_memory_bundled = memory_bundled_;
 
   buffer_properties_ = other.buffer_properties_;
   allocator_ = other.allocator_;
   memory_ = std::move(other.memory_);
   owns_memory_ = other.owns_memory_;
+  memory_bundled_ = other.memory_bundled_;
   is_copy_ = other.is_copy_;
   handle_ = other.handle_;
 
   other.handle_ = tmp_buffer;
   other.owns_memory_ = tmp_owns_memory;
+  other.memory_bundled_ = tmp_memory_bundled;
 
   return *this;
 }
@@ -119,14 +126,22 @@ VulkanBuffer::~VulkanBuffer() {
   // ownership of the underlying resource.
   if (handle_ != VK_NULL_HANDLE && !is_copy_) {
     if (owns_memory_) {
-      vmaDestroyBuffer(allocator_, handle_, memory_.allocation);
+      if (memory_bundled_) {
+        vmaDestroyBuffer(allocator_, handle_, memory_.allocation);
+        // Prevent the underlying memory allocation from being freed; it was
+        // freed by vmaDestroyBuffer
+        memory_.allocation = VK_NULL_HANDLE;
+      } else {
+        vkDestroyBuffer(this->device(), handle_, nullptr);
+        // Allow underlying memory allocation to be freed by the destructor of
+        // Allocation class
+      }
     } else {
       vkDestroyBuffer(this->device(), handle_, nullptr);
+      // Prevent the underlying memory allocation from being freed since this
+      // object doesn't own it
+      memory_.allocation = VK_NULL_HANDLE;
     }
-    // Prevent the underlying memory allocation from being freed; it was either
-    // freed by vmaDestroyBuffer, or this resource does not own the underlying
-    // memory
-    memory_.allocation = VK_NULL_HANDLE;
   }
 }
@@ -136,6 +151,24 @@ VmaAllocationInfo VulkanBuffer::allocation_info() const {
   return info;
 }
 
+void VulkanBuffer::bind_allocation_impl(const Allocation& memory) {
+  VK_CHECK_COND(!memory_, "Cannot bind an already bound allocation!");
+  if (!is_copy_) {
+    VK_CHECK(vmaBindBufferMemory(allocator_, memory.allocation, handle_));
+  }
+}
+
+void VulkanBuffer::bind_allocation(const Allocation& memory) {
+  bind_allocation_impl(memory);
+  memory_.allocation = memory.allocation;
+}
+
+void VulkanBuffer::acquire_allocation(Allocation&& memory) {
+  bind_allocation_impl(memory);
+  memory_ = std::move(memory);
+  owns_memory_ = true;
+}
+
 VkMemoryRequirements VulkanBuffer::get_memory_requirements() const {
   VkMemoryRequirements memory_requirements;
   vkGetBufferMemoryRequirements(this->device(), handle_, &memory_requirements);
diff --git a/backends/vulkan/runtime/vk_api/memory/Buffer.h b/backends/vulkan/runtime/vk_api/memory/Buffer.h
index e1b441397b4..582b537465d 100644
--- a/backends/vulkan/runtime/vk_api/memory/Buffer.h
+++ b/backends/vulkan/runtime/vk_api/memory/Buffer.h
@@ -100,6 +100,10 @@ class VulkanBuffer final {
   Allocation memory_;
   // Indicates whether the underlying memory is owned by this resource
   bool owns_memory_;
+  // Indicates whether the allocation for the buffer was created with the buffer
+  // via vmaCreateBuffer; if this is false, the memory is owned but was bound
+  // separately via vmaBindBufferMemory
+  bool memory_bundled_;
   // Indicates whether this VulkanBuffer was copied from another VulkanBuffer,
   // thus it does not have ownership of the underlying VKBuffer
   bool is_copy_;
@@ -162,13 +166,21 @@ class VulkanBuffer final {
     return (handle_ == other.handle_) && is_copy_;
   }
 
-  inline void bind_allocation(const Allocation& memory) {
-    VK_CHECK_COND(!memory_, "Cannot bind an already bound allocation!");
-    if (!is_copy_) {
-      VK_CHECK(vmaBindBufferMemory(allocator_, memory.allocation, handle_));
-    }
-    memory_.allocation = memory.allocation;
-  }
+ private:
+  void bind_allocation_impl(const Allocation& memory);
+
+ public:
+  /*
+   * Given a memory allocation, bind it to the underlying VkBuffer. The
+   * lifetime of the memory allocation is assumed to be managed externally.
+   */
+  void bind_allocation(const Allocation& memory);
+
+  /*
+   * Given an rvalue memory allocation, bind it to the underlying VkBuffer and
+   * also acquire ownership of the memory allocation.
+   */
+  void acquire_allocation(Allocation&& memory);
 
   VkMemoryRequirements get_memory_requirements() const;
diff --git a/backends/vulkan/runtime/vk_api/memory/Image.cpp b/backends/vulkan/runtime/vk_api/memory/Image.cpp
index da6ff76bccd..cadeb779c83 100644
--- a/backends/vulkan/runtime/vk_api/memory/Image.cpp
+++ b/backends/vulkan/runtime/vk_api/memory/Image.cpp
@@ -99,6 +99,7 @@ VulkanImage::VulkanImage()
       allocator_(VK_NULL_HANDLE),
       memory_{},
       owns_memory_(false),
+      memory_bundled_(false),
       owns_view_(false),
       is_copy_(false),
       handles_{
@@ -125,6 +126,7 @@ VulkanImage::VulkanImage(
       allocator_(vma_allocator),
       memory_{},
       owns_memory_{allocate_memory},
+      memory_bundled_(allocate_memory),
       owns_view_(false),
       is_copy_(false),
       handles_{
@@ -195,6 +197,7 @@ VulkanImage::VulkanImage(
       allocator_(VK_NULL_HANDLE),
       memory_{},
       owns_memory_(false),
+      memory_bundled_(false),
       is_copy_(false),
       handles_{
           image,
@@ -224,6 +227,7 @@ VulkanImage::VulkanImage(VulkanImage&& other) noexcept
       allocator_(other.allocator_),
       memory_(std::move(other.memory_)),
       owns_memory_(other.owns_memory_),
+      memory_bundled_(other.memory_bundled_),
       owns_view_(other.owns_view_),
       is_copy_(other.is_copy_),
       handles_(other.handles_),
@@ -232,12 +236,14 @@ VulkanImage::VulkanImage(VulkanImage&& other) noexcept
   other.handles_.image_view = VK_NULL_HANDLE;
   other.handles_.sampler = VK_NULL_HANDLE;
   other.owns_memory_ = false;
+  other.memory_bundled_ = false;
 }
 
 VulkanImage& VulkanImage::operator=(VulkanImage&& other) noexcept {
   VkImage tmp_image = handles_.image;
   VkImageView tmp_image_view = handles_.image_view;
   bool tmp_owns_memory = owns_memory_;
+  bool tmp_memory_bundled = memory_bundled_;
 
   device_ = other.device_;
   image_properties_ = other.image_properties_;
@@ -246,6 +252,7 @@ VulkanImage& VulkanImage::operator=(VulkanImage&& other) noexcept {
   allocator_ = other.allocator_;
   memory_ = std::move(other.memory_);
   owns_memory_ = other.owns_memory_;
+  memory_bundled_ = other.memory_bundled_;
   is_copy_ = other.is_copy_;
   handles_ = other.handles_;
   layout_ = other.layout_;
@@ -253,6 +260,7 @@ VulkanImage& VulkanImage::operator=(VulkanImage&& other) noexcept {
   other.handles_.image = tmp_image;
   other.handles_.image_view = tmp_image_view;
   other.owns_memory_ = tmp_owns_memory;
+  other.memory_bundled_ = tmp_memory_bundled;
 
   return *this;
 }
@@ -271,14 +279,22 @@ VulkanImage::~VulkanImage() {
 
   if (handles_.image != VK_NULL_HANDLE) {
     if (owns_memory_) {
-      vmaDestroyImage(allocator_, handles_.image, memory_.allocation);
+      if (memory_bundled_) {
+        vmaDestroyImage(allocator_, handles_.image, memory_.allocation);
+        // Prevent the underlying memory allocation from being freed; it was
+        // freed by vmaDestroyImage
+        memory_.allocation = VK_NULL_HANDLE;
+      } else {
+        vkDestroyImage(this->device(), handles_.image, nullptr);
+        // Allow underlying memory allocation to be freed by the destructor of
+        // Allocation class
+      }
     } else {
       vkDestroyImage(this->device(), handles_.image, nullptr);
+      // Prevent the underlying memory allocation from being freed since this
+      // object doesn't own it
+      memory_.allocation = VK_NULL_HANDLE;
     }
-    // Prevent the underlying memory allocation from being freed; it was either
-    // freed by vmaDestroyImage, or this resource does not own the underlying
-    // memory
-    memory_.allocation = VK_NULL_HANDLE;
   }
 }
@@ -319,6 +335,31 @@ void VulkanImage::create_image_view() {
       &(handles_.image_view)));
 }
 
+void VulkanImage::bind_allocation_impl(const Allocation& memory) {
+  VK_CHECK_COND(!memory_, "Cannot bind an already bound allocation!");
+  // To prevent multiple instances of binding the same VkImage to a memory
+  // block, do not actually bind memory if this VulkanImage is a copy. Assume
+  // that the original VulkanImage is responsible for binding the image.
+  if (!is_copy_) {
+    VK_CHECK(vmaBindImageMemory(allocator_, memory.allocation, handles_.image));
+  }
+
+  // Only create the image view if the image has been bound to memory
+  owns_view_ = true;
+  create_image_view();
+}
+
+void VulkanImage::bind_allocation(const Allocation& memory) {
+  bind_allocation_impl(memory);
+  memory_.allocation = memory.allocation;
+}
+
+void VulkanImage::acquire_allocation(Allocation&& memory) {
+  bind_allocation_impl(memory);
+  memory_ = std::move(memory);
+  owns_memory_ = true;
+}
+
 VkMemoryRequirements VulkanImage::get_memory_requirements() const {
   VkMemoryRequirements memory_requirements;
   vkGetImageMemoryRequirements(
diff --git a/backends/vulkan/runtime/vk_api/memory/Image.h b/backends/vulkan/runtime/vk_api/memory/Image.h
index 5bbdaf06b47..db632c34378 100644
--- a/backends/vulkan/runtime/vk_api/memory/Image.h
+++ b/backends/vulkan/runtime/vk_api/memory/Image.h
@@ -156,6 +156,10 @@ class VulkanImage final {
   Allocation memory_;
   // Indicates whether the underlying memory is owned by this resource
   bool owns_memory_;
+  // Indicates whether the allocation for the image was created with the image
+  // via vmaCreateImage; if this is false, the memory is owned but was bound
+  // separately via vmaBindImageMemory
+  bool memory_bundled_;
   // In some cases, a VulkanImage may be a copy of another VulkanImage but still
   // own a unique view of the VkImage.
   bool owns_view_;
@@ -242,21 +246,21 @@ class VulkanImage final {
     return (handles_.image == other.handles_.image) && is_copy_;
   }
 
-  inline void bind_allocation(const Allocation& memory) {
-    VK_CHECK_COND(!memory_, "Cannot bind an already bound allocation!");
-    // To prevent multiple instances of binding the same VkImage to a memory
-    // block, do not actually bind memory if this VulkanImage is a copy. Assume
-    // that the original VulkanImage is responsible for binding the image.
-    if (!is_copy_) {
-      VK_CHECK(
-          vmaBindImageMemory(allocator_, memory.allocation, handles_.image));
-    }
-    memory_.allocation = memory.allocation;
-
-    // Only create the image view if the image has been bound to memory
-    owns_view_ = true;
-    create_image_view();
-  }
+ private:
+  void bind_allocation_impl(const Allocation& memory);
+
+ public:
+  /*
+   * Given a memory allocation, bind it to the underlying VkImage. The lifetime
+   * of the memory allocation is assumed to be managed externally.
+   */
+  void bind_allocation(const Allocation& memory);
+
+  /*
+   * Given an rvalue memory allocation, bind it to the underlying VkImage and
+   * also acquire ownership of the memory allocation.
+   */
+  void acquire_allocation(Allocation&& memory);
 
   VkMemoryRequirements get_memory_requirements() const;
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 96adc13d3cd..9a857f41fde 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -1176,6 +1176,7 @@ TEST(VulkanComputeGraphTest, test_zero_dim_tensor) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   // Run graph
@@ -1218,6 +1219,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_buffer) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   // Run graph
@@ -1303,6 +1305,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   // Run graph
@@ -1361,6 +1364,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_symint) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   // Run graph
@@ -1519,6 +1523,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
   EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
 
   graph.prepare();
+  graph.prepack();
 
   // +3: shared memory allocations for tensors
   expected_vma_allocation_count += 3;
@@ -1659,6 +1664,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_tmp_tensors) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   // Run graph
@@ -1725,6 +1731,7 @@ TEST(VulkanComputeGraphTest, test_large_graph) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   auto build_end_time = std::chrono::system_clock::now();
@@ -1801,6 +1808,7 @@ void test_clone(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   fill_vtensor(graph, a, 0.0f, /*iota = */ true);
@@ -1885,6 +1893,7 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   fill_vtensor(graph, a, 0.0f, /*iota = */ true);
@@ -1948,6 +1957,7 @@ TEST(VulkanComputeGraphTest, DISABLED_test_etvk_copy_channel_offset_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   fill_vtensor(graph, a, 0.0f, true);
@@ -2038,6 +2048,7 @@ TEST(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   float a_value = 1.0f;
   float b_value = 2.0f;
@@ -2150,6 +2161,7 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_int_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   fill_vtensor(graph, a, 0, /*iota = */ true);
@@ -2213,6 +2225,7 @@ TEST(VulkanComputeGraphTest, DISABLED_test_etvk_copy_channel_offset_int_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   fill_vtensor(graph, a, 0.0f, true);
@@ -2272,6 +2285,7 @@ TEST(VulkanComputeGraphTest, test_view_change_packing) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
+  graph.prepack();
 
   fill_vtensor(graph, in, 0.0, true);
@@ -2430,6 +2444,7 @@ void compute_graph_round_trip_test(
   ValueRef r_staging_out = graph.set_output_tensor(r_tensor);
 
   graph.prepare();
+  graph.prepack();
 
   std::vector<T> data_in(graph.numel_of(r_tensor));
   for (int i = 0; i < data_in.size(); i++) {
@@ -2620,7 +2635,6 @@ void test_mm(
       B, M, K, N, dtype, storage_type, memory_layout, mat2_data, prepack);
 
   graph.prepare();
-  graph.prepack();
 
   for (int i = 1; i < 4; i++) {
@@ -2700,7 +2714,6 @@ void test_mm_with_resize_reencode(
       B, M, K, N, dtype, storage_type, memory_layout, mat2_data, false);
 
   graph.prepare();
-  graph.prepack();
 
   for (int i = 1; i < 4; i++) {
@@ -3122,7 +3135,6 @@ void test_dynamic_dispatch(int M, int N) {
   ComputeGraph graph = build_dynamic_dispatch_test_graph(M, N);
 
   graph.prepare();
-  graph.prepack();
 
   for (int i = 1; i < 4; i++) {
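---

Note on the lifecycle change implied by the test updates: memory allocation moves out of `prepare()` and into `prepack()`, so clients that previously called only `graph.prepare()` before running the graph must now also call `graph.prepack()`. A minimal sketch of the expected call order, assuming a graph has already been built (`build_graph` is a hypothetical helper, not part of this patch):

```cpp
// Sketch only: build_graph() is a placeholder for adding values/nodes.
vkcompute::ComputeGraph graph{vkcompute::GraphConfig()};
build_graph(graph);

graph.prepare();  // prepares pipelines/descriptors; no longer allocates
                  // memory for shared objects
graph.prepack();  // encodes prepacking, then allocates and binds shared
                  // objects and creates dedicated allocations for any
                  // tensors still unbound
graph.execute();  // safe: every tensor is bound to memory by this point
```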
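The ownership rules that `memory_bundled_` encodes can be illustrated outside of Vulkan: `bind_allocation` leaves the allocation's lifetime with the caller, `acquire_allocation` transfers it into the resource, and `memory_bundled_` records whether handle and memory must be destroyed together (the `vmaDestroyBuffer`/`vmaDestroyImage` path) or separately. The standalone sketch below mimics that destructor logic with toy types; `FakeAllocation` and `FakeBuffer` are hypothetical stand-ins, not the ExecuTorch or VMA APIs:

```cpp
#include <cstdio>
#include <utility>

// Hypothetical stand-in for vkapi::Allocation: frees its memory on
// destruction unless its handle has been cleared (0 ~ VK_NULL_HANDLE).
struct FakeAllocation {
  int handle = 0;
  FakeAllocation() = default;
  explicit FakeAllocation(int h) : handle(h) {}
  FakeAllocation(FakeAllocation&& other) noexcept : handle(other.handle) {
    other.handle = 0;
  }
  FakeAllocation& operator=(FakeAllocation&& other) noexcept {
    std::swap(handle, other.handle);
    return *this;
  }
  ~FakeAllocation() {
    if (handle != 0) {
      std::printf("allocation %d freed by Allocation destructor\n", handle);
    }
  }
};

// Hypothetical stand-in for VulkanBuffer with the same ownership flags.
struct FakeBuffer {
  FakeAllocation memory;
  bool owns_memory = false;
  bool memory_bundled = false;

  void bind_allocation(const FakeAllocation& m) {
    memory.handle = m.handle;  // non-owning; caller manages lifetime
  }
  void acquire_allocation(FakeAllocation&& m) {
    memory = std::move(m);  // ownership transfers into the resource
    owns_memory = true;
  }
  ~FakeBuffer() {
    if (owns_memory) {
      if (memory_bundled) {
        // vmaDestroyBuffer path: memory is freed together with the handle,
        // so clear it to prevent a double free by the Allocation destructor.
        std::printf("allocation %d freed with the buffer\n", memory.handle);
        memory.handle = 0;
      }
      // else: owned but bound separately; let the Allocation destructor
      // free the memory.
    } else {
      memory.handle = 0;  // not owned; never free it here
    }
  }
};

int main() {
  FakeAllocation external(1);
  {
    FakeBuffer b;
    b.bind_allocation(external);  // external outlives b
  }  // b frees nothing; allocation 1 is freed at the end of main
  {
    FakeBuffer b;
    b.acquire_allocation(FakeAllocation(2));  // b now owns allocation 2
  }  // allocation 2 freed via the Allocation-destructor path
}
```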