From c5677ff9000aee8ccfafceabe3caf7d489a08c4f Mon Sep 17 00:00:00 2001 From: SS-JIA Date: Mon, 18 Aug 2025 11:16:24 -0400 Subject: [PATCH] [ET-VK] Runtime support for NamedDataMap Summary: Allow VulkanBackend to load constant tensors from the NamedDataMap instead of the constant data section of the delegate blob. ## Motivation This enables several key results: * Unblocks delegate retargetability with other backends * Allows reducing peak memory usage when loading models by freeing constant weight data as it gets moved to the GPU ## Changes * Allow `TensorRef` to be constructed with a `FreeableBuffer` rvalue * Add ability to load constant data from `NamedDataMap` in `VulkanBackend.cpp` * When prepacking, free the constant data pointer once it's been copied to the staging buffer Test Plan: CI Validate results by collecting memory measurements in the next few diffs. [ghstack-poisoned] --- backends/vulkan/runtime/VulkanBackend.cpp | 67 ++++++++++++++++--- .../vulkan/runtime/graph/ComputeGraph.cpp | 11 +++ backends/vulkan/runtime/graph/ComputeGraph.h | 10 +++ backends/vulkan/runtime/graph/Logging.cpp | 2 +- .../runtime/graph/containers/Constant.cpp | 17 ++++- .../runtime/graph/containers/Constant.h | 17 +++++ .../vulkan/runtime/graph/ops/PrepackNode.cpp | 3 + backends/vulkan/serialization/schema.fbs | 1 + .../serialization/vulkan_graph_schema.py | 1 + 9 files changed, 118 insertions(+), 11 deletions(-) diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 73b726bd32e..038f6935ff7 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -22,6 +22,7 @@ #include #endif // ET_EVENT_TRACER_ENABLED #include +#include #include #include @@ -47,6 +48,7 @@ using executorch::runtime::Error; using executorch::runtime::EValue; using executorch::runtime::FreeableBuffer; using executorch::runtime::kTensorDimensionLimit; +using executorch::runtime::NamedDataMap; using 
executorch::runtime::Result; using executorch::runtime::Span; @@ -69,8 +71,29 @@ using UIntVector = const flatbuffers::Vector*; const uint8_t* get_constant_data_ptr( VkGraphPtr flatbuffer_graph, const int32_t buffer_idx, - const uint8_t* constant_data) { + const uint8_t* constant_data, + const NamedDataMap* named_data_map, + std::vector& loaded_buffers_from_map) { VkBytesPtr constant_bytes = flatbuffer_graph->constants()->Get(buffer_idx); + + // Check if there's a named key for this constant data + if (constant_bytes->named_key() != nullptr && named_data_map != nullptr) { + const std::string& data_name = constant_bytes->named_key()->str(); + Result buffer = named_data_map->get_data(data_name.c_str()); + if (!buffer.ok()) { + ET_LOG( + Error, + "Failed to get constant data for key %s from named_data_map. Error code: %u", + data_name.c_str(), + static_cast(buffer.error())); + return nullptr; + } + const uint8_t* data_ptr = static_cast(buffer.get().data()); + loaded_buffers_from_map.push_back(std::move(buffer.get())); + return data_ptr; + } + + // Fallback to offset-based access return constant_data + constant_bytes->offset(); } @@ -166,6 +189,8 @@ class GraphBuilder { ComputeGraph* compute_graph_; VkGraphPtr flatbuffer_; const uint8_t* constant_data_; + const NamedDataMap* named_data_map_; + std::vector loaded_buffers_from_map_; std::vector ref_mapping_; @@ -173,10 +198,13 @@ class GraphBuilder { explicit GraphBuilder( ComputeGraph* compute_graph, VkGraphPtr flatbuffer, - const uint8_t* constant_data) + const uint8_t* constant_data, + const NamedDataMap* named_data_map) : compute_graph_(compute_graph), flatbuffer_(flatbuffer), constant_data_(constant_data), + named_data_map_(named_data_map), + loaded_buffers_from_map_(), ref_mapping_() {} void resize(uint32_t size) { @@ -212,10 +240,27 @@ class GraphBuilder { ValueRef ref; if (tensor_fb->constant_id() >= 0) { - const uint8_t* tensor_data = get_constant_data_ptr( - flatbuffer_, tensor_fb->constant_id(), 
constant_data_); + VkBytesPtr constant_bytes = + flatbuffer_->constants()->Get(tensor_fb->constant_id()); + + if (constant_bytes->named_key() != nullptr && + constant_bytes->offset() == UINT64_MAX && + named_data_map_ != nullptr) { + const std::string& data_name = constant_bytes->named_key()->str(); + Result buffer = + named_data_map_->get_data(data_name.c_str()); - ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data); + VK_CHECK_COND( + buffer.ok(), + "Failed to get constant data for key %s from named_data_map. Error code: %u", + data_name.c_str(), + static_cast(buffer.error())); + ref = compute_graph_->add_tensorref( + dims_vector, dtype, std::move(buffer.get())); + } else { + const uint8_t* tensor_data = constant_data_ + constant_bytes->offset(); + ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data); + } } else { ref = compute_graph_->add_tensor( dims_vector, @@ -479,8 +524,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { return true; } - ET_NODISCARD Error - compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const { + ET_NODISCARD Error compileModel( + const void* buffer_pointer, + ComputeGraph* compute_graph, + const NamedDataMap* named_data_map) const { Result header = VulkanDelegateHeader::parse(buffer_pointer); @@ -506,7 +553,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data); - GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data); + GraphBuilder builder( + compute_graph, flatbuffer_graph, constant_data, named_data_map); builder.build_graph(); @@ -532,7 +580,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { graph_config.external_adapter = vkapi::set_and_get_external_adapter(); new (compute_graph) ComputeGraph(graph_config); - Error err = compileModel(processed->data(), compute_graph); + const NamedDataMap* named_data_map = 
context.get_named_data_map(); + Error err = compileModel(processed->data(), compute_graph, named_data_map); // This backend does not need its processed data after compiling the // model. diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index acd20c9ee44..65ce71a9ce6 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref( return idx; } +ValueRef ComputeGraph::add_tensorref( + const std::vector& sizes, + const vkapi::ScalarType dtype, + executorch::runtime::FreeableBuffer&& buffer) { + ValueRef idx(static_cast(values_.size())); + check_no_active_value_ptrs(); + values_.emplace_back(TensorRef(sizes, dtype, std::move(buffer))); + total_constant_nbytes_ += values_.back().toConstTensorRef().nbytes(); + return idx; +} + ValueRef ComputeGraph::add_staging( const vkapi::ScalarType dtype, const size_t numel) { diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index e4556a9efe6..f594571f9a7 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -693,6 +693,16 @@ class ComputeGraph final { const vkapi::ScalarType dtype, const void* const data); + /* + * Add a `TensorRef` value to the graph with the specified properties. A + * `TensorRef` is a reference to an `api::vTensor` whose data is stored in a + * FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer. + */ + ValueRef add_tensorref( + const std::vector& sizes, + const vkapi::ScalarType dtype, + executorch::runtime::FreeableBuffer&& buffer); + /* * Add a staging buffer to the graph.
Staging buffers are data buffers that * use memory that is visible to both the CPU and GPU, and therefore is used diff --git a/backends/vulkan/runtime/graph/Logging.cpp b/backends/vulkan/runtime/graph/Logging.cpp index 7102345773c..081083e3a63 100644 --- a/backends/vulkan/runtime/graph/Logging.cpp +++ b/backends/vulkan/runtime/graph/Logging.cpp @@ -86,7 +86,7 @@ void ComputeGraph::print_readable() { ss << v_tensor.sizes(); std::cout << ss.str(); } else if (val.isTensorRef()) { - const TensorRef tensor_ref = val.toTensorRef(); + const TensorRef& tensor_ref = val.toTensorRef(); std::stringstream ss; ss << tensor_ref.sizes; std::cout << ss.str(); diff --git a/backends/vulkan/runtime/graph/containers/Constant.cpp b/backends/vulkan/runtime/graph/containers/Constant.cpp index cb43295a42a..4dc2cdda8f5 100644 --- a/backends/vulkan/runtime/graph/containers/Constant.cpp +++ b/backends/vulkan/runtime/graph/containers/Constant.cpp @@ -14,7 +14,22 @@ TensorRef::TensorRef( const std::vector& t_sizes, vkapi::ScalarType t_dtype, const void* const t_data) - : sizes{}, dtype{t_dtype}, data{t_data} { + : sizes{}, dtype{t_dtype}, data{t_data}, buffer{} { + size_t ndim = t_sizes.size(); + sizes.resize(ndim); + for (int i = 0; i < ndim; ++i) { + sizes[i] = t_sizes.at(i); + } +} + +TensorRef::TensorRef( + const std::vector& t_sizes, + vkapi::ScalarType t_dtype, + executorch::runtime::FreeableBuffer&& t_buffer) + : sizes{}, + dtype{t_dtype}, + data{t_buffer.data()}, + buffer{std::move(t_buffer)} { size_t ndim = t_sizes.size(); sizes.resize(ndim); for (int i = 0; i < ndim; ++i) { diff --git a/backends/vulkan/runtime/graph/containers/Constant.h b/backends/vulkan/runtime/graph/containers/Constant.h index aaa92360a9e..a18c284a219 100644 --- a/backends/vulkan/runtime/graph/containers/Constant.h +++ b/backends/vulkan/runtime/graph/containers/Constant.h @@ -9,6 +9,7 @@ #pragma once #include +#include namespace vkcompute { @@ -24,14 +25,30 @@ struct TensorRef final { vkapi::ScalarType dtype; 
const void* data; + // Optional FreeableBuffer for managing memory lifecycle + // This will be empty (default constructed) for the raw pointer constructor + executorch::runtime::FreeableBuffer buffer; + explicit TensorRef( const std::vector& t_sizes, vkapi::ScalarType t_dtype, const void* const t_data); + // Constructor that takes ownership of a FreeableBuffer + explicit TensorRef( + const std::vector& t_sizes, + vkapi::ScalarType t_dtype, + executorch::runtime::FreeableBuffer&& t_buffer); + inline size_t nbytes() const { return utils::multiply_integers(sizes) * vkapi::element_size(dtype); } + + // Manually free the buffer if needed (though it will be freed automatically + // on destruction). NOTE(review): `data` is left unchanged by this call; presumably it must not be read afterwards when it aliases `buffer` - confirm. + void free_buffer() { + buffer.Free(); + } }; } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index c8220df837b..03df92292f8 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t()); size_t nbytes = numel * vkapi::element_size(tref->dtype); staging.copy_from(tref->data, nbytes); + // Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer, + // it can be freed.
+ tref->free_buffer(); return staging; } diff --git a/backends/vulkan/serialization/schema.fbs b/backends/vulkan/serialization/schema.fbs index 99ba6a86594..b6670b6f53d 100644 --- a/backends/vulkan/serialization/schema.fbs +++ b/backends/vulkan/serialization/schema.fbs @@ -118,6 +118,7 @@ table VkValue { table VkBytes { offset:ulong; length:ulong; + named_key:string; } table VkGraph { diff --git a/backends/vulkan/serialization/vulkan_graph_schema.py b/backends/vulkan/serialization/vulkan_graph_schema.py index f845e5601a7..aa7641bd927 100644 --- a/backends/vulkan/serialization/vulkan_graph_schema.py +++ b/backends/vulkan/serialization/vulkan_graph_schema.py @@ -137,6 +137,7 @@ class VkValue: class VkBytes: offset: int length: int + named_key: str = "" @dataclass