diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 73b726bd32e..7b138072d50 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -22,6 +22,7 @@ #include #endif // ET_EVENT_TRACER_ENABLED #include +#include #include #include @@ -47,6 +48,7 @@ using executorch::runtime::Error; using executorch::runtime::EValue; using executorch::runtime::FreeableBuffer; using executorch::runtime::kTensorDimensionLimit; +using executorch::runtime::NamedDataMap; using executorch::runtime::Result; using executorch::runtime::Span; @@ -66,14 +68,6 @@ using BytesVector = const flatbuffers::Vector>*; using UIntVector = const flatbuffers::Vector*; -const uint8_t* get_constant_data_ptr( - VkGraphPtr flatbuffer_graph, - const int32_t buffer_idx, - const uint8_t* constant_data) { - VkBytesPtr constant_bytes = flatbuffer_graph->constants()->Get(buffer_idx); - return constant_data + constant_bytes->offset(); -} - vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) { switch (vk_datatype) { case vkgraph::VkDataType::BOOL: @@ -166,6 +160,8 @@ class GraphBuilder { ComputeGraph* compute_graph_; VkGraphPtr flatbuffer_; const uint8_t* constant_data_; + const NamedDataMap* named_data_map_; + std::vector loaded_buffers_from_map_; std::vector ref_mapping_; @@ -173,10 +169,13 @@ class GraphBuilder { explicit GraphBuilder( ComputeGraph* compute_graph, VkGraphPtr flatbuffer, - const uint8_t* constant_data) + const uint8_t* constant_data, + const NamedDataMap* named_data_map) : compute_graph_(compute_graph), flatbuffer_(flatbuffer), constant_data_(constant_data), + named_data_map_(named_data_map), + loaded_buffers_from_map_(), ref_mapping_() {} void resize(uint32_t size) { @@ -212,10 +211,27 @@ class GraphBuilder { ValueRef ref; if (tensor_fb->constant_id() >= 0) { - const uint8_t* tensor_data = get_constant_data_ptr( - flatbuffer_, tensor_fb->constant_id(), constant_data_); + VkBytesPtr constant_bytes = + flatbuffer_->constants()->Get(tensor_fb->constant_id()); - ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data); + if (constant_bytes->named_key() != nullptr && + constant_bytes->offset() == UINT64_MAX && + named_data_map_ != nullptr) { + const std::string& data_name = constant_bytes->named_key()->str(); + Result buffer = + named_data_map_->get_data(data_name.c_str()); + + VK_CHECK_COND( + buffer.ok(), + "Failed to get constant data for key %s from named_data_map. Error code: %u", + data_name.c_str(), + static_cast(buffer.error())); + ref = compute_graph_->add_tensorref( + dims_vector, dtype, std::move(buffer.get())); + } else { + const uint8_t* tensor_data = constant_data_ + constant_bytes->offset(); + ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data); + } } else { ref = compute_graph_->add_tensor( dims_vector, @@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { return true; } - ET_NODISCARD Error - compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const { + ET_NODISCARD Error compileModel( + const void* buffer_pointer, + ComputeGraph* compute_graph, + const NamedDataMap* named_data_map) const { Result header = VulkanDelegateHeader::parse(buffer_pointer); @@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data); - GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data); + GraphBuilder builder( + compute_graph, flatbuffer_graph, constant_data, named_data_map); builder.build_graph(); @@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { graph_config.external_adapter = vkapi::set_and_get_external_adapter(); new (compute_graph) ComputeGraph(graph_config); - Error err = compileModel(processed->data(), compute_graph); + const NamedDataMap* named_data_map = context.get_named_data_map(); + Error err = compileModel(processed->data(), compute_graph, named_data_map); // This backend does not need its processed data after compiling the // model. diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index acd20c9ee44..d57ba2b11d7 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref( return idx; } +ValueRef ComputeGraph::add_tensorref( + const std::vector& sizes, + const vkapi::ScalarType dtype, + executorch::runtime::FreeableBuffer&& buffer) { + ValueRef idx(static_cast(values_.size())); + check_no_active_value_ptrs(); + values_.emplace_back(TensorRef(sizes, dtype, std::move(buffer))); + total_constant_nbytes_ += values_.back().toConstTensorRef().nbytes(); + return idx; +} + ValueRef ComputeGraph::add_staging( const vkapi::ScalarType dtype, const size_t numel) { @@ -813,25 +824,8 @@ void ComputeGraph::prepare() { context_->initialize_querypool(); } - for (SharedObject& shared_object : shared_objects_) { - shared_object.allocate(this); - shared_object.bind_users(this); - } -} - -void ComputeGraph::prepare_pipelines() { - for (std::unique_ptr& node : prepack_nodes_) { - node->prepare_pipelines(this); - } - for (std::unique_ptr& node : execute_nodes_) { - node->prepare_pipelines(this); - } - context_->pipeline_cache().create_pipelines(pipeline_descriptors_); - - pipeline_descriptors_ = std::unordered_set< - vkapi::ComputePipelineCache::Key, - vkapi::ComputePipelineCache::Hasher>(); - + // Calculate the threshold at which a new command buffer should be created + // during execute() const size_t total_node_count = execute_nodes_.size(); size_t init_threshold = config_.execute_initial_threshold_node_count; size_t count_threshold = config_.execute_threshold_node_count; @@ -858,6 +852,25 @@ void ComputeGraph::prepare_pipelines() { } execute_threshold_node_count_ = count_threshold; + + for (SharedObject& shared_object : shared_objects_) { + shared_object.allocate(this); + shared_object.bind_users(this); + } +} + +void ComputeGraph::prepare_pipelines() { + for (std::unique_ptr& node : prepack_nodes_) { + node->prepare_pipelines(this); + } + for (std::unique_ptr& node : execute_nodes_) { + node->prepare_pipelines(this); + } + context_->pipeline_cache().create_pipelines(pipeline_descriptors_); + + pipeline_descriptors_ = std::unordered_set< + vkapi::ComputePipelineCache::Key, + vkapi::ComputePipelineCache::Hasher>(); } void ComputeGraph::submit_current_cmd(const bool final_use) { diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index e4556a9efe6..f594571f9a7 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -693,6 +693,16 @@ class ComputeGraph final { const vkapi::ScalarType dtype, const void* const data); + /* + * Add a `TensorRef` value to the graph with the specific properties. A + * `TensorRef` is a reference to a `api::vTensor` whose data is stored in a + * FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer. + */ + ValueRef add_tensorref( + const std::vector& sizes, + const vkapi::ScalarType dtype, + executorch::runtime::FreeableBuffer&& buffer); + /* * Add a staging buffer to the graph. Staging buffers are data buffers that * use memory that is visible to both the CPU and GPU, and therefore is used diff --git a/backends/vulkan/runtime/graph/Logging.cpp b/backends/vulkan/runtime/graph/Logging.cpp index 7102345773c..081083e3a63 100644 --- a/backends/vulkan/runtime/graph/Logging.cpp +++ b/backends/vulkan/runtime/graph/Logging.cpp @@ -86,7 +86,7 @@ void ComputeGraph::print_readable() { ss << v_tensor.sizes(); std::cout << ss.str(); } else if (val.isTensorRef()) { - const TensorRef tensor_ref = val.toTensorRef(); + const TensorRef& tensor_ref = val.toTensorRef(); std::stringstream ss; ss << tensor_ref.sizes; std::cout << ss.str(); diff --git a/backends/vulkan/runtime/graph/containers/Constant.cpp b/backends/vulkan/runtime/graph/containers/Constant.cpp index cb43295a42a..4dc2cdda8f5 100644 --- a/backends/vulkan/runtime/graph/containers/Constant.cpp +++ b/backends/vulkan/runtime/graph/containers/Constant.cpp @@ -14,7 +14,22 @@ TensorRef::TensorRef( const std::vector& t_sizes, vkapi::ScalarType t_dtype, const void* const t_data) - : sizes{}, dtype{t_dtype}, data{t_data} { + : sizes{}, dtype{t_dtype}, data{t_data}, buffer{} { + size_t ndim = t_sizes.size(); + sizes.resize(ndim); + for (int i = 0; i < ndim; ++i) { + sizes[i] = t_sizes.at(i); + } +} + +TensorRef::TensorRef( + const std::vector& t_sizes, + vkapi::ScalarType t_dtype, + executorch::runtime::FreeableBuffer&& t_buffer) + : sizes{}, + dtype{t_dtype}, + data{t_buffer.data()}, + buffer{std::move(t_buffer)} { size_t ndim = t_sizes.size(); sizes.resize(ndim); for (int i = 0; i < ndim; ++i) { diff --git a/backends/vulkan/runtime/graph/containers/Constant.h b/backends/vulkan/runtime/graph/containers/Constant.h index aaa92360a9e..a18c284a219 100644 --- a/backends/vulkan/runtime/graph/containers/Constant.h +++ b/backends/vulkan/runtime/graph/containers/Constant.h @@ -9,6 +9,7 @@ #pragma once #include +#include namespace vkcompute { @@ -24,14 +25,30 @@ struct TensorRef final { vkapi::ScalarType dtype; const void* data; + // Optional FreeableBuffer for managing memory lifecycle + // This will be empty (default constructed) for the raw pointer constructor + executorch::runtime::FreeableBuffer buffer; + explicit TensorRef( const std::vector& t_sizes, vkapi::ScalarType t_dtype, const void* const t_data); + // Constructor that takes ownership of a FreeableBuffer + explicit TensorRef( + const std::vector& t_sizes, + vkapi::ScalarType t_dtype, + executorch::runtime::FreeableBuffer&& t_buffer); + inline size_t nbytes() const { return utils::multiply_integers(sizes) * vkapi::element_size(dtype); } + + // Manually free the buffer if needed (though it will be freed automatically + // on destruction) + void free_buffer() { + buffer.Free(); + } }; } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index c8220df837b..03df92292f8 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t()); size_t nbytes = numel * vkapi::element_size(tref->dtype); staging.copy_from(tref->data, nbytes); + // Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer, + // it can be freed. + tref->free_buffer(); return staging; } diff --git a/backends/vulkan/serialization/schema.fbs b/backends/vulkan/serialization/schema.fbs index 99ba6a86594..b6670b6f53d 100644 --- a/backends/vulkan/serialization/schema.fbs +++ b/backends/vulkan/serialization/schema.fbs @@ -118,6 +118,7 @@ table VkValue { table VkBytes { offset:ulong; length:ulong; + named_key:string; } table VkGraph { diff --git a/backends/vulkan/serialization/vulkan_graph_schema.py b/backends/vulkan/serialization/vulkan_graph_schema.py index f845e5601a7..aa7641bd927 100644 --- a/backends/vulkan/serialization/vulkan_graph_schema.py +++ b/backends/vulkan/serialization/vulkan_graph_schema.py @@ -137,6 +137,7 @@ class VkValue: class VkBytes: offset: int length: int + named_key: str = "" @dataclass diff --git a/backends/vulkan/targets.bzl b/backends/vulkan/targets.bzl index ac26d202fe1..b9b96abdec4 100644 --- a/backends/vulkan/targets.bzl +++ b/backends/vulkan/targets.bzl @@ -263,6 +263,7 @@ def define_common_targets(is_fbcode = False): ], exported_deps = [ ":vulkan_graph_runtime_shaderlib{}".format(suffix), + "//executorch/runtime/backend:interface", ], define_static_target = True, # Static initialization is used to register operators to the global operator registry, @@ -303,8 +304,8 @@ def define_common_targets(is_fbcode = False): ":vulkan_graph_runtime{}".format(suffix), "//executorch/backends/vulkan/serialization:vk_delegate_schema", "//executorch/runtime/core:event_tracer", - "//executorch/runtime/backend:interface", "//executorch/runtime/core/exec_aten/util:tensor_util", + "//executorch/runtime/core:named_data_map", ], define_static_target = True, # VulkanBackend.cpp needs to compile with executor as whole diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index f99552ceee1..96adc13d3cd 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1036,12 +1036,12 @@ TEST_F(VulkanComputeAPITest, print_object_sizes) { // Current known size on 64 bit system: 1040 B EXPECT_TRUE(sizeof(vTensor) < 1200); - // Current known size on 64 bit system: 48 B - EXPECT_TRUE(sizeof(Value) < 56); + // Current known size on 64 bit system: 80 B + EXPECT_TRUE(sizeof(Value) < 100); // Current known size on 64 bit system: 120 B EXPECT_TRUE(sizeof(StagingBuffer) < 500); - // Current known size on 64 bit system: 512 B - EXPECT_TRUE(sizeof(ComputeGraph) < 600); + // Current known size on 64 bit system: 608 B + EXPECT_TRUE(sizeof(ComputeGraph) < 700); // Current known size on 64 bit system: 248 B EXPECT_TRUE(sizeof(DispatchNode) < 500); }