Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions backends/vulkan/runtime/VulkanBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
#endif // ET_EVENT_TRACER_ENABLED
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/core/named_data_map.h>
#include <executorch/runtime/platform/compiler.h>
#include <executorch/runtime/platform/profiler.h>

Expand All @@ -47,6 +48,7 @@ using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::kTensorDimensionLimit;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;
using executorch::runtime::Span;

Expand All @@ -66,14 +68,6 @@ using BytesVector =
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
using UIntVector = const flatbuffers::Vector<uint32_t>*;

/*
 * Resolves a pointer to the raw bytes of the constant tensor at `buffer_idx`.
 * The flatbuffer stores only an offset per constant; the actual bytes live in
 * the external `constant_data` segment, so the result is base + offset.
 */
const uint8_t* get_constant_data_ptr(
    VkGraphPtr flatbuffer_graph,
    const int32_t buffer_idx,
    const uint8_t* constant_data) {
  VkBytesPtr bytes_entry = flatbuffer_graph->constants()->Get(buffer_idx);
  const uint64_t byte_offset = bytes_entry->offset();
  return constant_data + byte_offset;
}

vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
switch (vk_datatype) {
case vkgraph::VkDataType::BOOL:
Expand Down Expand Up @@ -166,17 +160,22 @@ class GraphBuilder {
ComputeGraph* compute_graph_;
VkGraphPtr flatbuffer_;
const uint8_t* constant_data_;
const NamedDataMap* named_data_map_;
std::vector<FreeableBuffer> loaded_buffers_from_map_;

std::vector<ValueRef> ref_mapping_;

public:
explicit GraphBuilder(
ComputeGraph* compute_graph,
VkGraphPtr flatbuffer,
const uint8_t* constant_data)
const uint8_t* constant_data,
const NamedDataMap* named_data_map)
: compute_graph_(compute_graph),
flatbuffer_(flatbuffer),
constant_data_(constant_data),
named_data_map_(named_data_map),
loaded_buffers_from_map_(),
ref_mapping_() {}

void resize(uint32_t size) {
Expand Down Expand Up @@ -212,10 +211,27 @@ class GraphBuilder {

ValueRef ref;
if (tensor_fb->constant_id() >= 0) {
const uint8_t* tensor_data = get_constant_data_ptr(
flatbuffer_, tensor_fb->constant_id(), constant_data_);
VkBytesPtr constant_bytes =
flatbuffer_->constants()->Get(tensor_fb->constant_id());

ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
if (constant_bytes->named_key() != nullptr &&
constant_bytes->offset() == UINT64_MAX &&
named_data_map_ != nullptr) {
const std::string& data_name = constant_bytes->named_key()->str();
Result<FreeableBuffer> buffer =
named_data_map_->get_data(data_name.c_str());

VK_CHECK_COND(
buffer.ok(),
"Failed to get constant data for key %s from named_data_map. Error code: %u",
data_name.c_str(),
static_cast<uint32_t>(buffer.error()));
ref = compute_graph_->add_tensorref(
dims_vector, dtype, std::move(buffer.get()));
} else {
const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
}
} else {
ref = compute_graph_->add_tensor(
dims_vector,
Expand Down Expand Up @@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
return true;
}

ET_NODISCARD Error
compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
ET_NODISCARD Error compileModel(
const void* buffer_pointer,
ComputeGraph* compute_graph,
const NamedDataMap* named_data_map) const {
Result<VulkanDelegateHeader> header =
VulkanDelegateHeader::parse(buffer_pointer);

Expand All @@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {

VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);

GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
GraphBuilder builder(
compute_graph, flatbuffer_graph, constant_data, named_data_map);

builder.build_graph();

Expand All @@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
graph_config.external_adapter = vkapi::set_and_get_external_adapter();
new (compute_graph) ComputeGraph(graph_config);

Error err = compileModel(processed->data(), compute_graph);
const NamedDataMap* named_data_map = context.get_named_data_map();
Error err = compileModel(processed->data(), compute_graph, named_data_map);

// This backend does not need its processed data after compiling the
// model.
Expand Down
22 changes: 22 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,16 @@ VkMemoryRequirements vTensor::get_memory_requirements() const {
return {};
}

/*
 * Returns true if the tensor's underlying buffer or image resource is bound
 * to a memory allocation.
 */
bool vTensor::memory_is_bound() const {
  switch (storage_type()) {
    case utils::kBuffer:
      return storage_->buffer_.has_memory();
    case utils::kTexture2D:
    case utils::kTexture3D:
      return storage_->image_.has_memory();
  }
  // Defensive fallback: flowing off the end of a non-void function is
  // undefined behavior. This is unreachable for the storage types handled
  // above, but protects against an unhandled enumerator being added later.
  return false;
}

void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
switch (storage_type()) {
case utils::kBuffer:
Expand All @@ -909,6 +919,18 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
}
}

/*
 * Transfers ownership of `allocation` into the tensor's underlying resource.
 * The receiving resource (buffer or image, depending on storage type) both
 * binds and takes ownership of the moved-in allocation.
 */
void vTensor::acquire_allocation(vkapi::Allocation&& allocation) {
  switch (storage_type()) {
    case utils::kTexture2D:
    case utils::kTexture3D:
      storage_->image_.acquire_allocation(std::move(allocation));
      break;
    case utils::kBuffer:
      storage_->buffer_.acquire_allocation(std::move(allocation));
      break;
  }
}

void vTensor::update_metadata() {
numel_ = utils::multiply_integers(sizes_);
strides_ = calculate_strides(sizes_, dim_order_);
Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,12 @@ class vTensor final {
*/
VmaAllocationCreateInfo get_allocation_create_info() const;

/*
* Checks if the tensor's underlying buffer or image resource is bound to a
* memory allocation.
*/
bool memory_is_bound() const;

/*
* Return the VkMemoryRequirements of the underlying resource
*/
Expand All @@ -570,6 +576,11 @@ class vTensor final {
*/
void bind_allocation(const vkapi::Allocation& allocation);

/*
* Binds and acquires a rvalue memory allocation
*/
void acquire_allocation(vkapi::Allocation&& allocation);

private:
/*
* Assuming sizes, dim order, or axis mapping was modified, recompute all
Expand Down
75 changes: 52 additions & 23 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,6 @@ ValueRef ComputeGraph::add_tensor(
const utils::GPUMemoryLayout memory_layout,
const int64_t shared_object_idx,
const utils::AxisMapLayout axis_map_layout) {
bool allocate_memory = shared_object_idx < 0;

ValueRef idx(static_cast<int>(values_.size()));
check_no_active_value_ptrs();
values_.emplace_back(api::vTensor(
Expand All @@ -366,10 +364,10 @@ ValueRef ComputeGraph::add_tensor(
dtype,
storage_type,
memory_layout,
allocate_memory,
false,
axis_map_layout));

if (!allocate_memory) {
if (shared_object_idx >= 0) {
get_shared_object(shared_object_idx).add_user(this, idx);
}
return idx;
Expand Down Expand Up @@ -480,6 +478,17 @@ ValueRef ComputeGraph::add_tensorref(
return idx;
}

/*
 * Appends a TensorRef value whose constant data is owned by a FreeableBuffer
 * (e.g. obtained from a NamedDataMap). Ownership of the buffer is transferred
 * into the TensorRef, and the constant byte count bookkeeping is updated.
 * Returns the ValueRef index of the newly added value.
 */
ValueRef ComputeGraph::add_tensorref(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    executorch::runtime::FreeableBuffer&& buffer) {
  const ValueRef new_idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  TensorRef tref(sizes, dtype, std::move(buffer));
  values_.emplace_back(std::move(tref));
  total_constant_nbytes_ += values_.back().toConstTensorRef().nbytes();
  return new_idx;
}

ValueRef ComputeGraph::add_staging(
const vkapi::ScalarType dtype,
const size_t numel) {
Expand Down Expand Up @@ -615,6 +624,17 @@ SharedObject& ComputeGraph::get_shared_object(const int64_t idx) {
return shared_objects_.at(idx);
}

void ComputeGraph::create_dedicated_allocation_for(const ValueRef idx) {
vTensorPtr tensor = get_tensor(idx);
if (!tensor->memory_is_bound()) {
VmaAllocationCreateInfo alloc_create_info =
context()->adapter_ptr()->vma().gpuonly_resource_create_info();
tensor->acquire_allocation(
context()->adapter_ptr()->vma().create_allocation(
tensor->get_memory_requirements(), alloc_create_info));
}
}

void ComputeGraph::update_descriptor_counts(
const vkapi::ShaderInfo& shader_info,
bool execute) {
Expand Down Expand Up @@ -813,25 +833,8 @@ void ComputeGraph::prepare() {
context_->initialize_querypool();
}

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
}

void ComputeGraph::prepare_pipelines() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->prepare_pipelines(this);
}
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->prepare_pipelines(this);
}
context_->pipeline_cache().create_pipelines(pipeline_descriptors_);

pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();

// Calculate the threshold at which a new command buffer should be created
// during execute()
const size_t total_node_count = execute_nodes_.size();
size_t init_threshold = config_.execute_initial_threshold_node_count;
size_t count_threshold = config_.execute_threshold_node_count;
Expand Down Expand Up @@ -860,6 +863,20 @@ void ComputeGraph::prepare_pipelines() {
execute_threshold_node_count_ = count_threshold;
}

void ComputeGraph::prepare_pipelines() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->prepare_pipelines(this);
}
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->prepare_pipelines(this);
}
context_->pipeline_cache().create_pipelines(pipeline_descriptors_);

pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();
}

void ComputeGraph::submit_current_cmd(const bool final_use) {
context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
}
Expand Down Expand Up @@ -939,6 +956,18 @@ void ComputeGraph::prepack() {
submit_current_cmd_and_wait(/*final_use=*/true);
context_->flush();
staging_nbytes_in_cmd_ = 0;

// Initialize allocations for intermediate tensors
for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
// Make sure all remaining tensors have allocations
for (int i = 0; i < values_.size(); i++) {
if (values_.at(i).isTensor()) {
create_dedicated_allocation_for(i);
}
}
}

void ComputeGraph::execute() {
Expand Down
17 changes: 17 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,16 @@ class ComputeGraph final {
const vkapi::ScalarType dtype,
const void* const data);

/*
* Add a `TensorRef` value to the graph with the specific properties. A
* `TensorRef` is a reference to a `api::vTensor` whose data is stored in a
* FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer.
*/
ValueRef add_tensorref(
const std::vector<int64_t>& sizes,
const vkapi::ScalarType dtype,
executorch::runtime::FreeableBuffer&& buffer);

/*
* Add a staging buffer to the graph. Staging buffers are data buffers that
* use memory that is visible to both the CPU and GPU, and therefore is used
Expand Down Expand Up @@ -817,6 +827,13 @@ class ComputeGraph final {

SharedObject& get_shared_object(const int64_t idx);

/*
* Creates a dedicated memory allocation for a vTensor value, and have the
* tensor acquire the allocation object. If the tensor is already bound to a
* memory allocation, this function will be a no-op.
*/
void create_dedicated_allocation_for(const ValueRef idx);

//
// Graph Preparation
//
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/Logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
ss << v_tensor.sizes();
std::cout << ss.str();
} else if (val.isTensorRef()) {
const TensorRef tensor_ref = val.toTensorRef();
const TensorRef& tensor_ref = val.toTensorRef();
std::stringstream ss;
ss << tensor_ref.sizes;
std::cout << ss.str();
Expand Down
17 changes: 16 additions & 1 deletion backends/vulkan/runtime/graph/containers/Constant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,22 @@ TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
const void* const t_data)
: sizes{}, dtype{t_dtype}, data{t_data} {
: sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
sizes[i] = t_sizes.at(i);
}
}

TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
executorch::runtime::FreeableBuffer&& t_buffer)
: sizes{},
dtype{t_dtype},
data{t_buffer.data()},
buffer{std::move(t_buffer)} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
Expand Down
Loading
Loading