pytorch · SS-JIA · Aug 19, 2025 · Aug 18, 2025 · Aug 18, 2025 · Aug 18, 2025
@@ -22,6 +22,7 @@
 #include <executorch/runtime/core/event_tracer_hooks_delegate.h>
 #endif // ET_EVENT_TRACER_ENABLED
 #include <executorch/runtime/core/exec_aten/util/tensor_util.h>
+#include <executorch/runtime/core/named_data_map.h>
 #include <executorch/runtime/platform/compiler.h>
 #include <executorch/runtime/platform/profiler.h>
 
@@ -47,6 +48,7 @@ using executorch::runtime::Error;
 using executorch::runtime::EValue;
 using executorch::runtime::FreeableBuffer;
 using executorch::runtime::kTensorDimensionLimit;
+using executorch::runtime::NamedDataMap;
 using executorch::runtime::Result;
 using executorch::runtime::Span;
 
@@ -66,14 +68,6 @@ using BytesVector =
     const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
 using UIntVector = const flatbuffers::Vector<uint32_t>*;
 
-const uint8_t* get_constant_data_ptr(
-    VkGraphPtr flatbuffer_graph,
-    const int32_t buffer_idx,
-    const uint8_t* constant_data) {
-  VkBytesPtr constant_bytes = flatbuffer_graph->constants()->Get(buffer_idx);
-  return constant_data + constant_bytes->offset();
-}
-
 vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
   switch (vk_datatype) {
     case vkgraph::VkDataType::BOOL:
@@ -166,17 +160,22 @@ class GraphBuilder {
   ComputeGraph* compute_graph_;
   VkGraphPtr flatbuffer_;
   const uint8_t* constant_data_;
+  const NamedDataMap* named_data_map_;
+  std::vector<FreeableBuffer> loaded_buffers_from_map_;
 
   std::vector<ValueRef> ref_mapping_;
 
  public:
+  // Stores the compute graph, flatbuffer, constant-data and named-data-map
+  // pointers exactly as given. `named_data_map` may be null; it is null-checked
+  // before it is used to resolve a constant. `loaded_buffers_from_map_` and
+  // `ref_mapping_` start out empty.
   explicit GraphBuilder(
       ComputeGraph* compute_graph,
       VkGraphPtr flatbuffer,
-      const uint8_t* constant_data)
+      const uint8_t* constant_data,
+      const NamedDataMap* named_data_map)
       : compute_graph_(compute_graph),
         flatbuffer_(flatbuffer),
         constant_data_(constant_data),
+        named_data_map_(named_data_map),
+        loaded_buffers_from_map_(),
         ref_mapping_() {}
 
   void resize(uint32_t size) {
@@ -212,10 +211,27 @@ class GraphBuilder {
 
     ValueRef ref;
     if (tensor_fb->constant_id() >= 0) {
-      const uint8_t* tensor_data = get_constant_data_ptr(
-          flatbuffer_, tensor_fb->constant_id(), constant_data_);
+      VkBytesPtr constant_bytes =
+          flatbuffer_->constants()->Get(tensor_fb->constant_id());
 
-      ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
+      if (constant_bytes->named_key() != nullptr &&
+          constant_bytes->offset() == UINT64_MAX &&
+          named_data_map_ != nullptr) {
+        const std::string& data_name = constant_bytes->named_key()->str();
+        Result<FreeableBuffer> buffer =
+            named_data_map_->get_data(data_name.c_str());
+
+        VK_CHECK_COND(
+            buffer.ok(),
+            "Failed to get constant data for key %s from named_data_map. Error code: %u",
+            data_name.c_str(),
+            static_cast<uint32_t>(buffer.error()));
+        ref = compute_graph_->add_tensorref(
+            dims_vector, dtype, std::move(buffer.get()));
+      } else {
+        const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
+        ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
+      }
     } else {
       ref = compute_graph_->add_tensor(
           dims_vector,
@@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     return true;
   }
 
-  ET_NODISCARD Error
-  compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
+  ET_NODISCARD Error compileModel(
+      const void* buffer_pointer,
+      ComputeGraph* compute_graph,
+      const NamedDataMap* named_data_map) const {
     Result<VulkanDelegateHeader> header =
         VulkanDelegateHeader::parse(buffer_pointer);
 
@@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
 
     VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);
 
-    GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
+    GraphBuilder builder(
+        compute_graph, flatbuffer_graph, constant_data, named_data_map);
 
     builder.build_graph();
 
@@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     graph_config.external_adapter = vkapi::set_and_get_external_adapter();
     new (compute_graph) ComputeGraph(graph_config);
 
-    Error err = compileModel(processed->data(), compute_graph);
+    const NamedDataMap* named_data_map = context.get_named_data_map();
+    Error err = compileModel(processed->data(), compute_graph, named_data_map);
 
     // This backend does not need its processed data after compiling the
     // model.

@@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref(
   return idx;
 }
 
+// Appends a TensorRef built from `sizes`, `dtype` and `buffer` to the graph's
+// values and returns the index it was stored at. `buffer` is moved into the
+// new TensorRef, and the tensor's byte size is added to the running total of
+// constant bytes.
+ValueRef ComputeGraph::add_tensorref(
+    const std::vector<int64_t>& sizes,
+    const vkapi::ScalarType dtype,
+    executorch::runtime::FreeableBuffer&& buffer) {
+  // The new value lands at the current end of `values_`.
+  ValueRef new_ref(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
+
+  TensorRef owning_ref(sizes, dtype, std::move(buffer));
+  values_.emplace_back(std::move(owning_ref));
+
+  const auto& stored_ref = values_.back().toConstTensorRef();
+  total_constant_nbytes_ += stored_ref.nbytes();
+  return new_ref;
+}
+
 ValueRef ComputeGraph::add_staging(
     const vkapi::ScalarType dtype,
     const size_t numel) {
@@ -813,25 +824,8 @@ void ComputeGraph::prepare() {
     context_->initialize_querypool();
   }
 
-  for (SharedObject& shared_object : shared_objects_) {
-    shared_object.allocate(this);
-    shared_object.bind_users(this);
-  }
-}
-
-void ComputeGraph::prepare_pipelines() {
-  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
-    node->prepare_pipelines(this);
-  }
-  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
-    node->prepare_pipelines(this);
-  }
-  context_->pipeline_cache().create_pipelines(pipeline_descriptors_);
-
-  pipeline_descriptors_ = std::unordered_set<
-      vkapi::ComputePipelineCache::Key,
-      vkapi::ComputePipelineCache::Hasher>();
-
+  // Calculate the threshold at which a new command buffer should be created
+  // during execute()
   const size_t total_node_count = execute_nodes_.size();
   size_t init_threshold = config_.execute_initial_threshold_node_count;
   size_t count_threshold = config_.execute_threshold_node_count;
@@ -858,6 +852,25 @@ void ComputeGraph::prepare_pipelines() {
   }
 
   execute_threshold_node_count_ = count_threshold;
+
+  for (SharedObject& shared_object : shared_objects_) {
+    shared_object.allocate(this);
+    shared_object.bind_users(this);
+  }
+}
+
+// Calls prepare_pipelines() on every prepack node and then on every execute
+// node, hands the accumulated pipeline descriptors to the context's pipeline
+// cache, and finally replaces the descriptor set with an empty one.
+void ComputeGraph::prepare_pipelines() {
+  for (auto& prepack_node : prepack_nodes_) {
+    prepack_node->prepare_pipelines(this);
+  }
+  for (auto& execute_node : execute_nodes_) {
+    execute_node->prepare_pipelines(this);
+  }
+
+  context_->pipeline_cache().create_pipelines(pipeline_descriptors_);
+
+  // Assign a freshly constructed set rather than mutating the old one.
+  using PendingDescriptorSet = std::unordered_set<
+      vkapi::ComputePipelineCache::Key,
+      vkapi::ComputePipelineCache::Hasher>;
+  pipeline_descriptors_ = PendingDescriptorSet();
 }
 
 void ComputeGraph::submit_current_cmd(const bool final_use) {

@@ -693,6 +693,16 @@ class ComputeGraph final {
       const vkapi::ScalarType dtype,
       const void* const data);
 
+  /*
+   * Add a `TensorRef` value to the graph with the specified properties. A
+   * `TensorRef` is a reference to a `api::vTensor` whose data is stored in a
+   * FreeableBuffer. `buffer` is moved from: the new TensorRef takes ownership
+   * of the FreeableBuffer, so the caller should treat its own object as
+   * moved-from after this call.
+   */
+  ValueRef add_tensorref(
+      const std::vector<int64_t>& sizes,
+      const vkapi::ScalarType dtype,
+      executorch::runtime::FreeableBuffer&& buffer);
+
   /*
    * Add a staging buffer to the graph. Staging buffers are data buffers that
    * use memory that is visible to both the CPU and GPU, and therefore is used

@@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
       ss << v_tensor.sizes();
       std::cout << ss.str();
     } else if (val.isTensorRef()) {
-      const TensorRef tensor_ref = val.toTensorRef();
+      const TensorRef& tensor_ref = val.toTensorRef();
       std::stringstream ss;
       ss << tensor_ref.sizes;
       std::cout << ss.str();

@@ -14,7 +14,22 @@ TensorRef::TensorRef(
     const std::vector<int64_t>& t_sizes,
     vkapi::ScalarType t_dtype,
     const void* const t_data)
-    : sizes{}, dtype{t_dtype}, data{t_data} {
+    : sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
+  size_t ndim = t_sizes.size();
+  sizes.resize(ndim);
+  for (int i = 0; i < ndim; ++i) {
+    sizes[i] = t_sizes.at(i);
+  }
+}
+
+TensorRef::TensorRef(
+    const std::vector<int64_t>& t_sizes,
+    vkapi::ScalarType t_dtype,
+    executorch::runtime::FreeableBuffer&& t_buffer)
+    : sizes{},
+      dtype{t_dtype},
+      data{t_buffer.data()},
+      buffer{std::move(t_buffer)} {
   size_t ndim = t_sizes.size();
   sizes.resize(ndim);
   for (int i = 0; i < ndim; ++i) {

@@ -9,6 +9,7 @@
 #pragma once
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
+#include <executorch/runtime/core/freeable_buffer.h>
 
 namespace vkcompute {
 
@@ -24,14 +25,30 @@ struct TensorRef final {
   vkapi::ScalarType dtype;
   const void* data;
 
+  // Optional FreeableBuffer for managing memory lifecycle
+  // This will be empty (default constructed) for the raw pointer constructor
+  executorch::runtime::FreeableBuffer buffer;
+
   explicit TensorRef(
       const std::vector<int64_t>& t_sizes,
       vkapi::ScalarType t_dtype,
       const void* const t_data);
 
+  // Constructor that takes ownership of a FreeableBuffer
+  explicit TensorRef(
+      const std::vector<int64_t>& t_sizes,
+      vkapi::ScalarType t_dtype,
+      executorch::runtime::FreeableBuffer&& t_buffer);
+
   inline size_t nbytes() const {
     return utils::multiply_integers(sizes) * vkapi::element_size(dtype);
   }
+
+  // Frees `buffer` early; it is otherwise freed automatically on destruction.
+  // `data` is not reset here. NOTE(review): confirm it is not read afterwards.
+  void free_buffer() {
+    buffer.Free();
+  }
 };
 
 } // namespace vkcompute
@@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
   graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
   size_t nbytes = numel * vkapi::element_size(tref->dtype);
   staging.copy_from(tref->data, nbytes);
+  // Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer,
+  // it can be freed.
+  tref->free_buffer();
   return staging;
 }
 

@@ -118,6 +118,7 @@ table VkValue {
 table VkBytes {
   offset:ulong;
   length:ulong;
+  // Key of this constant in the named data map. The runtime fetches the data
+  // by this key when the key is present, `offset` equals the maximum ulong
+  // value, and a named data map is available; otherwise `offset` is used.
+  named_key:string;
 }
 
 table VkGraph {

@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import ctypes
+import hashlib
 import logging
 import operator
 from types import NoneType
@@ -25,6 +27,7 @@
     is_symint_node,
     TensorRepr,
 )
+from executorch.exir._serialize._named_data_store import NamedDataStore
 from executorch.exir.backend.utils import DelegateMappingBuilder
 
 from executorch.exir.tensor import TensorSpec
@@ -56,6 +59,7 @@ def __init__(
         self.input_ids = []
         self.output_ids = []
         self.const_tensors = []
+        self.named_data_store = NamedDataStore()
 
         # Mapping from Node to VkValue id
         self.node_to_value_ids = {}
@@ -129,8 +133,36 @@ def get_param_tensor(self, node: Node) -> torch.Tensor:
     def maybe_add_constant_tensor(self, node: Node) -> int:
         constant_id = -1
         if is_param_node(self.program, node):
-            constant_id = len(self.const_tensors)
-            self.const_tensors.append(self.get_param_tensor(node))
+            tensor = self.get_param_tensor(node)
+
+            # Serialize tensor data to bytes
+            tensor = tensor.contiguous()
+            size = tensor.untyped_storage().nbytes()
+
+            if size > 0:
+                array_type = ctypes.c_char * size
+                array = ctypes.cast(
+                    tensor.untyped_storage().data_ptr(),
+                    ctypes.POINTER(array_type),
+                ).contents
+
+                # Generate SHA256 hash as the named key
+                tensor_bytes = bytes(array)
+                sha256_hash = hashlib.sha256(tensor_bytes)
+                named_key = sha256_hash.hexdigest()
+
+                # Add to named data store with 16-byte alignment (matching XNNPACK)
+                self.named_data_store.add_named_data(
+                    named_key, tensor_bytes, alignment=16
+                )
+
+                # Create VkBytes entry with named_key and set offset to indicate named data usage
+                constant_id = len(self.const_tensors)
+                self.const_tensors.append((named_key, size))
+            else:
+                # Handle empty tensors
+                constant_id = len(self.const_tensors)
+                self.const_tensors.append(None)
 
         return constant_id
 

@@ -137,6 +137,7 @@ class VkValue:
 class VkBytes:
     offset: int
     length: int
+    named_key: str = ""
 
 
 @dataclass