diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index 73b726bd32e..7b138072d50 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -22,6 +22,7 @@
 #include <executorch/runtime/core/event_tracer_hooks_delegate.h>
 #endif // ET_EVENT_TRACER_ENABLED
 #include <executorch/runtime/core/exec_aten/util/tensor_util.h>
+#include <executorch/runtime/core/named_data_map.h>
 #include <executorch/runtime/platform/compiler.h>
 #include <executorch/runtime/platform/profiler.h>
 
@@ -47,6 +48,7 @@ using executorch::runtime::Error;
 using executorch::runtime::EValue;
 using executorch::runtime::FreeableBuffer;
 using executorch::runtime::kTensorDimensionLimit;
+using executorch::runtime::NamedDataMap;
 using executorch::runtime::Result;
 using executorch::runtime::Span;
 
@@ -66,14 +68,6 @@ using BytesVector =
     const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
 using UIntVector = const flatbuffers::Vector<uint32_t>*;
 
-const uint8_t* get_constant_data_ptr(
-    VkGraphPtr flatbuffer_graph,
-    const int32_t buffer_idx,
-    const uint8_t* constant_data) {
-  VkBytesPtr constant_bytes = flatbuffer_graph->constants()->Get(buffer_idx);
-  return constant_data + constant_bytes->offset();
-}
-
 vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
   switch (vk_datatype) {
     case vkgraph::VkDataType::BOOL:
@@ -166,6 +160,8 @@ class GraphBuilder {
   ComputeGraph* compute_graph_;
   VkGraphPtr flatbuffer_;
   const uint8_t* constant_data_;
+  const NamedDataMap* named_data_map_;
+  std::vector<FreeableBuffer> loaded_buffers_from_map_;
 
   std::vector<ValueRef> ref_mapping_;
 
@@ -173,10 +169,13 @@ class GraphBuilder {
   explicit GraphBuilder(
       ComputeGraph* compute_graph,
       VkGraphPtr flatbuffer,
-      const uint8_t* constant_data)
+      const uint8_t* constant_data,
+      const NamedDataMap* named_data_map)
       : compute_graph_(compute_graph),
         flatbuffer_(flatbuffer),
         constant_data_(constant_data),
+        named_data_map_(named_data_map),
+        loaded_buffers_from_map_(),
         ref_mapping_() {}
 
   void resize(uint32_t size) {
@@ -212,10 +211,27 @@ class GraphBuilder {
 
     ValueRef ref;
     if (tensor_fb->constant_id() >= 0) {
-      const uint8_t* tensor_data = get_constant_data_ptr(
-          flatbuffer_, tensor_fb->constant_id(), constant_data_);
+      VkBytesPtr constant_bytes =
+          flatbuffer_->constants()->Get(tensor_fb->constant_id());
 
-      ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
+      if (constant_bytes->named_key() != nullptr &&
+          constant_bytes->offset() == UINT64_MAX &&
+          named_data_map_ != nullptr) {
+        const std::string& data_name = constant_bytes->named_key()->str();
+        Result<FreeableBuffer> buffer =
+            named_data_map_->get_data(data_name.c_str());
+
+        VK_CHECK_COND(
+            buffer.ok(),
+            "Failed to get constant data for key %s from named_data_map. Error code: %u",
+            data_name.c_str(),
+            static_cast<uint32_t>(buffer.error()));
+        ref = compute_graph_->add_tensorref(
+            dims_vector, dtype, std::move(buffer.get()));
+      } else {
+        const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
+        ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
+      }
     } else {
       ref = compute_graph_->add_tensor(
           dims_vector,
@@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     return true;
   }
 
-  ET_NODISCARD Error
-  compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
+  ET_NODISCARD Error compileModel(
+      const void* buffer_pointer,
+      ComputeGraph* compute_graph,
+      const NamedDataMap* named_data_map) const {
     Result<VulkanDelegateHeader> header =
         VulkanDelegateHeader::parse(buffer_pointer);
 
@@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
 
     VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);
 
-    GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
+    GraphBuilder builder(
+        compute_graph, flatbuffer_graph, constant_data, named_data_map);
 
     builder.build_graph();
 
@@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     graph_config.external_adapter = vkapi::set_and_get_external_adapter();
     new (compute_graph) ComputeGraph(graph_config);
 
-    Error err = compileModel(processed->data(), compute_graph);
+    const NamedDataMap* named_data_map = context.get_named_data_map();
+    Error err = compileModel(processed->data(), compute_graph, named_data_map);
 
     // This backend does not need its processed data after compiling the
     // model.
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index acd20c9ee44..d57ba2b11d7 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref(
   return idx;
 }
 
+ValueRef ComputeGraph::add_tensorref(
+    const std::vector<int64_t>& sizes,
+    const vkapi::ScalarType dtype,
+    executorch::runtime::FreeableBuffer&& buffer) {
+  ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
+  values_.emplace_back(TensorRef(sizes, dtype, std::move(buffer)));
+  total_constant_nbytes_ += values_.back().toConstTensorRef().nbytes();
+  return idx;
+}
+
 ValueRef ComputeGraph::add_staging(
     const vkapi::ScalarType dtype,
     const size_t numel) {
@@ -813,25 +824,8 @@ void ComputeGraph::prepare() {
     context_->initialize_querypool();
   }
 
-  for (SharedObject& shared_object : shared_objects_) {
-    shared_object.allocate(this);
-    shared_object.bind_users(this);
-  }
-}
-
-void ComputeGraph::prepare_pipelines() {
-  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
-    node->prepare_pipelines(this);
-  }
-  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
-    node->prepare_pipelines(this);
-  }
-  context_->pipeline_cache().create_pipelines(pipeline_descriptors_);
-
-  pipeline_descriptors_ = std::unordered_set<
-      vkapi::ComputePipelineCache::Key,
-      vkapi::ComputePipelineCache::Hasher>();
-
+  // Calculate the threshold at which a new command buffer should be created
+  // during execute()
   const size_t total_node_count = execute_nodes_.size();
   size_t init_threshold = config_.execute_initial_threshold_node_count;
   size_t count_threshold = config_.execute_threshold_node_count;
@@ -858,6 +852,25 @@ void ComputeGraph::prepare_pipelines() {
   }
 
   execute_threshold_node_count_ = count_threshold;
+
+  for (SharedObject& shared_object : shared_objects_) {
+    shared_object.allocate(this);
+    shared_object.bind_users(this);
+  }
+}
+
+void ComputeGraph::prepare_pipelines() {
+  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
+    node->prepare_pipelines(this);
+  }
+  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
+    node->prepare_pipelines(this);
+  }
+  context_->pipeline_cache().create_pipelines(pipeline_descriptors_);
+
+  pipeline_descriptors_ = std::unordered_set<
+      vkapi::ComputePipelineCache::Key,
+      vkapi::ComputePipelineCache::Hasher>();
 }
 
 void ComputeGraph::submit_current_cmd(const bool final_use) {
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index e4556a9efe6..f594571f9a7 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -693,6 +693,16 @@ class ComputeGraph final {
       const vkapi::ScalarType dtype,
       const void* const data);
 
+  /*
+   * Add a `TensorRef` value to the graph with the specific properties. A
+   * `TensorRef` is a reference to a `api::vTensor` whose data is stored in a
+   * FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer.
+   */
+  ValueRef add_tensorref(
+      const std::vector<int64_t>& sizes,
+      const vkapi::ScalarType dtype,
+      executorch::runtime::FreeableBuffer&& buffer);
+
   /*
    * Add a staging buffer to the graph. Staging buffers are data buffers that
    * use memory that is visible to both the CPU and GPU, and therefore is used
diff --git a/backends/vulkan/runtime/graph/Logging.cpp b/backends/vulkan/runtime/graph/Logging.cpp
index 7102345773c..081083e3a63 100644
--- a/backends/vulkan/runtime/graph/Logging.cpp
+++ b/backends/vulkan/runtime/graph/Logging.cpp
@@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
       ss << v_tensor.sizes();
       std::cout << ss.str();
     } else if (val.isTensorRef()) {
-      const TensorRef tensor_ref = val.toTensorRef();
+      const TensorRef& tensor_ref = val.toTensorRef();
       std::stringstream ss;
       ss << tensor_ref.sizes;
       std::cout << ss.str();
diff --git a/backends/vulkan/runtime/graph/containers/Constant.cpp b/backends/vulkan/runtime/graph/containers/Constant.cpp
index cb43295a42a..4dc2cdda8f5 100644
--- a/backends/vulkan/runtime/graph/containers/Constant.cpp
+++ b/backends/vulkan/runtime/graph/containers/Constant.cpp
@@ -14,7 +14,22 @@ TensorRef::TensorRef(
     const std::vector<int64_t>& t_sizes,
     vkapi::ScalarType t_dtype,
     const void* const t_data)
-    : sizes{}, dtype{t_dtype}, data{t_data} {
+    : sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
+  size_t ndim = t_sizes.size();
+  sizes.resize(ndim);
+  for (int i = 0; i < ndim; ++i) {
+    sizes[i] = t_sizes.at(i);
+  }
+}
+
+TensorRef::TensorRef(
+    const std::vector<int64_t>& t_sizes,
+    vkapi::ScalarType t_dtype,
+    executorch::runtime::FreeableBuffer&& t_buffer)
+    : sizes{},
+      dtype{t_dtype},
+      data{t_buffer.data()},
+      buffer{std::move(t_buffer)} {
   size_t ndim = t_sizes.size();
   sizes.resize(ndim);
   for (int i = 0; i < ndim; ++i) {
diff --git a/backends/vulkan/runtime/graph/containers/Constant.h b/backends/vulkan/runtime/graph/containers/Constant.h
index aaa92360a9e..a18c284a219 100644
--- a/backends/vulkan/runtime/graph/containers/Constant.h
+++ b/backends/vulkan/runtime/graph/containers/Constant.h
@@ -9,6 +9,7 @@
 #pragma once
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
+#include <executorch/runtime/core/freeable_buffer.h>
 
 namespace vkcompute {
 
@@ -24,14 +25,30 @@ struct TensorRef final {
   vkapi::ScalarType dtype;
   const void* data;
 
+  // Optional FreeableBuffer for managing memory lifecycle
+  // This will be empty (default constructed) for the raw pointer constructor
+  executorch::runtime::FreeableBuffer buffer;
+
   explicit TensorRef(
       const std::vector<int64_t>& t_sizes,
       vkapi::ScalarType t_dtype,
       const void* const t_data);
 
+  // Constructor that takes ownership of a FreeableBuffer
+  explicit TensorRef(
+      const std::vector<int64_t>& t_sizes,
+      vkapi::ScalarType t_dtype,
+      executorch::runtime::FreeableBuffer&& t_buffer);
+
   inline size_t nbytes() const {
     return utils::multiply_integers(sizes) * vkapi::element_size(dtype);
   }
+
+  // Manually free the buffer if needed (though it will be freed automatically
+  // on destruction)
+  void free_buffer() {
+    buffer.Free();
+  }
 };
 
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
index c8220df837b..03df92292f8 100644
--- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
@@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
   graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
   size_t nbytes = numel * vkapi::element_size(tref->dtype);
   staging.copy_from(tref->data, nbytes);
+  // Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer,
+  // it can be freed.
+  tref->free_buffer();
   return staging;
 }
 
diff --git a/backends/vulkan/serialization/schema.fbs b/backends/vulkan/serialization/schema.fbs
index 99ba6a86594..b6670b6f53d 100644
--- a/backends/vulkan/serialization/schema.fbs
+++ b/backends/vulkan/serialization/schema.fbs
@@ -118,6 +118,7 @@ table VkValue {
 table VkBytes {
   offset:ulong;
   length:ulong;
+  named_key:string;
 }
 
 table VkGraph {
diff --git a/backends/vulkan/serialization/vulkan_graph_schema.py b/backends/vulkan/serialization/vulkan_graph_schema.py
index f845e5601a7..aa7641bd927 100644
--- a/backends/vulkan/serialization/vulkan_graph_schema.py
+++ b/backends/vulkan/serialization/vulkan_graph_schema.py
@@ -137,6 +137,7 @@ class VkValue:
 class VkBytes:
     offset: int
     length: int
+    named_key: str = ""
 
 
 @dataclass
diff --git a/backends/vulkan/targets.bzl b/backends/vulkan/targets.bzl
index ac26d202fe1..b9b96abdec4 100644
--- a/backends/vulkan/targets.bzl
+++ b/backends/vulkan/targets.bzl
@@ -263,6 +263,7 @@ def define_common_targets(is_fbcode = False):
             ],
             exported_deps = [
                 ":vulkan_graph_runtime_shaderlib{}".format(suffix),
+                "//executorch/runtime/backend:interface",
             ],
             define_static_target = True,
             # Static initialization is used to register operators to the global operator registry,
@@ -303,8 +304,8 @@ def define_common_targets(is_fbcode = False):
                 ":vulkan_graph_runtime{}".format(suffix),
                 "//executorch/backends/vulkan/serialization:vk_delegate_schema",
                 "//executorch/runtime/core:event_tracer",
-                "//executorch/runtime/backend:interface",
                 "//executorch/runtime/core/exec_aten/util:tensor_util",
+                "//executorch/runtime/core:named_data_map",
             ],
             define_static_target = True,
             # VulkanBackend.cpp needs to compile with executor as whole
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index f99552ceee1..96adc13d3cd 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -1036,12 +1036,12 @@ TEST_F(VulkanComputeAPITest, print_object_sizes) {
 
   // Current known size on 64 bit system: 1040 B
   EXPECT_TRUE(sizeof(vTensor) < 1200);
-  // Current known size on 64 bit system: 48 B
-  EXPECT_TRUE(sizeof(Value) < 56);
+  // Current known size on 64 bit system: 80 B
+  EXPECT_TRUE(sizeof(Value) < 100);
   // Current known size on 64 bit system: 120 B
   EXPECT_TRUE(sizeof(StagingBuffer) < 500);
-  // Current known size on 64 bit system: 512 B
-  EXPECT_TRUE(sizeof(ComputeGraph) < 600);
+  // Current known size on 64 bit system: 608 B
+  EXPECT_TRUE(sizeof(ComputeGraph) < 700);
   // Current known size on 64 bit system: 248 B
   EXPECT_TRUE(sizeof(DispatchNode) < 500);
 }