Skip to content

Commit 3c4cabb

Browse files
authored
[ET-VK] Runtime support for NamedDataMap (#13498)
Summary:
Allow VulkanBackend to load constant tensors from the NamedDataMap instead of the constant data section of the delegate blob.

## Motivation

This enables several key results:

* Unblocks delegate retargetability with other backends
* Allows reducing peak memory usage when loading models by freeing constant weight data as it gets moved to the GPU

## Changes

* Allow `TensorRef` to be constructed with a `FreeableBuffer` rvalue
* Add ability to load constant data from `NamedDataMap` in `VulkanBackend.cpp`
* When prepacking, free the constant data pointer once it's been copied to the staging buffer

Test Plan:
CI

Validate results by collecting memory measurements in the next few diffs.

Differential Revision: [D80460035](https://our.internmc.facebook.com/intern/diff/D80460035)

[ghstack-poisoned]
1 parent 7622df0 commit 3c4cabb

File tree

11 files changed

+102
-23
lines changed

11 files changed

+102
-23
lines changed

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
2323
#endif // ET_EVENT_TRACER_ENABLED
2424
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
25+
#include <executorch/runtime/core/named_data_map.h>
2526
#include <executorch/runtime/platform/compiler.h>
2627
#include <executorch/runtime/platform/profiler.h>
2728

@@ -47,6 +48,7 @@ using executorch::runtime::Error;
4748
using executorch::runtime::EValue;
4849
using executorch::runtime::FreeableBuffer;
4950
using executorch::runtime::kTensorDimensionLimit;
51+
using executorch::runtime::NamedDataMap;
5052
using executorch::runtime::Result;
5153
using executorch::runtime::Span;
5254

@@ -66,14 +68,6 @@ using BytesVector =
6668
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
6769
using UIntVector = const flatbuffers::Vector<uint32_t>*;
6870

69-
const uint8_t* get_constant_data_ptr(
70-
VkGraphPtr flatbuffer_graph,
71-
const int32_t buffer_idx,
72-
const uint8_t* constant_data) {
73-
VkBytesPtr constant_bytes = flatbuffer_graph->constants()->Get(buffer_idx);
74-
return constant_data + constant_bytes->offset();
75-
}
76-
7771
vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
7872
switch (vk_datatype) {
7973
case vkgraph::VkDataType::BOOL:
@@ -166,17 +160,22 @@ class GraphBuilder {
166160
ComputeGraph* compute_graph_;
167161
VkGraphPtr flatbuffer_;
168162
const uint8_t* constant_data_;
163+
const NamedDataMap* named_data_map_;
164+
std::vector<FreeableBuffer> loaded_buffers_from_map_;
169165

170166
std::vector<ValueRef> ref_mapping_;
171167

172168
public:
173169
explicit GraphBuilder(
174170
ComputeGraph* compute_graph,
175171
VkGraphPtr flatbuffer,
176-
const uint8_t* constant_data)
172+
const uint8_t* constant_data,
173+
const NamedDataMap* named_data_map)
177174
: compute_graph_(compute_graph),
178175
flatbuffer_(flatbuffer),
179176
constant_data_(constant_data),
177+
named_data_map_(named_data_map),
178+
loaded_buffers_from_map_(),
180179
ref_mapping_() {}
181180

182181
void resize(uint32_t size) {
@@ -212,10 +211,27 @@ class GraphBuilder {
212211

213212
ValueRef ref;
214213
if (tensor_fb->constant_id() >= 0) {
215-
const uint8_t* tensor_data = get_constant_data_ptr(
216-
flatbuffer_, tensor_fb->constant_id(), constant_data_);
214+
VkBytesPtr constant_bytes =
215+
flatbuffer_->constants()->Get(tensor_fb->constant_id());
217216

218-
ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
217+
if (constant_bytes->named_key() != nullptr &&
218+
constant_bytes->offset() == UINT64_MAX &&
219+
named_data_map_ != nullptr) {
220+
const std::string& data_name = constant_bytes->named_key()->str();
221+
Result<FreeableBuffer> buffer =
222+
named_data_map_->get_data(data_name.c_str());
223+
224+
VK_CHECK_COND(
225+
buffer.ok(),
226+
"Failed to get constant data for key %s from named_data_map. Error code: %u",
227+
data_name.c_str(),
228+
static_cast<uint32_t>(buffer.error()));
229+
ref = compute_graph_->add_tensorref(
230+
dims_vector, dtype, std::move(buffer.get()));
231+
} else {
232+
const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
233+
ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
234+
}
219235
} else {
220236
ref = compute_graph_->add_tensor(
221237
dims_vector,
@@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
479495
return true;
480496
}
481497

482-
ET_NODISCARD Error
483-
compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
498+
ET_NODISCARD Error compileModel(
499+
const void* buffer_pointer,
500+
ComputeGraph* compute_graph,
501+
const NamedDataMap* named_data_map) const {
484502
Result<VulkanDelegateHeader> header =
485503
VulkanDelegateHeader::parse(buffer_pointer);
486504

@@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
506524

507525
VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);
508526

509-
GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
527+
GraphBuilder builder(
528+
compute_graph, flatbuffer_graph, constant_data, named_data_map);
510529

511530
builder.build_graph();
512531

@@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
532551
graph_config.external_adapter = vkapi::set_and_get_external_adapter();
533552
new (compute_graph) ComputeGraph(graph_config);
534553

535-
Error err = compileModel(processed->data(), compute_graph);
554+
const NamedDataMap* named_data_map = context.get_named_data_map();
555+
Error err = compileModel(processed->data(), compute_graph, named_data_map);
536556

537557
// This backend does not need its processed data after compiling the
538558
// model.

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref(
480480
return idx;
481481
}
482482

483+
// Appends a new `TensorRef` value, built from `sizes`, `dtype` and the given
// FreeableBuffer, to the end of `values_` and returns its index. The buffer is
// moved into the TensorRef, so the caller's `buffer` is left in a moved-from
// state. The new TensorRef's `nbytes()` is added to `total_constant_nbytes_`.
ValueRef ComputeGraph::add_tensorref(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    executorch::runtime::FreeableBuffer&& buffer) {
  // The index of the new value is the current size of the value list.
  ValueRef new_ref(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();

  TensorRef owned_tref(sizes, dtype, std::move(buffer));
  values_.emplace_back(std::move(owned_tref));

  const size_t added_nbytes = values_.back().toConstTensorRef().nbytes();
  total_constant_nbytes_ += added_nbytes;
  return new_ref;
}
493+
483494
ValueRef ComputeGraph::add_staging(
484495
const vkapi::ScalarType dtype,
485496
const size_t numel) {

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,16 @@ class ComputeGraph final {
693693
const vkapi::ScalarType dtype,
694694
const void* const data);
695695

696+
/*
697+
* Add a `TensorRef` value to the graph with the specific properties. A
698+
* `TensorRef` is a reference to a `api::vTensor` whose data is stored in a
699+
* FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer.
700+
*/
701+
ValueRef add_tensorref(
702+
const std::vector<int64_t>& sizes,
703+
const vkapi::ScalarType dtype,
704+
executorch::runtime::FreeableBuffer&& buffer);
705+
696706
/*
697707
* Add a staging buffer to the graph. Staging buffers are data buffers that
698708
* use memory that is visible to both the CPU and GPU, and therefore is used

backends/vulkan/runtime/graph/Logging.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
8686
ss << v_tensor.sizes();
8787
std::cout << ss.str();
8888
} else if (val.isTensorRef()) {
89-
const TensorRef tensor_ref = val.toTensorRef();
89+
const TensorRef& tensor_ref = val.toTensorRef();
9090
std::stringstream ss;
9191
ss << tensor_ref.sizes;
9292
std::cout << ss.str();

backends/vulkan/runtime/graph/containers/Constant.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,22 @@ TensorRef::TensorRef(
1414
const std::vector<int64_t>& t_sizes,
1515
vkapi::ScalarType t_dtype,
1616
const void* const t_data)
17-
: sizes{}, dtype{t_dtype}, data{t_data} {
17+
: sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
18+
size_t ndim = t_sizes.size();
19+
sizes.resize(ndim);
20+
for (int i = 0; i < ndim; ++i) {
21+
sizes[i] = t_sizes.at(i);
22+
}
23+
}
24+
25+
TensorRef::TensorRef(
26+
const std::vector<int64_t>& t_sizes,
27+
vkapi::ScalarType t_dtype,
28+
executorch::runtime::FreeableBuffer&& t_buffer)
29+
: sizes{},
30+
dtype{t_dtype},
31+
data{t_buffer.data()},
32+
buffer{std::move(t_buffer)} {
1833
size_t ndim = t_sizes.size();
1934
sizes.resize(ndim);
2035
for (int i = 0; i < ndim; ++i) {

backends/vulkan/runtime/graph/containers/Constant.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#pragma once
1010

1111
#include <executorch/backends/vulkan/runtime/api/Context.h>
12+
#include <executorch/runtime/core/freeable_buffer.h>
1213

1314
namespace vkcompute {
1415

@@ -24,14 +25,30 @@ struct TensorRef final {
2425
vkapi::ScalarType dtype;
2526
const void* data;
2627

28+
// Optional FreeableBuffer for managing memory lifecycle
29+
// This will be empty (default constructed) for the raw pointer constructor
30+
executorch::runtime::FreeableBuffer buffer;
31+
2732
explicit TensorRef(
2833
const std::vector<int64_t>& t_sizes,
2934
vkapi::ScalarType t_dtype,
3035
const void* const t_data);
3136

37+
// Constructor that takes ownership of a FreeableBuffer
38+
explicit TensorRef(
39+
const std::vector<int64_t>& t_sizes,
40+
vkapi::ScalarType t_dtype,
41+
executorch::runtime::FreeableBuffer&& t_buffer);
42+
3243
inline size_t nbytes() const {
3344
return utils::multiply_integers(sizes) * vkapi::element_size(dtype);
3445
}
46+
47+
// Manually free the buffer if needed (though it will be freed automatically
48+
// on destruction). This only calls `buffer.Free()`; the `data` member is left
// untouched.
// NOTE(review): when this TensorRef was constructed from a FreeableBuffer,
// `data` was initialized from that buffer's pointer, so it presumably must not
// be read after this call -- confirm against FreeableBuffer::Free() semantics.
49+
void free_buffer() {
50+
buffer.Free();
51+
}
3552
};
3653

3754
} // namespace vkcompute

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
6464
graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
6565
size_t nbytes = numel * vkapi::element_size(tref->dtype);
6666
staging.copy_from(tref->data, nbytes);
67+
// Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer,
68+
// it can be freed.
69+
tref->free_buffer();
6770
return staging;
6871
}
6972

backends/vulkan/serialization/schema.fbs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ table VkValue {
118118
// Describes where the bytes of one constant tensor can be found.
table VkBytes {
119119
// Offset that the runtime adds to the base pointer of the delegate's constant
// data to locate the tensor bytes. The runtime treats UINT64_MAX here,
// together with a non-null `named_key`, as "load from the NamedDataMap".
offset:ulong;
120120
// NOTE(review): presumably the size of the constant in bytes; it is not read
// by the runtime code shown with this change -- confirm.
length:ulong;
121+
// Key used to fetch the constant from the NamedDataMap. The runtime only uses
// it when it is set, `offset` is UINT64_MAX, and a NamedDataMap is available;
// otherwise it falls back to `offset`.
named_key:string;
121122
}
122123

123124
table VkGraph {

backends/vulkan/serialization/vulkan_graph_schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ class VkValue:
137137
class VkBytes:
138138
offset: int
139139
length: int
140+
named_key: str = ""
140141

141142

142143
@dataclass

backends/vulkan/targets.bzl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ def define_common_targets(is_fbcode = False):
263263
],
264264
exported_deps = [
265265
":vulkan_graph_runtime_shaderlib{}".format(suffix),
266+
"//executorch/runtime/backend:interface",
266267
],
267268
define_static_target = True,
268269
# Static initialization is used to register operators to the global operator registry,
@@ -303,8 +304,8 @@ def define_common_targets(is_fbcode = False):
303304
":vulkan_graph_runtime{}".format(suffix),
304305
"//executorch/backends/vulkan/serialization:vk_delegate_schema",
305306
"//executorch/runtime/core:event_tracer",
306-
"//executorch/runtime/backend:interface",
307307
"//executorch/runtime/core/exec_aten/util:tensor_util",
308+
"//executorch/runtime/core:named_data_map",
308309
],
309310
define_static_target = True,
310311
# VulkanBackend.cpp needs to compile with executor as whole

0 commit comments

Comments
 (0)