Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions backends/vulkan/runtime/VulkanBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
#endif // ET_EVENT_TRACER_ENABLED
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/executor/pte_data_map.h>
#include <executorch/runtime/platform/compiler.h>
#include <executorch/runtime/platform/profiler.h>

Expand All @@ -47,6 +48,7 @@ using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::kTensorDimensionLimit;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;
using executorch::runtime::Span;

Expand All @@ -66,14 +68,6 @@ using BytesVector =
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
using UIntVector = const flatbuffers::Vector<uint32_t>*;

// Returns the address of a constant's bytes inside the constant data region.
//
// The constant is identified by its index in the flatbuffer graph's
// `constants()` table; the entry's `offset()` is added to `constant_data`.
// No bounds checking is performed on either the index or the offset.
const uint8_t* get_constant_data_ptr(
    VkGraphPtr flatbuffer_graph,
    const int32_t buffer_idx,
    const uint8_t* constant_data) {
  const auto byte_offset =
      flatbuffer_graph->constants()->Get(buffer_idx)->offset();
  return constant_data + byte_offset;
}

vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
switch (vk_datatype) {
case vkgraph::VkDataType::BOOL:
Expand Down Expand Up @@ -166,17 +160,22 @@ class GraphBuilder {
ComputeGraph* compute_graph_;
VkGraphPtr flatbuffer_;
const uint8_t* constant_data_;
const NamedDataMap* named_data_map_;
std::vector<FreeableBuffer> loaded_buffers_from_map_;

std::vector<ValueRef> ref_mapping_;

public:
explicit GraphBuilder(
ComputeGraph* compute_graph,
VkGraphPtr flatbuffer,
const uint8_t* constant_data)
const uint8_t* constant_data,
const NamedDataMap* named_data_map)
: compute_graph_(compute_graph),
flatbuffer_(flatbuffer),
constant_data_(constant_data),
named_data_map_(named_data_map),
loaded_buffers_from_map_(),
ref_mapping_() {}

void resize(uint32_t size) {
Expand Down Expand Up @@ -212,10 +211,27 @@ class GraphBuilder {

ValueRef ref;
if (tensor_fb->constant_id() >= 0) {
const uint8_t* tensor_data = get_constant_data_ptr(
flatbuffer_, tensor_fb->constant_id(), constant_data_);
VkBytesPtr constant_bytes =
flatbuffer_->constants()->Get(tensor_fb->constant_id());

ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
if (constant_bytes->named_key() != nullptr &&
constant_bytes->offset() == UINT64_MAX &&
named_data_map_ != nullptr) {
const std::string& data_name = constant_bytes->named_key()->str();
Result<FreeableBuffer> buffer =
named_data_map_->get_data(data_name.c_str());

VK_CHECK_COND(
buffer.ok(),
"Failed to get constant data for key %s from named_data_map. Error code: %u",
data_name.c_str(),
static_cast<uint32_t>(buffer.error()));
ref = compute_graph_->add_tensorref(
dims_vector, dtype, std::move(buffer.get()));
} else {
const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: assert that offset != UINT64_MAX here, to make debugging easier.

ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
}
} else {
ref = compute_graph_->add_tensor(
dims_vector,
Expand Down Expand Up @@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
return true;
}

ET_NODISCARD Error
compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
ET_NODISCARD Error compileModel(
const void* buffer_pointer,
ComputeGraph* compute_graph,
const NamedDataMap* named_data_map) const {
Result<VulkanDelegateHeader> header =
VulkanDelegateHeader::parse(buffer_pointer);

Expand All @@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {

VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);

GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
GraphBuilder builder(
compute_graph, flatbuffer_graph, constant_data, named_data_map);

builder.build_graph();

Expand All @@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
graph_config.external_adapter = vkapi::set_and_get_external_adapter();
new (compute_graph) ComputeGraph(graph_config);

Error err = compileModel(processed->data(), compute_graph);
const NamedDataMap* named_data_map = context.get_named_data_map();
Error err = compileModel(processed->data(), compute_graph, named_data_map);

// This backend does not need its processed data after compiling the
// model.
Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref(
return idx;
}

// Appends a TensorRef backed by `buffer` to the graph's value list.
//
// `buffer` is moved into the newly constructed TensorRef. The returned
// ValueRef is the index the new entry occupies in `values_`, and the
// TensorRef's byte size is added to `total_constant_nbytes_`.
ValueRef ComputeGraph::add_tensorref(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    executorch::runtime::FreeableBuffer&& buffer) {
  // The new value is appended, so its index equals the current element count.
  const auto next_slot = static_cast<int>(values_.size());
  ValueRef new_ref(next_slot);

  // NOTE(review): presumably guards against outstanding value pointers being
  // invalidated by the append below — confirm against its definition.
  check_no_active_value_ptrs();

  values_.emplace_back(TensorRef(sizes, dtype, std::move(buffer)));

  const auto& appended = values_.back().toConstTensorRef();
  total_constant_nbytes_ += appended.nbytes();

  return new_ref;
}

ValueRef ComputeGraph::add_staging(
const vkapi::ScalarType dtype,
const size_t numel) {
Expand Down
10 changes: 10 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,16 @@ class ComputeGraph final {
const vkapi::ScalarType dtype,
const void* const data);

/*
* Add a `TensorRef` value to the graph with the specified properties. A
* `TensorRef` is a reference to an `api::vTensor` whose data is stored in a
* FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer.
*/
ValueRef add_tensorref(
const std::vector<int64_t>& sizes,
const vkapi::ScalarType dtype,
executorch::runtime::FreeableBuffer&& buffer);

/*
* Add a staging buffer to the graph. Staging buffers are data buffers that
* use memory that is visible to both the CPU and GPU, and therefore is used
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/Logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
ss << v_tensor.sizes();
std::cout << ss.str();
} else if (val.isTensorRef()) {
const TensorRef tensor_ref = val.toTensorRef();
const TensorRef& tensor_ref = val.toTensorRef();
std::stringstream ss;
ss << tensor_ref.sizes;
std::cout << ss.str();
Expand Down
17 changes: 16 additions & 1 deletion backends/vulkan/runtime/graph/containers/Constant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,22 @@ TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
const void* const t_data)
: sizes{}, dtype{t_dtype}, data{t_data} {
: sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
sizes[i] = t_sizes.at(i);
}
}

TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
executorch::runtime::FreeableBuffer&& t_buffer)
: sizes{},
dtype{t_dtype},
data{t_buffer.data()},
buffer{std::move(t_buffer)} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
Expand Down
17 changes: 17 additions & 0 deletions backends/vulkan/runtime/graph/containers/Constant.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include <executorch/backends/vulkan/runtime/api/Context.h>
#include <executorch/runtime/core/freeable_buffer.h>

namespace vkcompute {

Expand All @@ -24,14 +25,30 @@ struct TensorRef final {
vkapi::ScalarType dtype;
const void* data;

// Optional FreeableBuffer for managing memory lifecycle
// This will be empty (default constructed) for the raw pointer constructor
executorch::runtime::FreeableBuffer buffer;

explicit TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
const void* const t_data);

// Constructor that takes ownership of a FreeableBuffer
explicit TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
executorch::runtime::FreeableBuffer&& t_buffer);

// Size of the referenced tensor data in bytes: the product of all entries in
// `sizes` multiplied by the element size of `dtype`.
inline size_t nbytes() const {
  const auto element_count = utils::multiply_integers(sizes);
  return element_count * vkapi::element_size(dtype);
}

// Manually free the buffer if needed (though it will be freed automatically
// on destruction)
void free_buffer() {
buffer.Free();
}
};

} // namespace vkcompute
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
size_t nbytes = numel * vkapi::element_size(tref->dtype);
staging.copy_from(tref->data, nbytes);
// Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer,
// it can be freed.
tref->free_buffer();
return staging;
}

Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/serialization/schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ table VkValue {
// Describes where the bytes of one constant can be found.
table VkBytes {
// Byte offset of the constant; the runtime adds it to the base pointer of the
// constant data region. The runtime treats UINT64_MAX (together with a
// non-null named_key) as "load from the named data map instead".
offset:ulong;
// NOTE(review): presumably the size of the constant in bytes — confirm
// against the serializer.
length:ulong;
// Key used to fetch the constant from the runtime's named data map when the
// data is not stored inline.
named_key:string;
}

table VkGraph {
Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/serialization/vulkan_graph_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ class VkValue:
# Python mirror of the `VkBytes` table in schema.fbs: locates one constant.
class VkBytes:
# Byte offset of the constant; the runtime adds it to the base pointer of the
# constant data region.
offset: int
# NOTE(review): presumably the size of the constant in bytes — confirm.
length: int
# Key used by the runtime to fetch the constant from the named data map.
# Defaults to the empty string.
named_key: str = ""


@dataclass
Expand Down
3 changes: 2 additions & 1 deletion backends/vulkan/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def define_common_targets(is_fbcode = False):
],
exported_deps = [
":vulkan_graph_runtime_shaderlib{}".format(suffix),
"//executorch/runtime/backend:interface",
],
define_static_target = True,
# Static initialization is used to register operators to the global operator registry,
Expand Down Expand Up @@ -303,8 +304,8 @@ def define_common_targets(is_fbcode = False):
":vulkan_graph_runtime{}".format(suffix),
"//executorch/backends/vulkan/serialization:vk_delegate_schema",
"//executorch/runtime/core:event_tracer",
"//executorch/runtime/backend:interface",
"//executorch/runtime/core/exec_aten/util:tensor_util",
"//executorch/runtime/executor:pte_data_map",
],
define_static_target = True,
# VulkanBackend.cpp needs to compile with executor as whole
Expand Down
8 changes: 4 additions & 4 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1036,12 +1036,12 @@ TEST_F(VulkanComputeAPITest, print_object_sizes) {

// Current known size on 64 bit system: 1040 B
EXPECT_TRUE(sizeof(vTensor) < 1200);
// Current known size on 64 bit system: 48 B
EXPECT_TRUE(sizeof(Value) < 56);
// Current known size on 64 bit system: 80 B
EXPECT_TRUE(sizeof(Value) < 100);
// Current known size on 64 bit system: 120 B
EXPECT_TRUE(sizeof(StagingBuffer) < 500);
// Current known size on 64 bit system: 512 B
EXPECT_TRUE(sizeof(ComputeGraph) < 600);
// Current known size on 64 bit system: 608 B
EXPECT_TRUE(sizeof(ComputeGraph) < 700);
// Current known size on 64 bit system: 248 B
EXPECT_TRUE(sizeof(DispatchNode) < 500);
}
Expand Down
Loading