Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions backends/vulkan/runtime/VulkanBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
#endif // ET_EVENT_TRACER_ENABLED
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/core/named_data_map.h>
#include <executorch/runtime/platform/compiler.h>
#include <executorch/runtime/platform/profiler.h>

Expand All @@ -47,6 +48,7 @@ using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::kTensorDimensionLimit;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;
using executorch::runtime::Span;

Expand All @@ -66,14 +68,6 @@ using BytesVector =
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
using UIntVector = const flatbuffers::Vector<uint32_t>*;

// Returns a pointer to the first byte of the constant at index `buffer_idx`.
//
// The constant's VkBytes entry is looked up in `flatbuffer_graph` and its
// recorded `offset()` is applied to `constant_data`. No bounds checking is
// done here; the caller is responsible for passing a valid index and base
// pointer.
const uint8_t* get_constant_data_ptr(
    VkGraphPtr flatbuffer_graph,
    const int32_t buffer_idx,
    const uint8_t* constant_data) {
  VkBytesPtr bytes_entry = flatbuffer_graph->constants()->Get(buffer_idx);
  const auto byte_offset = bytes_entry->offset();
  return constant_data + byte_offset;
}

vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
switch (vk_datatype) {
case vkgraph::VkDataType::BOOL:
Expand Down Expand Up @@ -166,17 +160,22 @@ class GraphBuilder {
ComputeGraph* compute_graph_;
VkGraphPtr flatbuffer_;
const uint8_t* constant_data_;
const NamedDataMap* named_data_map_;
std::vector<FreeableBuffer> loaded_buffers_from_map_;

std::vector<ValueRef> ref_mapping_;

public:
explicit GraphBuilder(
ComputeGraph* compute_graph,
VkGraphPtr flatbuffer,
const uint8_t* constant_data)
const uint8_t* constant_data,
const NamedDataMap* named_data_map)
: compute_graph_(compute_graph),
flatbuffer_(flatbuffer),
constant_data_(constant_data),
named_data_map_(named_data_map),
loaded_buffers_from_map_(),
ref_mapping_() {}

void resize(uint32_t size) {
Expand Down Expand Up @@ -212,10 +211,27 @@ class GraphBuilder {

ValueRef ref;
if (tensor_fb->constant_id() >= 0) {
const uint8_t* tensor_data = get_constant_data_ptr(
flatbuffer_, tensor_fb->constant_id(), constant_data_);
VkBytesPtr constant_bytes =
flatbuffer_->constants()->Get(tensor_fb->constant_id());

ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
if (constant_bytes->named_key() != nullptr &&
constant_bytes->offset() == UINT64_MAX &&
named_data_map_ != nullptr) {
const std::string& data_name = constant_bytes->named_key()->str();
Result<FreeableBuffer> buffer =
named_data_map_->get_data(data_name.c_str());

VK_CHECK_COND(
buffer.ok(),
"Failed to get constant data for key %s from named_data_map. Error code: %u",
data_name.c_str(),
static_cast<uint32_t>(buffer.error()));
ref = compute_graph_->add_tensorref(
dims_vector, dtype, std::move(buffer.get()));
} else {
const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
}
} else {
ref = compute_graph_->add_tensor(
dims_vector,
Expand Down Expand Up @@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
return true;
}

ET_NODISCARD Error
compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
ET_NODISCARD Error compileModel(
const void* buffer_pointer,
ComputeGraph* compute_graph,
const NamedDataMap* named_data_map) const {
Result<VulkanDelegateHeader> header =
VulkanDelegateHeader::parse(buffer_pointer);

Expand All @@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {

VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);

GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
GraphBuilder builder(
compute_graph, flatbuffer_graph, constant_data, named_data_map);

builder.build_graph();

Expand All @@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
graph_config.external_adapter = vkapi::set_and_get_external_adapter();
new (compute_graph) ComputeGraph(graph_config);

Error err = compileModel(processed->data(), compute_graph);
const NamedDataMap* named_data_map = context.get_named_data_map();
Error err = compileModel(processed->data(), compute_graph, named_data_map);

// This backend does not need its processed data after compiling the
// model.
Expand Down
51 changes: 32 additions & 19 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,17 @@ ValueRef ComputeGraph::add_tensorref(
return idx;
}

// Appends a TensorRef built from `sizes`, `dtype` and `buffer` to the graph's
// value list and returns the index at which it was stored. `buffer` is moved
// into the new TensorRef, and the TensorRef's byte size is added to the
// running total of constant bytes.
ValueRef ComputeGraph::add_tensorref(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    executorch::runtime::FreeableBuffer&& buffer) {
  // The new value lands at the current end of `values_`.
  ValueRef new_ref(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();

  values_.emplace_back(TensorRef(sizes, dtype, std::move(buffer)));

  const auto& stored_tref = values_.back().toConstTensorRef();
  total_constant_nbytes_ += stored_tref.nbytes();
  return new_ref;
}

ValueRef ComputeGraph::add_staging(
const vkapi::ScalarType dtype,
const size_t numel) {
Expand Down Expand Up @@ -813,25 +824,8 @@ void ComputeGraph::prepare() {
context_->initialize_querypool();
}

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
}

void ComputeGraph::prepare_pipelines() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->prepare_pipelines(this);
}
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->prepare_pipelines(this);
}
context_->pipeline_cache().create_pipelines(pipeline_descriptors_);

pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();

// Calculate the threshold at which a new command buffer should be created
// during execute()
const size_t total_node_count = execute_nodes_.size();
size_t init_threshold = config_.execute_initial_threshold_node_count;
size_t count_threshold = config_.execute_threshold_node_count;
Expand All @@ -858,6 +852,25 @@ void ComputeGraph::prepare_pipelines() {
}

execute_threshold_node_count_ = count_threshold;

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
}

void ComputeGraph::prepare_pipelines() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->prepare_pipelines(this);
}
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->prepare_pipelines(this);
}
context_->pipeline_cache().create_pipelines(pipeline_descriptors_);

pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();
}

void ComputeGraph::submit_current_cmd(const bool final_use) {
Expand Down
10 changes: 10 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,16 @@ class ComputeGraph final {
const vkapi::ScalarType dtype,
const void* const data);

/*
 * Add a `TensorRef` value to the graph with the specified sizes and dtype,
 * whose constant data is held in a `FreeableBuffer` instead of a raw pointer.
 * The buffer is taken by rvalue reference and moved into the `TensorRef`,
 * which takes ownership of it; the caller must not use `buffer` afterwards.
 * Returns the `ValueRef` identifying the newly added value.
 */
ValueRef add_tensorref(
const std::vector<int64_t>& sizes,
const vkapi::ScalarType dtype,
executorch::runtime::FreeableBuffer&& buffer);

/*
* Add a staging buffer to the graph. Staging buffers are data buffers that
* use memory that is visible to both the CPU and GPU, and therefore is used
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/Logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
ss << v_tensor.sizes();
std::cout << ss.str();
} else if (val.isTensorRef()) {
const TensorRef tensor_ref = val.toTensorRef();
const TensorRef& tensor_ref = val.toTensorRef();
std::stringstream ss;
ss << tensor_ref.sizes;
std::cout << ss.str();
Expand Down
17 changes: 16 additions & 1 deletion backends/vulkan/runtime/graph/containers/Constant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,22 @@ TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
const void* const t_data)
: sizes{}, dtype{t_dtype}, data{t_data} {
: sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
sizes[i] = t_sizes.at(i);
}
}

TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
executorch::runtime::FreeableBuffer&& t_buffer)
: sizes{},
dtype{t_dtype},
data{t_buffer.data()},
buffer{std::move(t_buffer)} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
Expand Down
17 changes: 17 additions & 0 deletions backends/vulkan/runtime/graph/containers/Constant.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include <executorch/backends/vulkan/runtime/api/Context.h>
#include <executorch/runtime/core/freeable_buffer.h>

namespace vkcompute {

Expand All @@ -24,14 +25,30 @@ struct TensorRef final {
vkapi::ScalarType dtype;
const void* data;

// Optional FreeableBuffer for managing memory lifecycle
// This will be empty (default constructed) for the raw pointer constructor
executorch::runtime::FreeableBuffer buffer;

explicit TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
const void* const t_data);

// Constructor that takes ownership of a FreeableBuffer
explicit TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
executorch::runtime::FreeableBuffer&& t_buffer);

// Size of the referenced tensor data in bytes: the product of all entries in
// `sizes` multiplied by the element size of `dtype`.
inline size_t nbytes() const {
  const auto element_count = utils::multiply_integers(sizes);
  return element_count * vkapi::element_size(dtype);
}

// Releases the FreeableBuffer held by this TensorRef ahead of destruction by
// calling Free() on it (the buffer would otherwise be freed automatically when
// the TensorRef is destroyed).
// NOTE(review): only `buffer` is touched here; the `data` pointer is not
// reset, so when `data` was taken from `buffer` it presumably must not be read
// after this call - confirm against callers.
void free_buffer() {
buffer.Free();
}
};

} // namespace vkcompute
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
size_t nbytes = numel * vkapi::element_size(tref->dtype);
staging.copy_from(tref->data, nbytes);
// Once the staging buffer is copied, if the TensorRef owns a FreeableBuffer,
// it can be freed.
tref->free_buffer();
return staging;
}

Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/serialization/schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ table VkValue {
// Describes where the bytes of one constant tensor can be found.
table VkBytes {
// Byte offset of the data relative to the start of the delegate's constant
// data segment. The runtime treats the value UINT64_MAX, combined with a
// non-null `named_key`, as "load this constant from the named data map".
offset:ulong;
// Presumably the size of the data in bytes - TODO confirm against the
// serializer.
length:ulong;
// Key used to look the data up in the named data map when the constant is not
// stored inline.
named_key:string;
}

table VkGraph {
Expand Down
36 changes: 34 additions & 2 deletions backends/vulkan/serialization/vulkan_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import ctypes
import hashlib
import logging
import operator
from types import NoneType
Expand All @@ -25,6 +27,7 @@
is_symint_node,
TensorRepr,
)
from executorch.exir._serialize._named_data_store import NamedDataStore
from executorch.exir.backend.utils import DelegateMappingBuilder

from executorch.exir.tensor import TensorSpec
Expand Down Expand Up @@ -56,6 +59,7 @@ def __init__(
self.input_ids = []
self.output_ids = []
self.const_tensors = []
self.named_data_store = NamedDataStore()

# Mapping from Node to VkValue id
self.node_to_value_ids = {}
Expand Down Expand Up @@ -129,8 +133,36 @@ def get_param_tensor(self, node: Node) -> torch.Tensor:
def maybe_add_constant_tensor(self, node: Node) -> int:
constant_id = -1
if is_param_node(self.program, node):
constant_id = len(self.const_tensors)
self.const_tensors.append(self.get_param_tensor(node))
tensor = self.get_param_tensor(node)

# Serialize tensor data to bytes
tensor = tensor.contiguous()
size = tensor.untyped_storage().nbytes()

if size > 0:
array_type = ctypes.c_char * size
array = ctypes.cast(
tensor.untyped_storage().data_ptr(),
ctypes.POINTER(array_type),
).contents

# Generate SHA256 hash as the named key
tensor_bytes = bytes(array)
sha256_hash = hashlib.sha256(tensor_bytes)
named_key = sha256_hash.hexdigest()

# Add to named data store with 16-byte alignment (matching XNNPACK)
self.named_data_store.add_named_data(
named_key, tensor_bytes, alignment=16
)

# Create VkBytes entry with named_key and set offset to indicate named data usage
constant_id = len(self.const_tensors)
self.const_tensors.append((named_key, size))
else:
# Handle empty tensors
constant_id = len(self.const_tensors)
self.const_tensors.append(None)

return constant_id

Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/serialization/vulkan_graph_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ class VkValue:
class VkBytes:
offset: int
length: int
named_key: str = ""


@dataclass
Expand Down
Loading
Loading