Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions backends/vulkan/runtime/VulkanBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
#endif // ET_EVENT_TRACER_ENABLED
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/core/named_data_map.h>
#include <executorch/runtime/platform/compiler.h>
#include <executorch/runtime/platform/profiler.h>

Expand All @@ -47,6 +48,7 @@ using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::kTensorDimensionLimit;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;
using executorch::runtime::Span;

Expand All @@ -66,14 +68,6 @@ using BytesVector =
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
using UIntVector = const flatbuffers::Vector<uint32_t>*;

/*
 * Resolves a pointer to the raw bytes of the constant tensor at `buffer_idx`.
 * The flatbuffer stores only an offset per constant; the actual bytes live in
 * the external `constant_data` segment, so the result is base + offset.
 */
const uint8_t* get_constant_data_ptr(
    VkGraphPtr flatbuffer_graph,
    const int32_t buffer_idx,
    const uint8_t* constant_data) {
  VkBytesPtr bytes_entry = flatbuffer_graph->constants()->Get(buffer_idx);
  const uint64_t byte_offset = bytes_entry->offset();
  return constant_data + byte_offset;
}

vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
switch (vk_datatype) {
case vkgraph::VkDataType::BOOL:
Expand Down Expand Up @@ -166,17 +160,22 @@ class GraphBuilder {
ComputeGraph* compute_graph_;
VkGraphPtr flatbuffer_;
const uint8_t* constant_data_;
const NamedDataMap* named_data_map_;
std::vector<FreeableBuffer> loaded_buffers_from_map_;

std::vector<ValueRef> ref_mapping_;

public:
explicit GraphBuilder(
ComputeGraph* compute_graph,
VkGraphPtr flatbuffer,
const uint8_t* constant_data)
const uint8_t* constant_data,
const NamedDataMap* named_data_map)
: compute_graph_(compute_graph),
flatbuffer_(flatbuffer),
constant_data_(constant_data),
named_data_map_(named_data_map),
loaded_buffers_from_map_(),
ref_mapping_() {}

void resize(uint32_t size) {
Expand Down Expand Up @@ -212,10 +211,27 @@ class GraphBuilder {

ValueRef ref;
if (tensor_fb->constant_id() >= 0) {
const uint8_t* tensor_data = get_constant_data_ptr(
flatbuffer_, tensor_fb->constant_id(), constant_data_);
VkBytesPtr constant_bytes =
flatbuffer_->constants()->Get(tensor_fb->constant_id());

ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
if (constant_bytes->named_key() != nullptr &&
constant_bytes->offset() == UINT64_MAX &&
named_data_map_ != nullptr) {
const std::string& data_name = constant_bytes->named_key()->str();
Result<FreeableBuffer> buffer =
named_data_map_->get_data(data_name.c_str());

VK_CHECK_COND(
buffer.ok(),
"Failed to get constant data for key %s from named_data_map. Error code: %u",
data_name.c_str(),
static_cast<uint32_t>(buffer.error()));
ref = compute_graph_->add_tensorref(
dims_vector, dtype, std::move(buffer.get()));
} else {
const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
}
} else {
ref = compute_graph_->add_tensor(
dims_vector,
Expand Down Expand Up @@ -479,8 +495,10 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
return true;
}

ET_NODISCARD Error
compileModel(const void* buffer_pointer, ComputeGraph* compute_graph) const {
ET_NODISCARD Error compileModel(
const void* buffer_pointer,
ComputeGraph* compute_graph,
const NamedDataMap* named_data_map) const {
Result<VulkanDelegateHeader> header =
VulkanDelegateHeader::parse(buffer_pointer);

Expand All @@ -506,7 +524,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {

VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);

GraphBuilder builder(compute_graph, flatbuffer_graph, constant_data);
GraphBuilder builder(
compute_graph, flatbuffer_graph, constant_data, named_data_map);

builder.build_graph();

Expand All @@ -532,7 +551,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
graph_config.external_adapter = vkapi::set_and_get_external_adapter();
new (compute_graph) ComputeGraph(graph_config);

Error err = compileModel(processed->data(), compute_graph);
const NamedDataMap* named_data_map = context.get_named_data_map();
Error err = compileModel(processed->data(), compute_graph, named_data_map);

// This backend does not need its processed data after compiling the
// model.
Expand Down
22 changes: 22 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,16 @@ VkMemoryRequirements vTensor::get_memory_requirements() const {
return {};
}

/*
 * Returns true if the tensor's underlying buffer or image resource is bound
 * to a memory allocation.
 */
bool vTensor::memory_is_bound() const {
  switch (storage_type()) {
    case utils::kBuffer:
      return storage_->buffer_.has_memory();
    case utils::kTexture2D:
    case utils::kTexture3D:
      return storage_->image_.has_memory();
  }
  // Defensive fallback: flowing off the end of a non-void function is
  // undefined behavior. This is unreachable for the storage types handled
  // above, but protects against an unhandled enumerator being added later.
  return false;
}

void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
switch (storage_type()) {
case utils::kBuffer:
Expand All @@ -909,6 +919,18 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
}
}

/*
 * Transfers ownership of `allocation` into the tensor's underlying resource.
 * The receiving resource (buffer or image, depending on storage type) both
 * binds and takes ownership of the moved-in allocation.
 */
void vTensor::acquire_allocation(vkapi::Allocation&& allocation) {
  switch (storage_type()) {
    case utils::kTexture2D:
    case utils::kTexture3D:
      storage_->image_.acquire_allocation(std::move(allocation));
      break;
    case utils::kBuffer:
      storage_->buffer_.acquire_allocation(std::move(allocation));
      break;
  }
}

void vTensor::update_metadata() {
numel_ = utils::multiply_integers(sizes_);
strides_ = calculate_strides(sizes_, dim_order_);
Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,12 @@ class vTensor final {
*/
VmaAllocationCreateInfo get_allocation_create_info() const;

/*
* Checks if the tensor's underlying buffer or image resource is bound to a
* memory allocation.
*/
bool memory_is_bound() const;

/*
* Return the VkMemoryRequirements of the underlying resource
*/
Expand All @@ -570,6 +576,11 @@ class vTensor final {
*/
void bind_allocation(const vkapi::Allocation& allocation);

/*
* Binds and acquires a rvalue memory allocation
*/
void acquire_allocation(vkapi::Allocation&& allocation);

private:
/*
* Assuming sizes, dim order, or axis mapping was modified, recompute all
Expand Down
75 changes: 52 additions & 23 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,6 @@ ValueRef ComputeGraph::add_tensor(
const utils::GPUMemoryLayout memory_layout,
const int64_t shared_object_idx,
const utils::AxisMapLayout axis_map_layout) {
bool allocate_memory = shared_object_idx < 0;

ValueRef idx(static_cast<int>(values_.size()));
check_no_active_value_ptrs();
values_.emplace_back(api::vTensor(
Expand All @@ -366,10 +364,10 @@ ValueRef ComputeGraph::add_tensor(
dtype,
storage_type,
memory_layout,
allocate_memory,
false,
axis_map_layout));

if (!allocate_memory) {
if (shared_object_idx >= 0) {
get_shared_object(shared_object_idx).add_user(this, idx);
}
return idx;
Expand Down Expand Up @@ -480,6 +478,17 @@ ValueRef ComputeGraph::add_tensorref(
return idx;
}

/*
 * Appends a TensorRef value whose constant data is owned by a FreeableBuffer
 * (e.g. obtained from a NamedDataMap). Ownership of the buffer is transferred
 * into the TensorRef, and the constant byte count bookkeeping is updated.
 * Returns the ValueRef index of the newly added value.
 */
ValueRef ComputeGraph::add_tensorref(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    executorch::runtime::FreeableBuffer&& buffer) {
  const ValueRef new_idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  TensorRef tref(sizes, dtype, std::move(buffer));
  values_.emplace_back(std::move(tref));
  total_constant_nbytes_ += values_.back().toConstTensorRef().nbytes();
  return new_idx;
}

ValueRef ComputeGraph::add_staging(
const vkapi::ScalarType dtype,
const size_t numel) {
Expand Down Expand Up @@ -615,6 +624,17 @@ SharedObject& ComputeGraph::get_shared_object(const int64_t idx) {
return shared_objects_.at(idx);
}

void ComputeGraph::create_dedicated_allocation_for(const ValueRef idx) {
vTensorPtr tensor = get_tensor(idx);
if (!tensor->memory_is_bound()) {
VmaAllocationCreateInfo alloc_create_info =
context()->adapter_ptr()->vma().gpuonly_resource_create_info();
tensor->acquire_allocation(
context()->adapter_ptr()->vma().create_allocation(
tensor->get_memory_requirements(), alloc_create_info));
}
}

void ComputeGraph::update_descriptor_counts(
const vkapi::ShaderInfo& shader_info,
bool execute) {
Expand Down Expand Up @@ -813,25 +833,8 @@ void ComputeGraph::prepare() {
context_->initialize_querypool();
}

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
}

void ComputeGraph::prepare_pipelines() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->prepare_pipelines(this);
}
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->prepare_pipelines(this);
}
context_->pipeline_cache().create_pipelines(pipeline_descriptors_);

pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();

// Calculate the threshold at which a new command buffer should be created
// during execute()
const size_t total_node_count = execute_nodes_.size();
size_t init_threshold = config_.execute_initial_threshold_node_count;
size_t count_threshold = config_.execute_threshold_node_count;
Expand Down Expand Up @@ -860,6 +863,20 @@ void ComputeGraph::prepare_pipelines() {
execute_threshold_node_count_ = count_threshold;
}

void ComputeGraph::prepare_pipelines() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->prepare_pipelines(this);
}
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->prepare_pipelines(this);
}
context_->pipeline_cache().create_pipelines(pipeline_descriptors_);

pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();
}

void ComputeGraph::submit_current_cmd(const bool final_use) {
context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
}
Expand Down Expand Up @@ -939,6 +956,18 @@ void ComputeGraph::prepack() {
submit_current_cmd_and_wait(/*final_use=*/true);
context_->flush();
staging_nbytes_in_cmd_ = 0;

// Initialize allocations for intermediate tensors
for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
// Make sure all remaining tensors have allocations
for (int i = 0; i < values_.size(); i++) {
if (values_.at(i).isTensor()) {
create_dedicated_allocation_for(i);
}
}
}

void ComputeGraph::execute() {
Expand Down
17 changes: 17 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,16 @@ class ComputeGraph final {
const vkapi::ScalarType dtype,
const void* const data);

/*
* Add a `TensorRef` value to the graph with the specific properties. A
* `TensorRef` is a reference to a `api::vTensor` whose data is stored in a
* FreeableBuffer. The TensorRef will take ownership of the FreeableBuffer.
*/
ValueRef add_tensorref(
const std::vector<int64_t>& sizes,
const vkapi::ScalarType dtype,
executorch::runtime::FreeableBuffer&& buffer);

/*
* Add a staging buffer to the graph. Staging buffers are data buffers that
* use memory that is visible to both the CPU and GPU, and therefore is used
Expand Down Expand Up @@ -817,6 +827,13 @@ class ComputeGraph final {

SharedObject& get_shared_object(const int64_t idx);

/*
* Creates a dedicated memory allocation for a vTensor value, and have the
* tensor acquire the allocation object. If the tensor is already bound to a
* memory allocation, this function will be a no-op.
*/
void create_dedicated_allocation_for(const ValueRef idx);

//
// Graph Preparation
//
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/Logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
ss << v_tensor.sizes();
std::cout << ss.str();
} else if (val.isTensorRef()) {
const TensorRef tensor_ref = val.toTensorRef();
const TensorRef& tensor_ref = val.toTensorRef();
std::stringstream ss;
ss << tensor_ref.sizes;
std::cout << ss.str();
Expand Down
17 changes: 16 additions & 1 deletion backends/vulkan/runtime/graph/containers/Constant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,22 @@ TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
const void* const t_data)
: sizes{}, dtype{t_dtype}, data{t_data} {
: sizes{}, dtype{t_dtype}, data{t_data}, buffer{} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
sizes[i] = t_sizes.at(i);
}
}

TensorRef::TensorRef(
const std::vector<int64_t>& t_sizes,
vkapi::ScalarType t_dtype,
executorch::runtime::FreeableBuffer&& t_buffer)
: sizes{},
dtype{t_dtype},
data{t_buffer.data()},
buffer{std::move(t_buffer)} {
size_t ndim = t_sizes.size();
sizes.resize(ndim);
for (int i = 0; i < ndim; ++i) {
Expand Down
Loading
Loading