Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions backends/vulkan/runtime/VulkanBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
compute_graph->prepare();
compute_graph->prepare_pipelines();

compute_graph->encode_prepack();
compute_graph->prepack();
compute_graph->run_prepack();

// If dynamic shapes are not expected, then the command buffer only needs to
// be encoded once. Otherwise, wait until the first inference to encode the
Expand Down
40 changes: 40 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ ComputeGraph::ComputeGraph(GraphConfig config)
execute_descriptor_counts_.descriptor_combined_sampler_count = 0;
execute_descriptor_counts_.descriptor_storage_image_count = 0;

#define MB (1024.0 * 1024.0)
// If certain graph config variables are not specified, then set them
// automatically.
if (config_.prepack_threshold_nbytes == 0) {
config_.prepack_threshold_nbytes = 20 * MB;
config_.prepack_initial_threshold_nbytes = 20 * MB;
}
#undef MB

context_->set_cmd(/*reusable = */ true);
}

Expand Down Expand Up @@ -750,6 +759,15 @@ void ComputeGraph::prepare_pipelines() {
vkapi::ComputePipelineCache::Hasher>();
}

void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
vkapi::VulkanFence fence = context_->fences().get_fence();
context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
fence.wait();
context_->fences().return_fence(fence);

context_->flush();
}

void ComputeGraph::encode_prepack() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->encode(this);
Expand All @@ -766,6 +784,28 @@ void ComputeGraph::prepack() const {
context_->flush();
}

/*
 * Encodes every prepack node, in order, into the Context's current command
 * buffer. Before encoding an interior node (neither the first nor the last),
 * if the staging bytes accumulated for the current command buffer exceed the
 * active threshold, the command buffer is submitted and waited on, the byte
 * counter is reset, and a fresh command buffer is requested from the Context.
 *
 * The active threshold is config_.prepack_initial_threshold_nbytes until the
 * first such intermediate submission has happened, and
 * config_.prepack_threshold_nbytes afterwards.
 *
 * After the loop, whatever remains in the command buffer is submitted and
 * waited on unconditionally (this also happens when there are no prepack
 * nodes), and the byte counter is reset.
 */
void ComputeGraph::run_prepack() {
  const size_t num_nodes = prepack_nodes_.size();
  // Unsigned index so the comparison against the container size does not mix
  // signed and unsigned operands.
  size_t i = 0;
  bool submitted = false;
  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
    // Do not trigger on the first or last prepack node. `i + 1 != num_nodes`
    // avoids computing `num_nodes - 1`, which would wrap if the list were
    // empty.
    const bool not_terminal = i != 0 && i + 1 != num_nodes;
    const size_t threshold = submitted
        ? config_.prepack_threshold_nbytes
        : config_.prepack_initial_threshold_nbytes;
    if (not_terminal && staging_nbytes_in_cmd_ > threshold) {
      submit_current_cmd_and_wait(/*final_use=*/true);
      staging_nbytes_in_cmd_ = 0;
      // Start a new command buffer for the nodes that follow.
      context_->set_cmd();
      submitted = true;
    }

    // NOTE(review): encoding presumably grows staging_nbytes_in_cmd_ via
    // update_staging_nbytes_in_cmd() when the node creates its staging
    // buffer - confirm against PrepackNode::encode.
    node->encode(this);
    ++i;
  }
  submit_current_cmd_and_wait(/*final_use=*/true);
  staging_nbytes_in_cmd_ = 0;
}

void ComputeGraph::encode_execute() {
context_->flush();
context_->set_cmd(/*reusable = */ true);
Expand Down
25 changes: 25 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ class ComputeGraph final {
size_t values_in_use_ = 0;
size_t execute_count_ = 0;

// Represents the amount of staging buffer data that will be copied if the
// current Context's command buffer is submitted now.
size_t staging_nbytes_in_cmd_ = 0;

public:
//
// Accessors
Expand Down Expand Up @@ -812,13 +816,34 @@ class ComputeGraph final {
copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
void copy_from_staging(const ValueRef idx, void* data, const size_t numel);

protected:
// Command Buffer Management

/*
* Submits the current command buffer in the Context to the GPU for execution,
* and waits for it to complete before returning. This function will also
* flush the Context after execution.
*/
void submit_current_cmd_and_wait(const bool final_use = false);

public:
//
// Graph Prepacking
//

/*
 * Adds staging_bytes to the running count of staging buffer bytes associated
 * with the Context's current command buffer.
 */
inline void update_staging_nbytes_in_cmd(const size_t staging_bytes) {
  staging_nbytes_in_cmd_ = staging_nbytes_in_cmd_ + staging_bytes;
}

void encode_prepack();
void prepack() const;

/*
* Executes prepacking operations to transfer model weight data from the CPU
* to GPU.
*/
void run_prepack();

//
// Graph Execution
//
Expand Down
14 changes: 14 additions & 0 deletions backends/vulkan/runtime/graph/GraphConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ struct GraphConfig final {
// Whether or not the ComputeGraph should expect input shapes to be dynamic
bool expect_dynamic_shapes;

// Execution properties that determine specifics re: how command buffer
// submission is handled, etc. 0 means this field is not set.

// During prepacking, once this threshold is reached, submit the current
// command buffer for execution. This allows the work to be distributed over
// multiple command buffer submissions, which can improve model load
// performance and prevent crashes when loading large models.
size_t prepack_threshold_nbytes = 0;
// Threshold used for the first command buffer submission during prepacking.
// This can be set to be lower than prepack_threshold_nbytes to submit a
// command buffer for execution earlier, which can improve performance by
// taking more advantage of parallelism between the CPU and GPU.
size_t prepack_initial_threshold_nbytes = 0;

vkapi::Adapter* external_adapter;

// Generate a default graph config with pre-configured settings
Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
TensorRefPtr tref = graph->get_tref(tref_);
size_t numel = utils::multiply_integers(tref->sizes);
api::StagingBuffer staging(graph->context(), tref->dtype, numel);
graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
size_t nbytes = numel * vkapi::element_size(tref->dtype);
staging.copy_from(tref->data, nbytes);
return staging;
Expand Down
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/vk_api/memory/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ class VulkanBuffer final {
return buffer_properties_.size;
}

/*
 * Returns mem_size() converted to size_t via utils::safe_downcast.
 */
inline size_t mem_size_as_size_t() const {
  const auto nbytes = mem_size();
  return utils::safe_downcast<size_t>(nbytes);
}

inline bool has_memory() const {
return (memory_.allocation != VK_NULL_HANDLE);
}
Expand Down
Loading