Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions backends/vulkan/runtime/api/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,13 @@ void Context::report_shader_dispatch_end() {
vkapi::DescriptorSet Context::get_descriptor_set(
const vkapi::ShaderInfo& shader_descriptor,
const utils::uvec3& local_workgroup_size,
const vkapi::SpecVarList& additional_constants) {
const vkapi::SpecVarList& additional_constants,
const uint32_t push_constants_size) {
VkDescriptorSetLayout shader_layout =
shader_layout_cache().retrieve(shader_descriptor.kernel_layout);

VkPipelineLayout pipeline_layout =
pipeline_layout_cache().retrieve(shader_layout);
pipeline_layout_cache().retrieve(shader_layout, push_constants_size);

vkapi::SpecVarList spec_constants = {
SV(local_workgroup_size[0u]),
Expand All @@ -105,7 +106,7 @@ vkapi::DescriptorSet Context::get_descriptor_set(
spec_constants.append(additional_constants);

VkPipeline pipeline = pipeline_cache().retrieve(
{pipeline_layout_cache().retrieve(shader_layout),
{pipeline_layout_cache().retrieve(shader_layout, push_constants_size),
shader_cache().retrieve(shader_descriptor),
spec_constants});

Expand Down Expand Up @@ -151,7 +152,7 @@ void Context::register_shader_dispatch(
const VkDescriptorSetLayout shader_layout =
shader_layout_cache().retrieve(shader_descriptor.kernel_layout);
const VkPipelineLayout pipeline_layout =
pipeline_layout_cache().retrieve(shader_layout);
pipeline_layout_cache().retrieve(shader_layout, push_constants_size);
cmd_.set_push_constants(
pipeline_layout, push_constants_data, push_constants_size);
}
Expand Down
9 changes: 6 additions & 3 deletions backends/vulkan/runtime/api/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,13 @@ class Context final {
vkapi::DescriptorSet get_descriptor_set(
const vkapi::ShaderInfo&,
const utils::uvec3&,
const vkapi::SpecVarList&);
const vkapi::SpecVarList&,
const uint32_t push_constants_size);

inline vkapi::DescriptorSet get_descriptor_set(
const vkapi::ShaderInfo& shader_descriptor,
const utils::uvec3& local_work_group_size) {
return get_descriptor_set(shader_descriptor, local_work_group_size, {});
return get_descriptor_set(shader_descriptor, local_work_group_size, {}, 0u);
}

void register_shader_dispatch(
Expand Down Expand Up @@ -333,8 +334,10 @@ inline bool Context::submit_compute_job(
dispatch_id);

// Factor out template parameter independent code to minimize code bloat.
// Note that push constants are not exposed yet via this API, therefore the
// push constants size is assumed to be 0.
vkapi::DescriptorSet descriptor_set = get_descriptor_set(
shader, local_work_group_size, specialization_constants);
shader, local_work_group_size, specialization_constants, 0u);

detail::bind(
descriptor_set,
Expand Down
5 changes: 3 additions & 2 deletions backends/vulkan/runtime/api/containers/ParamsBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ class ParamsBuffer final {
vulkan_buffer_(
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}

template <typename Block>
ParamsBuffer(Context* context_p, const VkDeviceSize nbytes)
// The last bool argument, though unused, is required to disambiguate this
// constructor from the one above.
ParamsBuffer(Context* context_p, const VkDeviceSize nbytes, const bool unused)
: context_p_(context_p),
vulkan_buffer_(
context_p_->adapter_ptr()->vma().create_uniform_buffer(nbytes)) {}
Expand Down
43 changes: 27 additions & 16 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,66 +658,77 @@ utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
}

const vkapi::BufferBindInfo vTensor::sizes_ubo() {
const size_t size_per_ubo = context()->adapter_ptr()->min_ubo_alignment();
const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
}
if (sizes_uniform_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
(uniforms_size_ + size_per_ubo) <= max_ubo_size,
"Uniform data allocation has exceeded Tensor uniform buffer size");
sizes_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_size_ += size_per_ubo;
uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
}
return vkapi::BufferBindInfo(uniforms_.buffer(), sizes_uniform_offset_);
return vkapi::BufferBindInfo(
uniforms_.buffer(), sizes_uniform_offset_, size_per_ubo);
}

const vkapi::BufferBindInfo vTensor::strides_ubo() {
const size_t size_per_ubo = context()->adapter_ptr()->min_ubo_alignment();
const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
}
if (unsqueezed_strides_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
(uniforms_size_ + size_per_ubo) <= max_ubo_size,
"Uniform data allocation has exceeded Tensor uniform buffer size");
unsqueezed_strides_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_size_ += size_per_ubo;
uniforms_.update(
utils::make_whcn_ivec4(unsqueezed_strides_),
unsqueezed_strides_offset_);
}
return vkapi::BufferBindInfo(uniforms_.buffer(), unsqueezed_strides_offset_);
return vkapi::BufferBindInfo(
uniforms_.buffer(), unsqueezed_strides_offset_, size_per_ubo);
}

const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
const size_t size_per_ubo = context()->adapter_ptr()->min_ubo_alignment();
const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
}
if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
(uniforms_size_ + size_per_ubo) <= max_ubo_size,
"Uniform data allocation has exceeded Tensor uniform buffer size");
logical_limits_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_size_ += size_per_ubo;
uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
}
return vkapi::BufferBindInfo(
uniforms_.buffer(), logical_limits_uniform_offset_);
uniforms_.buffer(), logical_limits_uniform_offset_, size_per_ubo);
}

const vkapi::BufferBindInfo vTensor::numel_ubo() {
const size_t size_per_ubo = context()->adapter_ptr()->min_ubo_alignment();
const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
if (!uniforms_.buffer()) {
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
}
if (numel_uniform_offset_ == kUniformOffsetUnset) {
VK_CHECK_COND(
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
(uniforms_size_ + size_per_ubo) <= max_ubo_size,
"Uniform data allocation has exceeded Tensor uniform buffer size");
numel_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
uniforms_size_ += size_per_ubo;
uniforms_.update(numel(), numel_uniform_offset_);
}
return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
return vkapi::BufferBindInfo(
uniforms_.buffer(), numel_uniform_offset_, size_per_ubo);
}

size_t vTensor::staging_buffer_numel() const {
Expand Down
17 changes: 7 additions & 10 deletions backends/vulkan/runtime/api/containers/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,16 +348,13 @@ class vTensor final {
uint32_t numel_uniform_offset_;
uint32_t logical_limits_uniform_offset_;

// Size allocated for each uniform
// each uniform is assumed to be a vec of 4 ints to maintain 16 byte alignemnt
constexpr static size_t kSizePerUniform = sizeof(utils::ivec4);
// Total size of tensor's uniform buffer
constexpr static size_t kMaxUniformBufferSize =
4 * // we have 4 uniforms that are passed on to shaders
kSizePerUniform;

// Initial value of uniform buffer offsets
constexpr static uint32_t kUniformOffsetUnset = kMaxUniformBufferSize;
// Maximum number of metadata fields that can be stored in the metadata UBO.
// This is used to calculate the size of the UBO that should be allocated.
constexpr static size_t kMaxMetadataFieldCount = 4;

// Initial value of uniform buffer offsets. 1 is selected as it is essentially
// impossible for a ubo to have an offset of 1.
constexpr static uint32_t kUniformOffsetUnset = 1;

vTensorStorage storage_;

Expand Down
23 changes: 12 additions & 11 deletions backends/vulkan/runtime/graph/ops/DispatchNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,30 +60,31 @@ void DispatchNode::encode(ComputeGraph* graph) {

std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();

std::array<uint8_t, kMaxPushConstantSize> push_constants_data;
uint32_t push_constants_offset = 0;

for (const auto& push_constant : push_constants_) {
push_constants_offset += push_constant.write(
push_constants_data.data(),
push_constants_offset,
kMaxPushConstantSize);
}

context->report_shader_dispatch_start(
shader_.kernel_name,
global_workgroup_size_,
local_workgroup_size_,
node_id_);

vkapi::DescriptorSet descriptor_set =
context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);
vkapi::DescriptorSet descriptor_set = context->get_descriptor_set(
shader_, local_workgroup_size_, spec_vars_, push_constants_offset);

uint32_t idx = 0;
idx = bind_values_to_descriptor_set(
graph, args_, pipeline_barrier, descriptor_set, idx);

bind_params_to_descriptor_set(params_, descriptor_set, idx);

std::array<uint8_t, kMaxPushConstantSize> push_constants_data;
uint32_t push_constants_offset = 0;

for (const auto& push_constant : push_constants_) {
push_constants_offset += push_constant.write(
push_constants_data.data(),
push_constants_offset,
kMaxPushConstantSize);
}
context->register_shader_dispatch(
descriptor_set,
pipeline_barrier,
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ void PrepackNode::encode(ComputeGraph* graph) {

{
vkapi::PipelineBarrier pipeline_barrier{};
vkapi::DescriptorSet descriptor_set =
context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);
vkapi::DescriptorSet descriptor_set = context->get_descriptor_set(
shader_, local_workgroup_size_, spec_vars_, 0u);

uint32_t idx = 0;
bind_tensor_to_descriptor_set(
Expand Down
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/vk_api/Adapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ class Adapter final {
return supports_8bit_storage_buffers() && supports_int8_shader_types();
}

inline size_t min_ubo_alignment() const {
return physical_device_.min_ubo_alignment;
}

// Command Buffer Submission

void
Expand Down
12 changes: 11 additions & 1 deletion backends/vulkan/runtime/vk_api/Descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ BufferBindInfo::BufferBindInfo(
const uint32_t offset_p)
: handle(buffer_p.handle()),
offset(buffer_p.mem_offset() + offset_p),
range(buffer_p.mem_range()) {}
range(buffer_p.mem_range() - offset_p) {}

BufferBindInfo::BufferBindInfo(
const VulkanBuffer& buffer_p,
const uint32_t offset_p,
const uint32_t range_p)
: handle(buffer_p.handle()),
offset(buffer_p.mem_offset() + offset_p),
range(range_p) {
VK_CHECK_COND(range_p <= (buffer_p.mem_range() - offset_p));
}

//
// ParamsBindList
Expand Down
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/vk_api/Descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ struct BufferBindInfo final {

BufferBindInfo();
BufferBindInfo(const VulkanBuffer& buffer_p, const uint32_t offset_p = 0u);
BufferBindInfo(
const VulkanBuffer& buffer_p,
const uint32_t offset_p,
const uint32_t range_p);
};

struct ParamsBindList final {
Expand Down
11 changes: 9 additions & 2 deletions backends/vulkan/runtime/vk_api/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,17 @@ PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle)
num_compute_queues(0),
supports_int16_shader_types(false),
has_unified_memory(false),
has_timestamps(properties.limits.timestampComputeAndGraphics),
timestamp_period(properties.limits.timestampPeriod) {
has_timestamps(false),
timestamp_period(0),
min_ubo_alignment(0) {
// Extract physical device properties
vkGetPhysicalDeviceProperties(handle, &properties);

// Extract fields of interest
has_timestamps = properties.limits.timestampComputeAndGraphics;
timestamp_period = properties.limits.timestampPeriod;
min_ubo_alignment = properties.limits.minUniformBufferOffsetAlignment;

vkGetPhysicalDeviceMemoryProperties(handle, &memory_properties);

VkPhysicalDeviceFeatures2 features2{
Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/runtime/vk_api/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct PhysicalDevice final {
bool has_unified_memory;
bool has_timestamps;
float timestamp_period;
size_t min_ubo_alignment;

explicit PhysicalDevice(VkPhysicalDevice);
};
Expand Down
31 changes: 25 additions & 6 deletions backends/vulkan/runtime/vk_api/Pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,17 +205,29 @@ bool operator==(const SpecVarList& lhs, const SpecVarList& rhs) {

PipelineLayout::PipelineLayout(
VkDevice device,
VkDescriptorSetLayout descriptor_layout)
VkDescriptorSetLayout descriptor_layout,
const uint32_t push_constants_size)
: device_(device), handle_{VK_NULL_HANDLE} {
// TODO: Enable push constants
VkPushConstantRange pc_range{
VK_SHADER_STAGE_COMPUTE_BIT, // stageFlags
0u, // offset
push_constants_size, // size
};
uint32_t num_push_constants = 0u;
VkPushConstantRange* pc_ranges_ptr = nullptr;
if (push_constants_size > 0u) {
num_push_constants = 1u;
pc_ranges_ptr = &pc_range;
}

const VkPipelineLayoutCreateInfo pipeline_layout_create_info{
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
nullptr, // pNext
0u, // flags
1u, // setLayoutCount
&descriptor_layout, // pSetLayouts
0u, // pushConstantRangeCount
nullptr, // pPushConstantRanges
num_push_constants, // pushConstantRangeCount
pc_ranges_ptr, // pPushConstantRanges
};

VK_CHECK(vkCreatePipelineLayout(
Expand Down Expand Up @@ -344,12 +356,19 @@ PipelineLayoutCache::~PipelineLayoutCache() {
}

VkPipelineLayout PipelineLayoutCache::retrieve(
const PipelineLayoutCache::Key& key) {
const VkDescriptorSetLayout layout,
const uint32_t push_constants_size) {
PipelineLayoutCache::Key key{layout, push_constants_size};
std::lock_guard<std::mutex> lock(cache_mutex_);

auto it = cache_.find(key);
if (cache_.cend() == it) {
it = cache_.insert({key, PipelineLayoutCache::Value(device_, key)}).first;
it = cache_
.insert(
{key,
PipelineLayoutCache::Value(
device_, layout, push_constants_size)})
.first;
}

return it->second.handle();
Expand Down
Loading