diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index 92e310d36de..900854ccd75 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -658,66 +658,105 @@ utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
 }
 
+namespace {
+
+// Returns the number of bytes reserved for each metadata field in a tensor's
+// uniform buffer. Every field gets a slot large enough for a utils::ivec4 and
+// the slot size is a multiple of `min_ubo_alignment` so that each field's
+// offset can be bound on its own. Vulkan requires the alignment limit to be a
+// power of two, as is sizeof(utils::ivec4), so the larger of the two is a
+// multiple of the smaller.
+size_t metadata_field_stride(const size_t min_ubo_alignment) {
+  const size_t min_field_size = sizeof(utils::ivec4);
+  return min_ubo_alignment > min_field_size ? min_ubo_alignment
+                                            : min_field_size;
+}
+
+} // namespace
+
+// Returns bind info for the metadata UBO slot that holds the tensor's sizes,
+// packed by make_whcn_ivec4. The UBO and the slot are set up on first use.
 const vkapi::BufferBindInfo vTensor::sizes_ubo() {
+  const size_t size_per_ubo =
+      metadata_field_stride(context()->adapter_ptr()->min_ubo_alignment());
+  const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize, true);
+    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
   }
   if (sizes_uniform_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
-        (uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
+        (uniforms_size_ + size_per_ubo) <= max_ubo_size,
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     sizes_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += kSizePerUniform;
+    uniforms_size_ += size_per_ubo;
     uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
   }
-  return vkapi::BufferBindInfo(uniforms_.buffer(), sizes_uniform_offset_);
+  return vkapi::BufferBindInfo(
+      uniforms_.buffer(), sizes_uniform_offset_, size_per_ubo);
 }
 
+// Returns bind info for the metadata UBO slot that holds the unsqueezed
+// strides, packed by make_whcn_ivec4. The slot is set up on first use.
 const vkapi::BufferBindInfo vTensor::strides_ubo() {
+  const size_t size_per_ubo =
+      metadata_field_stride(context()->adapter_ptr()->min_ubo_alignment());
+  const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize, true);
+    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
   }
   if (unsqueezed_strides_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
-        (uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
+        (uniforms_size_ + size_per_ubo) <= max_ubo_size,
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     unsqueezed_strides_offset_ = uniforms_size_;
-    uniforms_size_ += kSizePerUniform;
+    uniforms_size_ += size_per_ubo;
     uniforms_.update(
         utils::make_whcn_ivec4(unsqueezed_strides_),
         unsqueezed_strides_offset_);
   }
-  return vkapi::BufferBindInfo(uniforms_.buffer(), unsqueezed_strides_offset_);
+  return vkapi::BufferBindInfo(
+      uniforms_.buffer(), unsqueezed_strides_offset_, size_per_ubo);
 }
 
+// Returns bind info for the metadata UBO slot that holds the value of
+// logical_limits(). The UBO and the slot are set up on first use.
 const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
+  const size_t size_per_ubo =
+      metadata_field_stride(context()->adapter_ptr()->min_ubo_alignment());
+  const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize, true);
+    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
   }
   if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
-        (uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
+        (uniforms_size_ + size_per_ubo) <= max_ubo_size,
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     logical_limits_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += kSizePerUniform;
+    uniforms_size_ += size_per_ubo;
     uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
   }
   return vkapi::BufferBindInfo(
-      uniforms_.buffer(), logical_limits_uniform_offset_);
+      uniforms_.buffer(), logical_limits_uniform_offset_, size_per_ubo);
 }
 
+// Returns bind info for the metadata UBO slot that holds the value of
+// numel(). The UBO and the slot are set up on first use.
 const vkapi::BufferBindInfo vTensor::numel_ubo() {
+  const size_t size_per_ubo =
+      metadata_field_stride(context()->adapter_ptr()->min_ubo_alignment());
+  const size_t max_ubo_size = kMaxMetadataFieldCount * size_per_ubo;
   if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize, true);
+    uniforms_ = ParamsBuffer(storage_.context_, max_ubo_size, true);
   }
   if (numel_uniform_offset_ == kUniformOffsetUnset) {
     VK_CHECK_COND(
-        (uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
+        (uniforms_size_ + size_per_ubo) <= max_ubo_size,
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     numel_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += kSizePerUniform;
+    uniforms_size_ += size_per_ubo;
     uniforms_.update(numel(), numel_uniform_offset_);
   }
-  return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
+  return vkapi::BufferBindInfo(
+      uniforms_.buffer(), numel_uniform_offset_, size_per_ubo);
 }
 
 size_t vTensor::staging_buffer_numel() const {
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index 3e51be6f948..49d5fcd36a3 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -348,16 +348,14 @@ class vTensor final {
   uint32_t numel_uniform_offset_;
   uint32_t logical_limits_uniform_offset_;
 
-  // Size allocated for each uniform
-  // each uniform is assumed to be a vec of 4 ints to maintain 16 byte alignemnt
-  constexpr static size_t kSizePerUniform = sizeof(utils::ivec4);
-  // Total size of tensor's uniform buffer
-  constexpr static size_t kMaxUniformBufferSize =
-      4 * // we have 4 uniforms that are passed on to shaders
-      kSizePerUniform;
-
-  // Initial value of uniform buffer offsets
-  constexpr static uint32_t kUniformOffsetUnset = kMaxUniformBufferSize;
+  // Maximum number of metadata fields that can be stored in the metadata UBO.
+  // This is used to calculate the size of the UBO that should be allocated.
+  constexpr static size_t kMaxMetadataFieldCount = 4;
+
+  // Sentinel marking a metadata field whose UBO offset is not yet assigned.
+  // Real offsets are multiples of the per-field slot size, which is at least
+  // sizeof(utils::ivec4), so an offset of 1 can never be produced.
+  constexpr static uint32_t kUniformOffsetUnset = 1;
 
   vTensorStorage storage_;
 
diff --git a/backends/vulkan/runtime/vk_api/Adapter.h b/backends/vulkan/runtime/vk_api/Adapter.h
index 0deea81a7f3..be0554161d3 100644
--- a/backends/vulkan/runtime/vk_api/Adapter.h
+++ b/backends/vulkan/runtime/vk_api/Adapter.h
@@ -207,6 +207,12 @@ class Adapter final {
     return supports_8bit_storage_buffers() && supports_int8_shader_types();
   }
 
+  // Minimum alignment, in bytes, the device requires for uniform buffer
+  // binding offsets (VkPhysicalDeviceLimits::minUniformBufferOffsetAlignment).
+  inline size_t min_ubo_alignment() const {
+    return physical_device_.min_ubo_alignment;
+  }
+
   // Command Buffer Submission
 
   void
diff --git a/backends/vulkan/runtime/vk_api/Descriptor.cpp b/backends/vulkan/runtime/vk_api/Descriptor.cpp
index 956711bccc0..b42ade6ea02 100644
--- a/backends/vulkan/runtime/vk_api/Descriptor.cpp
+++ b/backends/vulkan/runtime/vk_api/Descriptor.cpp
@@ -28,7 +28,25 @@ BufferBindInfo::BufferBindInfo(
     const uint32_t offset_p)
     : handle(buffer_p.handle()),
       offset(buffer_p.mem_offset() + offset_p),
-      range(buffer_p.mem_range()) {}
+      range(buffer_p.mem_range() - offset_p) {
+  // Guard the unsigned subtraction above: an offset past the end of the
+  // buffer would otherwise wrap around to a huge range.
+  VK_CHECK_COND(offset_p <= buffer_p.mem_range());
+}
+
+BufferBindInfo::BufferBindInfo(
+    const VulkanBuffer& buffer_p,
+    const uint32_t offset_p,
+    const uint32_t range_p)
+    : handle(buffer_p.handle()),
+      offset(buffer_p.mem_offset() + offset_p),
+      range(range_p) {
+  // Check the offset first so that the subtraction cannot wrap around and
+  // make an out-of-bounds range look valid.
+  VK_CHECK_COND(
+      offset_p <= buffer_p.mem_range() &&
+      range_p <= (buffer_p.mem_range() - offset_p));
+}
 
 //
 // ParamsBindList
diff --git a/backends/vulkan/runtime/vk_api/Descriptor.h b/backends/vulkan/runtime/vk_api/Descriptor.h
index 38401f2243d..60d66a22619 100644
--- a/backends/vulkan/runtime/vk_api/Descriptor.h
+++ b/backends/vulkan/runtime/vk_api/Descriptor.h
@@ -34,6 +34,11 @@ struct BufferBindInfo final {
 
   BufferBindInfo();
   BufferBindInfo(const VulkanBuffer& buffer_p, const uint32_t offset_p = 0u);
+  // Binds only the `range_p` bytes starting `offset_p` bytes into the buffer.
+  BufferBindInfo(
+      const VulkanBuffer& buffer_p,
+      const uint32_t offset_p,
+      const uint32_t range_p);
 };
 
 struct ParamsBindList final {
diff --git a/backends/vulkan/runtime/vk_api/Device.cpp b/backends/vulkan/runtime/vk_api/Device.cpp
index 21769c6a70e..c4119e04b78 100644
--- a/backends/vulkan/runtime/vk_api/Device.cpp
+++ b/backends/vulkan/runtime/vk_api/Device.cpp
@@ -39,10 +39,18 @@ PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle)
       num_compute_queues(0),
       supports_int16_shader_types(false),
       has_unified_memory(false),
-      has_timestamps(properties.limits.timestampComputeAndGraphics),
-      timestamp_period(properties.limits.timestampPeriod) {
+      has_timestamps(false),
+      timestamp_period(0),
+      min_ubo_alignment(0) {
   // Extract physical device properties
   vkGetPhysicalDeviceProperties(handle, &properties);
+
+  // Extract fields of interest. These must be read after the call above;
+  // `properties` is not populated while the initializer list is running.
+  has_timestamps = properties.limits.timestampComputeAndGraphics;
+  timestamp_period = properties.limits.timestampPeriod;
+  min_ubo_alignment = properties.limits.minUniformBufferOffsetAlignment;
+
   vkGetPhysicalDeviceMemoryProperties(handle, &memory_properties);
 
   VkPhysicalDeviceFeatures2 features2{
diff --git a/backends/vulkan/runtime/vk_api/Device.h b/backends/vulkan/runtime/vk_api/Device.h
index d883cfb7041..70d5b1db5af 100644
--- a/backends/vulkan/runtime/vk_api/Device.h
+++ b/backends/vulkan/runtime/vk_api/Device.h
@@ -49,6 +49,7 @@ struct PhysicalDevice final {
   bool has_unified_memory;
   bool has_timestamps;
   float timestamp_period;
+  size_t min_ubo_alignment;
 
   explicit PhysicalDevice(VkPhysicalDevice);
 };