
Commit 4dfab3b

Author: ssjia
[ET-VK][ez] Consolidate tensor metadata calculation + buffer binding code
Pull Request resolved: #13594

As the title says, this diff consolidates the logic used to calculate flipped and unsqueezed tensor metadata, as well as the logic for initializing and retrieving buffer bindings for UBOs that contain tensor metadata.

@imported-using-ghimport

Differential Revision: [D80800085](https://our.internmc.facebook.com/intern/diff/D80800085/)

ghstack-source-id: 304941376
1 parent 48d88f2 commit 4dfab3b

3 files changed: +246 -194 lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 68 additions & 157 deletions
@@ -14,6 +14,10 @@
 namespace vkcompute {
 namespace api {
 
+/*
+ * Used to infer the sizes of a tensor that would correspond to a given
+ * VulkanImage.
+ */
 std::vector<int64_t> calculate_sizes(
     const vkapi::VulkanImage& image,
     const utils::GPUMemoryLayout memory_layout) {
@@ -143,58 +147,19 @@ bool dim_order_is_valid(const std::vector<int64_t>& dim_order) {
   return sum == n * (n + 1) / 2;
 }
 
-/*
- * Applies the following transformations to a tensor's dim_order vector:
- * 1. Reverse the order of elements so that the fastest moving dimensions are
- *    first.
- * 2. Convert NCHW dimension indices to WHCN indices, so that 0 represents the
- *    width dimension, 1 represents the height dimension, and 2 represents the
- *    channels dimension.
- * 3. Unsqueeze the dim_order vector to the next multiple of 4.
-
- * These transformations make it easier to use the dim order in a compute shader
- */
-std::vector<int64_t> create_whcn_dim_order(
-    const std::vector<int64_t>& dim_order) {
-  size_t ndim = dim_order.size();
-  std::vector<int64_t> whcn_order(ndim);
-
-  // Convert from NCHW to WHCN index, and flip the dim order so that the fastest
-  // moving dimension is first.
-  // example: { 1, 2, 0} -> { 2, 0, 1}
-  //          {height, width, channels} -> {channels, width, height}
-  for (size_t whcn_i = 0, nchw_i = (ndim - 1); whcn_i < ndim;
-       ++whcn_i, --nchw_i) {
-    whcn_order.at(whcn_i) = ndim - 1 - dim_order.at(nchw_i);
-  }
-
-  // Unsqueeze to the next multiple of 4
-  size_t ndim_up4 = utils::align_up_4(ndim);
-  whcn_order.resize(ndim_up4);
-
-  // Append unsqueezed dimensions
-  for (size_t i = ndim; i < ndim_up4; ++i) {
-    whcn_order.at(i) = i;
-  }
-
-  return whcn_order;
-}
-
-std::vector<int64_t> unsqueeze_strides(
-    const std::vector<int64_t>& strides,
-    const int64_t numel) {
-  const size_t ndim = strides.size();
-  const size_t ndim_up4 = utils::align_up_4(strides.size());
-  std::vector<int64_t> unsqueezed_strides(ndim_up4);
-  for (int32_t i = 1; i <= ndim; ++i) {
-    int64_t dim_stride = strides.at(ndim - i);
-    unsqueezed_strides.at(ndim_up4 - i) = dim_stride;
-  }
-
-  for (int32_t i = ndim + 1; i <= ndim_up4; ++i) {
-    unsqueezed_strides.at(ndim_up4 - i) = numel;
-  }
-  return unsqueezed_strides;
+utils::ivec4 flip_and_unsqueeze_ivec4(
+    const std::vector<int64_t>& tensor_metadata,
+    const vTensor::Attribute metadata_type,
+    const size_t numel) {
+  VK_CHECK_COND(tensor_metadata.size() <= 4);
+  std::vector<int32_t> flipped_metadata =
+      flip_and_unsqueeze<int32_t>(tensor_metadata, metadata_type, numel);
+  return {
+      flipped_metadata.at(0),
+      flipped_metadata.at(1),
+      flipped_metadata.at(2),
+      flipped_metadata.at(3),
+  };
 }
 
 std::vector<int64_t> calculate_padded_sizes(
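For reference, the removed create_whcn_dim_order() and unsqueeze_strides() helpers boiled down to the same two steps: flip NCHW-ordered metadata into WHCN order (fastest-moving dimension first) and pad the result out to a length of 4. The standalone sketch below reproduces that net behavior for dim orders and strides; it assumes the consolidated flip_and_unsqueeze<T>() template (defined outside this hunk) produces equivalent results, and the function and variable names in it are illustrative only.

    // Standalone illustration, not part of the diff.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Flip NCHW-ordered metadata into WHCN order (fastest-moving dim first)
    // and pad it to length 4, mirroring the removed helpers above. Assumes
    // ndim <= 4, matching the VK_CHECK_COND in flip_and_unsqueeze_ivec4.
    std::vector<int32_t> flip_and_unsqueeze_sketch(
        const std::vector<int64_t>& meta,
        const bool is_dim_order,
        const int64_t numel) {
      const int64_t ndim = static_cast<int64_t>(meta.size());
      std::vector<int32_t> out(4);
      for (int64_t i = 0; i < ndim; ++i) {
        const int64_t v = meta.at(ndim - 1 - i);
        // dim_order entries are also remapped from NCHW to WHCN indices.
        out.at(i) = static_cast<int32_t>(is_dim_order ? (ndim - 1 - v) : v);
      }
      for (int64_t i = ndim; i < 4; ++i) {
        // Padding continues the identity sequence for dim_order; strides are
        // padded with numel, as in the removed unsqueeze_strides().
        out.at(i) = static_cast<int32_t>(is_dim_order ? i : numel);
      }
      return out;
    }

    int main() {
      // dim_order {1, 2, 0} (NCHW indices) -> {2, 0, 1, 3} in WHCN order.
      for (const int32_t v : flip_and_unsqueeze_sketch({1, 2, 0}, true, 0)) {
        std::cout << v << " ";
      }
      std::cout << "\n";
      // Contiguous strides {12, 4, 1} with numel 24 -> {1, 4, 12, 24}.
      for (const int32_t v : flip_and_unsqueeze_sketch({12, 4, 1}, false, 24)) {
        std::cout << v << " ";
      }
      std::cout << "\n";
      return 0;
    }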
@@ -309,7 +274,8 @@ int64_t calculate_gpu_buffer_numel(
   return numel;
 }
 
-int32_t pack_into_int32(const std::vector<int64_t>& vec, const int32_t extra) {
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
+int32_t pack_into_int32(const std::vector<T>& vec, const int32_t extra) {
   int32_t packed = static_cast<int32_t>(
       vec.at(0) + (vec.at(1) << 4) + (vec.at(2) << 8) + (vec.at(3) << 12) +
       (extra << 16));
@@ -322,7 +288,8 @@ int32_t create_hashed_layout(
     const int32_t packed_dim,
     const utils::StorageType storage_type) {
   if (storage_type == utils::kBuffer) {
-    return pack_into_int32(create_whcn_dim_order(dim_order), 0);
+    return pack_into_int32(
+        flip_and_unsqueeze<int64_t>(dim_order, kTensorDimOrder, 0), 0);
   }
   return pack_into_int32(axis_map, packed_dim);
 }
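create_hashed_layout() packs either the flipped-and-unsqueezed dim order (buffer storage) or the axis map plus packed dim (texture storage) into a single int32: each of the four entries occupies a 4-bit nibble and the extra value lands at bit 16 and above. Below is a standalone sketch of that packing, with a hypothetical unpack helper added purely for illustration.

    // Standalone illustration, not part of the diff.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Pack four nibble-sized values plus an "extra" field into one int32,
    // following the arithmetic shown in pack_into_int32() above.
    int32_t pack_into_int32_sketch(
        const std::vector<int32_t>& vec,
        const int32_t extra) {
      // Each entry must fit in 4 bits (0..15); `extra` lands at bit 16 and up.
      return static_cast<int32_t>(
          vec.at(0) + (vec.at(1) << 4) + (vec.at(2) << 8) + (vec.at(3) << 12) +
          (extra << 16));
    }

    // Hypothetical helper: recover the i-th nibble from the packed value.
    int32_t unpack_nibble_sketch(const int32_t packed, const int32_t i) {
      return (packed >> (4 * i)) & 0xF;
    }

    int main() {
      // A WHCN dim order of {2, 0, 1, 3} with extra = 0 hashes to 0x3102.
      const std::vector<int32_t> order = {2, 0, 1, 3};
      const int32_t hashed = pack_into_int32_sketch(order, /*extra=*/0);
      assert(hashed == 0x3102);
      for (int32_t i = 0; i < 4; ++i) {
        assert(unpack_nibble_sketch(hashed, i) == order.at(i));
      }
      return 0;
    }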
@@ -595,8 +562,9 @@ vTensor::vTensor(
           packed_dim_,
           storage_type)),
       // Related to tensor metadata UBOs
-      nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
-      max_ubo_nbytes_{calculate_max_ubo_nbytes(nbytes_per_ubo_, storage_type)},
+      min_nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
+      max_ubo_nbytes_{
+          calculate_max_ubo_nbytes(min_nbytes_per_ubo_, storage_type)},
       uniforms_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(
@@ -607,23 +575,13 @@ vTensor::vTensor(
           sizes,
           dtype_,
           allocate_memory)) {
-  // Derived metadata
-  std::vector<int64_t> whcn_dim_order(4, 0);
-  std::vector<int64_t> unsqueezed_strides(4, 0);
-  // Only calculate derived metadata if needed for the desired storage type.
-  // Note that logical limits may be used by buffer storage as well in order to
-  // set global work group sizes for some compute shaders.
-  if (storage_type == utils::kBuffer) {
-    whcn_dim_order = create_whcn_dim_order(dim_order_);
-    unsqueezed_strides = unsqueeze_strides(strides_, numel_);
-  }
-
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      numel_,
       sizes_,
-      whcn_dim_order,
-      unsqueezed_strides,
-      calculate_logical_limits(storage_->image_extents_, axis_map_),
-      numel_});
+      dim_order_,
+      strides_,
+      calculate_logical_limits(storage_->image_extents_, axis_map_)});
+
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 }
@@ -648,18 +606,18 @@ vTensor::vTensor(
           packed_dim_,
           utils::kTexture3D)),
       // Related to tensor metadata UBOs
-      nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
+      min_nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
       max_ubo_nbytes_{
-          calculate_max_ubo_nbytes(nbytes_per_ubo_, utils::kTexture3D)},
+          calculate_max_ubo_nbytes(min_nbytes_per_ubo_, utils::kTexture3D)},
       uniforms_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(context, image)) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      numel_,
       sizes_,
       {0, 0, 0, 0},
       {0, 0, 0, 0},
-      calculate_logical_limits(storage_->image_extents_, axis_map_),
-      numel_});
+      calculate_logical_limits(storage_->image_extents_, axis_map_)});
 }
 
 vTensor::vTensor(vTensor& other)
@@ -672,7 +630,7 @@ vTensor::vTensor(vTensor& other)
       strides_(other.strides_.begin(), other.strides_.end()),
       numel_(other.numel_),
       hashed_layout_(other.hashed_layout_),
-      nbytes_per_ubo_{other.nbytes_per_ubo_},
+      min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
       // Copy Tensor storage
@@ -697,22 +655,35 @@ vTensor::vTensor(
           axis_map_,
           packed_dim_,
           other.storage_type())),
-      nbytes_per_ubo_{other.nbytes_per_ubo_},
+      min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
       // Copy Tensor storage
       storage_(other.storage_) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      static_cast<size_t>(utils::multiply_integers(sizes_)),
       sizes_,
-      create_whcn_dim_order(dim_order_),
-      unsqueeze_strides(strides_, numel_),
-      other.logical_limits(),
-      static_cast<size_t>(utils::multiply_integers(sizes_))});
+      dim_order_,
+      strides_,
+      other.logical_limits()});
 
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
 }
 
+vTensor::UniformData::UniformData(
+    const size_t numel_ll,
+    const std::vector<int64_t>& sizes,
+    const std::vector<int64_t>& dim_order,
+    const std::vector<int64_t>& strides,
+    const utils::uvec3& limits)
+    : numel(utils::safe_downcast<int32_t>(numel_ll)),
+      sizes_v(flip_and_unsqueeze_ivec4(sizes, kTensorSizes, numel_ll)),
+      dim_order_v(
+          flip_and_unsqueeze_ivec4(dim_order, kTensorDimOrder, numel_ll)),
+      strides_v(flip_and_unsqueeze_ivec4(strides, kTensorStrides, numel_ll)),
+      logical_limits(limits) {}
+
 uint32_t vTensor::UniformData::write_attribute(
     void* dst,
@@ -727,11 +698,11 @@ uint32_t vTensor::UniformData::write_attribute(
     return sizeof(member_name); \
   }
   switch (attr) {
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
     WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
-    WRITE_ATTRIBUTE_CASE(WHCN_DIM_ORDER, whcn_dim_order_v);
+    WRITE_ATTRIBUTE_CASE(WHCN_DIM_ORDER, dim_order_v);
     WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
     WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
-    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
     default:
       VK_THROW("Invalid Attribute");
   }
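The body of the WRITE_ATTRIBUTE_CASE macro sits above this hunk and is not shown; from the visible tail, each case appears to write the named member into the destination buffer and return its size in bytes. The standalone sketch below only illustrates that assumed pattern, using simplified stand-in types and a hypothetical Attribute enum rather than the actual vTensor API.

    // Standalone illustration, not part of the diff; the real macro and
    // member layout may differ.
    #include <cstdint>
    #include <cstring>

    struct UniformDataSketch {
      int32_t numel = 24;
      int32_t sizes_v[4] = {4, 3, 2, 1};

      enum class Attribute { NUMEL, SIZES };

      // Copy the requested member into dst at dst_offset and report the
      // number of bytes written.
      uint32_t write_attribute(void* dst, uint32_t dst_offset, Attribute attr) {
    #define WRITE_ATTRIBUTE_CASE(enum_name, member_name)       \
      case Attribute::enum_name: {                              \
        std::memcpy(                                            \
            static_cast<uint8_t*>(dst) + dst_offset,            \
            &member_name,                                       \
            sizeof(member_name));                               \
        return sizeof(member_name);                             \
      }
        switch (attr) {
          WRITE_ATTRIBUTE_CASE(NUMEL, numel);
          WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
        }
    #undef WRITE_ATTRIBUTE_CASE
        return 0u;
      }
    };

    int main() {
      UniformDataSketch data;
      uint8_t buf[64] = {};
      // Writing SIZES copies 16 bytes (four int32s) and returns 16.
      const uint32_t n =
          data.write_attribute(buf, 0, UniformDataSketch::Attribute::SIZES);
      return n == 16 ? 0 : 1;
    }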
@@ -806,84 +777,25 @@ size_t vTensor::get_max_ubo_nbytes(const size_t nbytes_per_ubo) const {
 }
 
 const vkapi::BufferBindInfo vTensor::sizes_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (sizes_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    sizes_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), sizes_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(&sizes_uniform_offset_, uniform_data_->sizes_v);
 }
 
 const vkapi::BufferBindInfo vTensor::dim_order_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (dim_order_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    dim_order_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(
-        uniform_data_->whcn_dim_order_v, dim_order_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), dim_order_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(
+      &dim_order_uniform_offset_, uniform_data_->dim_order_v);
 }
 
 const vkapi::BufferBindInfo vTensor::strides_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (strides_uniform_offset == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    strides_uniform_offset = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(uniform_data_->strides_v, strides_uniform_offset);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), strides_uniform_offset, nbytes_per_ubo_);
+  return metadata_ubo_impl(&strides_uniform_offset, uniform_data_->strides_v);
 }
 
 const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    logical_limits_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), logical_limits_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(
+      &logical_limits_uniform_offset_, uniform_data_->logical_limits);
 }
 
 const vkapi::BufferBindInfo vTensor::numel_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (numel_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    numel_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(numel(), numel_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), numel_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(&numel_uniform_offset_, uniform_data_->numel);
 }
 
 VkMemoryRequirements vTensor::get_memory_requirements() const {
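metadata_ubo_impl() itself is defined outside this hunk (presumably in the Tensor header, one of the other changed files). Judging from the five near-identical getter bodies removed above, it plausibly centralizes the same steps: lazily allocate the shared uniforms_ buffer, assign the field a min-alignment-sized offset on first use, write the current value, and return a binding that covers just that slice. The standalone sketch below models that pattern with simplified stand-in types; the names and signature are illustrative, not the actual API.

    // Standalone illustration, not part of the diff.
    #include <cstdint>
    #include <cstring>
    #include <vector>

    constexpr uint32_t kUniformOffsetUnsetSketch = UINT32_MAX;

    struct BufferBindInfoSketch {
      uint32_t offset;
      uint32_t range;
    };

    class MetadataUboArenaSketch {
     public:
      MetadataUboArenaSketch(uint32_t min_nbytes_per_ubo, uint32_t max_nbytes)
          : min_nbytes_per_ubo_(min_nbytes_per_ubo), max_nbytes_(max_nbytes) {}

      template <typename T>
      BufferBindInfoSketch metadata_ubo_impl(uint32_t* offset, const T& value) {
        if (buffer_.empty()) {
          buffer_.resize(max_nbytes_);  // lazy allocation of the shared UBO
        }
        if (*offset == kUniformOffsetUnsetSketch) {
          // The real getters also check that the new slot still fits within
          // max_ubo_nbytes_ before claiming it.
          *offset = used_nbytes_;
          used_nbytes_ += min_nbytes_per_ubo_;
          std::memcpy(buffer_.data() + *offset, &value, sizeof(T));
        }
        return {*offset, min_nbytes_per_ubo_};
      }

     private:
      uint32_t min_nbytes_per_ubo_;
      uint32_t max_nbytes_;
      uint32_t used_nbytes_ = 0;
      std::vector<uint8_t> buffer_;
    };

    int main() {
      MetadataUboArenaSketch arena(/*min_nbytes_per_ubo=*/256, /*max_nbytes=*/1280);
      uint32_t sizes_offset = kUniformOffsetUnsetSketch;
      uint32_t strides_offset = kUniformOffsetUnsetSketch;
      int32_t sizes[4] = {4, 3, 2, 1};
      int32_t strides[4] = {1, 4, 12, 24};
      // First requests claim offsets 0 and 256; repeated requests reuse them.
      BufferBindInfoSketch a = arena.metadata_ubo_impl(&sizes_offset, sizes);
      BufferBindInfoSketch b = arena.metadata_ubo_impl(&strides_offset, strides);
      return (a.offset == 0 && b.offset == 256) ? 0 : 1;
    }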
@@ -936,22 +848,21 @@ void vTensor::update_metadata() {
   strides_ = calculate_strides(sizes_, dim_order_);
 
   // Update uniform data if it has been modified
-  uniform_data_->numel = numel_;
-  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
-  uniform_data_->whcn_dim_order_v =
-      utils::make_ivec4(create_whcn_dim_order(dim_order_));
-  uniform_data_->strides_v =
-      utils::make_whcn_ivec4(unsqueeze_strides(strides_, numel_));
   uniform_data_->numel = utils::safe_downcast<int32_t>(numel_);
+  uniform_data_->sizes_v =
+      flip_and_unsqueeze_ivec4(sizes_, kTensorSizes, numel_);
+  uniform_data_->dim_order_v =
+      flip_and_unsqueeze_ivec4(dim_order_, kTensorDimOrder, numel_);
+  uniform_data_->strides_v =
+      flip_and_unsqueeze_ivec4(strides_, kTensorStrides, numel_);
   uniform_data_->logical_limits.limits =
       calculate_logical_limits(sizes_, axis_map_, packed_dim_);
 
   if (sizes_uniform_offset_ != kUniformOffsetUnset) {
     uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
   }
   if (dim_order_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        uniform_data_->whcn_dim_order_v, dim_order_uniform_offset_);
+    uniforms_.update(uniform_data_->dim_order_v, dim_order_uniform_offset_);
   }
   if (strides_uniform_offset != kUniformOffsetUnset) {
     uniforms_.update(uniform_data_->strides_v, strides_uniform_offset);
