 namespace vkcompute {
 namespace api {
 
+/*
+ * Used to infer the sizes of a tensor that would correspond to a given
+ * VulkanImage.
+ */
 std::vector<int64_t> calculate_sizes(
     const vkapi::VulkanImage& image,
     const utils::GPUMemoryLayout memory_layout) {
@@ -143,58 +147,19 @@ bool dim_order_is_valid(const std::vector<int64_t>& dim_order) {
   return sum == n * (n + 1) / 2;
 }
 
-/*
- * Applies the following transformations to a tensor's dim_order vector:
- * 1. Reverse the order of elements so that the fastest moving dimensions are
- *    first.
- * 2. Convert NCHW dimension indices to WHCN indices, so that 0 represents the
- *    width dimension, 1 represents the height dimension, and 2 represents the
- *    channels dimension.
- * 3. Unsqueeze the dim_order vector to the next multiple of 4.
-
- * These transformations make it easier to use the dim order in a compute shader
- */
-std::vector<int64_t> create_whcn_dim_order(
-    const std::vector<int64_t>& dim_order) {
-  size_t ndim = dim_order.size();
-  std::vector<int64_t> whcn_order(ndim);
-
-  // Convert from NCHW to WHCN index, and flip the dim order so that the fastest
-  // moving dimension is first.
-  // example: {1, 2, 0} -> {2, 0, 1}
-  //          {height, width, channels} -> {channels, width, height}
-  for (size_t whcn_i = 0, nchw_i = (ndim - 1); whcn_i < ndim;
-       ++whcn_i, --nchw_i) {
-    whcn_order.at(whcn_i) = ndim - 1 - dim_order.at(nchw_i);
-  }
-
-  // Unsqueeze to the next multiple of 4
-  size_t ndim_up4 = utils::align_up_4(ndim);
-  whcn_order.resize(ndim_up4);
-
-  // Append unsqueezed dimensions
-  for (size_t i = ndim; i < ndim_up4; ++i) {
-    whcn_order.at(i) = i;
-  }
-
-  return whcn_order;
-}
-
-std::vector<int64_t> unsqueeze_strides(
-    const std::vector<int64_t>& strides,
-    const int64_t numel) {
-  const size_t ndim = strides.size();
-  const size_t ndim_up4 = utils::align_up_4(strides.size());
-  std::vector<int64_t> unsqueezed_strides(ndim_up4);
-  for (int32_t i = 1; i <= ndim; ++i) {
-    int64_t dim_stride = strides.at(ndim - i);
-    unsqueezed_strides.at(ndim_up4 - i) = dim_stride;
-  }
-
-  for (int32_t i = ndim + 1; i <= ndim_up4; ++i) {
-    unsqueezed_strides.at(ndim_up4 - i) = numel;
-  }
-  return unsqueezed_strides;
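+/*
+ * Flips a tensor metadata vector (sizes, dim order, or strides) from NCHW to
+ * WHCN order, pads it out to 4 elements, and packs the result into an ivec4
+ * for use in compute shaders. The flip/pad itself is delegated to the
+ * flip_and_unsqueeze<T> template, which is assumed to be declared in the
+ * header and to subsume the removed create_whcn_dim_order() and
+ * unsqueeze_strides() helpers; e.g. a dim order of {1, 2, 0} becomes
+ * {2, 0, 1} before padding.
+ */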
+utils::ivec4 flip_and_unsqueeze_ivec4(
+    const std::vector<int64_t>& tensor_metadata,
+    const vTensor::Attribute metadata_type,
+    const size_t numel) {
+  VK_CHECK_COND(tensor_metadata.size() <= 4);
+  std::vector<int32_t> flipped_metadata =
+      flip_and_unsqueeze<int32_t>(tensor_metadata, metadata_type, numel);
+  return {
+      flipped_metadata.at(0),
+      flipped_metadata.at(1),
+      flipped_metadata.at(2),
+      flipped_metadata.at(3),
+  };
 }
 
 std::vector<int64_t> calculate_padded_sizes(
@@ -309,7 +274,8 @@ int64_t calculate_gpu_buffer_numel(
   return numel;
 }
 
-int32_t pack_into_int32(const std::vector<int64_t>& vec, const int32_t extra) {
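+// Packs four small integers into the low 16 bits of an int32 (4 bits each),
+// with `extra` stored in the bits above them. Callers are expected to pass
+// values that fit in 4 bits, such as dim order or axis map entries.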
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
+int32_t pack_into_int32(const std::vector<T>& vec, const int32_t extra) {
   int32_t packed = static_cast<int32_t>(
       vec.at(0) + (vec.at(1) << 4) + (vec.at(2) << 8) + (vec.at(3) << 12) +
       (extra << 16));
@@ -322,7 +288,8 @@ int32_t create_hashed_layout(
     const int32_t packed_dim,
     const utils::StorageType storage_type) {
   if (storage_type == utils::kBuffer) {
-    return pack_into_int32(create_whcn_dim_order(dim_order), 0);
+    return pack_into_int32(
+        flip_and_unsqueeze<int64_t>(dim_order, kTensorDimOrder, 0), 0);
   }
   return pack_into_int32(axis_map, packed_dim);
 }
@@ -595,8 +562,9 @@ vTensor::vTensor(
           packed_dim_,
           storage_type)),
       // Related to tensor metadata UBOs
-      nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
-      max_ubo_nbytes_{calculate_max_ubo_nbytes(nbytes_per_ubo_, storage_type)},
+      min_nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
+      max_ubo_nbytes_{
+          calculate_max_ubo_nbytes(min_nbytes_per_ubo_, storage_type)},
       uniforms_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(
@@ -607,23 +575,13 @@ vTensor::vTensor(
           sizes,
           dtype_,
           allocate_memory)) {
-  // Derived metadata
-  std::vector<int64_t> whcn_dim_order(4, 0);
-  std::vector<int64_t> unsqueezed_strides(4, 0);
-  // Only calculate derived metadata if needed for the desired storage type.
-  // Note that logical limits may be used by buffer storage as well in order to
-  // set global work group sizes for some compute shaders.
-  if (storage_type == utils::kBuffer) {
-    whcn_dim_order = create_whcn_dim_order(dim_order_);
-    unsqueezed_strides = unsqueeze_strides(strides_, numel_);
-  }
-
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      numel_,
       sizes_,
-      whcn_dim_order,
-      unsqueezed_strides,
-      calculate_logical_limits(storage_->image_extents_, axis_map_),
-      numel_});
+      dim_order_,
+      strides_,
+      calculate_logical_limits(storage_->image_extents_, axis_map_)});
+
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 }
@@ -648,18 +606,18 @@ vTensor::vTensor(
           packed_dim_,
           utils::kTexture3D)),
       // Related to tensor metadata UBOs
-      nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
+      min_nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
       max_ubo_nbytes_{
-          calculate_max_ubo_nbytes(nbytes_per_ubo_, utils::kTexture3D)},
+          calculate_max_ubo_nbytes(min_nbytes_per_ubo_, utils::kTexture3D)},
       uniforms_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(context, image)) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      numel_,
       sizes_,
       {0, 0, 0, 0},
       {0, 0, 0, 0},
-      calculate_logical_limits(storage_->image_extents_, axis_map_),
-      numel_});
+      calculate_logical_limits(storage_->image_extents_, axis_map_)});
 }
 
 vTensor::vTensor(vTensor& other)
@@ -672,7 +630,7 @@ vTensor::vTensor(vTensor& other)
       strides_(other.strides_.begin(), other.strides_.end()),
       numel_(other.numel_),
       hashed_layout_(other.hashed_layout_),
-      nbytes_per_ubo_{other.nbytes_per_ubo_},
+      min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
       // Copy Tensor storage
@@ -697,22 +655,35 @@ vTensor::vTensor(
           axis_map_,
           packed_dim_,
           other.storage_type())),
-      nbytes_per_ubo_{other.nbytes_per_ubo_},
+      min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
       // Copy Tensor storage
       storage_(other.storage_) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      static_cast<size_t>(utils::multiply_integers(sizes_)),
       sizes_,
-      create_whcn_dim_order(dim_order_),
-      unsqueeze_strides(strides_, numel_),
-      other.logical_limits(),
-      static_cast<size_t>(utils::multiply_integers(sizes_))});
+      dim_order_,
+      strides_,
+      other.logical_limits()});
 
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
 }
 
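+/*
+ * Canonicalizes the metadata stored in the UniformData struct: numel is
+ * downcast to int32_t, while sizes, dim order, and strides are flipped to
+ * WHCN order, padded to 4 elements, and packed into ivec4s so they can be
+ * consumed directly by compute shaders.
+ */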
+vTensor::UniformData::UniformData(
+    const size_t numel_ll,
+    const std::vector<int64_t>& sizes,
+    const std::vector<int64_t>& dim_order,
+    const std::vector<int64_t>& strides,
+    const utils::uvec3& limits)
+    : numel(utils::safe_downcast<int32_t>(numel_ll)),
+      sizes_v(flip_and_unsqueeze_ivec4(sizes, kTensorSizes, numel_ll)),
+      dim_order_v(
+          flip_and_unsqueeze_ivec4(dim_order, kTensorDimOrder, numel_ll)),
+      strides_v(flip_and_unsqueeze_ivec4(strides, kTensorStrides, numel_ll)),
+      logical_limits(limits) {}
+
 uint32_t vTensor::UniformData::write_attribute(
     void* dst,
     const uint32_t dst_offset,
@@ -727,11 +698,11 @@ uint32_t vTensor::UniformData::write_attribute(
     return sizeof(member_name);                                       \
   }
   switch (attr) {
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
     WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
-    WRITE_ATTRIBUTE_CASE(WHCN_DIM_ORDER, whcn_dim_order_v);
+    WRITE_ATTRIBUTE_CASE(WHCN_DIM_ORDER, dim_order_v);
     WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
     WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
-    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
     default:
       VK_THROW("Invalid Attribute");
   }
@@ -806,84 +777,25 @@ size_t vTensor::get_max_ubo_nbytes(const size_t nbytes_per_ubo) const {
 }
 
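+// The lazy-allocation logic that used to be duplicated in each accessor below
+// is assumed to be factored into metadata_ubo_impl(): presumably it allocates
+// the shared uniforms buffer on first use, reserves the next
+// min_nbytes_per_ubo_-sized slot, writes the given value, and returns a
+// BufferBindInfo for that slot, mirroring the removed implementations.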
 const vkapi::BufferBindInfo vTensor::sizes_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (sizes_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    sizes_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), sizes_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(&sizes_uniform_offset_, uniform_data_->sizes_v);
 }
 
 const vkapi::BufferBindInfo vTensor::dim_order_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (dim_order_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    dim_order_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(
-        uniform_data_->whcn_dim_order_v, dim_order_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), dim_order_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(
+      &dim_order_uniform_offset_, uniform_data_->dim_order_v);
 }
 
 const vkapi::BufferBindInfo vTensor::strides_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (strides_uniform_offset == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    strides_uniform_offset = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(uniform_data_->strides_v, strides_uniform_offset);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), strides_uniform_offset, nbytes_per_ubo_);
+  return metadata_ubo_impl(&strides_uniform_offset, uniform_data_->strides_v);
 }
 
 const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    logical_limits_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), logical_limits_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(
+      &logical_limits_uniform_offset_, uniform_data_->logical_limits);
 }
 
 const vkapi::BufferBindInfo vTensor::numel_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (numel_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    numel_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(numel(), numel_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), numel_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(&numel_uniform_offset_, uniform_data_->numel);
 }
 
 VkMemoryRequirements vTensor::get_memory_requirements() const {
@@ -936,22 +848,21 @@ void vTensor::update_metadata() {
   strides_ = calculate_strides(sizes_, dim_order_);
 
   // Update uniform data if it has been modified
-  uniform_data_->numel = numel_;
-  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
-  uniform_data_->whcn_dim_order_v =
-      utils::make_ivec4(create_whcn_dim_order(dim_order_));
-  uniform_data_->strides_v =
-      utils::make_whcn_ivec4(unsqueeze_strides(strides_, numel_));
   uniform_data_->numel = utils::safe_downcast<int32_t>(numel_);
+  uniform_data_->sizes_v =
+      flip_and_unsqueeze_ivec4(sizes_, kTensorSizes, numel_);
+  uniform_data_->dim_order_v =
+      flip_and_unsqueeze_ivec4(dim_order_, kTensorDimOrder, numel_);
+  uniform_data_->strides_v =
+      flip_and_unsqueeze_ivec4(strides_, kTensorStrides, numel_);
   uniform_data_->logical_limits.limits =
      calculate_logical_limits(sizes_, axis_map_, packed_dim_);
 
   if (sizes_uniform_offset_ != kUniformOffsetUnset) {
     uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
   }
   if (dim_order_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        uniform_data_->whcn_dim_order_v, dim_order_uniform_offset_);
+    uniforms_.update(uniform_data_->dim_order_v, dim_order_uniform_offset_);
   }
   if (strides_uniform_offset != kUniformOffsetUnset) {
     uniforms_.update(uniform_data_->strides_v, strides_uniform_offset);