 namespace vkcompute {
 namespace api {
 
+/*
+ * Used to infer the sizes of a tensor that would correspond to a given
+ * VulkanImage.
+ */
 std::vector<int64_t> calculate_sizes(
     const vkapi::VulkanImage& image,
     const utils::GPUMemoryLayout memory_layout) {
@@ -143,58 +147,19 @@ bool dim_order_is_valid(const std::vector<int64_t>& dim_order) {
   return sum == n * (n + 1) / 2;
 }
 
-/*
- * Applies the following transformations to a tensor's dim_order vector:
- * 1. Reverse the order of elements so that the fastest moving dimensions are
- *    first.
- * 2. Convert NCHW dimension indices to WHCN indices, so that 0 represents the
- *    width dimension, 1 represents the height dimension, and 2 represents the
- *    channels dimension.
- * 3. Unsqueeze the dim_order vector to the next multiple of 4.
-
- * These transformations make it easier to use the dim order in a compute shader
- */
-std::vector<int64_t> create_whcn_dim_order(
-    const std::vector<int64_t>& dim_order) {
-  size_t ndim = dim_order.size();
-  std::vector<int64_t> whcn_order(ndim);
-
-  // Convert from NCHW to WHCN index, and flip the dim order so that the fastest
-  // moving dimension is first.
-  // example: {1, 2, 0} -> {2, 0, 1}
-  //          {height, width, channels} -> {channels, width, height}
-  for (size_t whcn_i = 0, nchw_i = (ndim - 1); whcn_i < ndim;
-       ++whcn_i, --nchw_i) {
-    whcn_order.at(whcn_i) = ndim - 1 - dim_order.at(nchw_i);
-  }
-
-  // Unsqueeze to the next multiple of 4
-  size_t ndim_up4 = utils::align_up_4(ndim);
-  whcn_order.resize(ndim_up4);
-
-  // Append unsqueezed dimensions
-  for (size_t i = ndim; i < ndim_up4; ++i) {
-    whcn_order.at(i) = i;
-  }
-
-  return whcn_order;
-}
-
-std::vector<int64_t> unsqueeze_strides(
-    const std::vector<int64_t>& strides,
-    const int64_t numel) {
-  const size_t ndim = strides.size();
-  const size_t ndim_up4 = utils::align_up_4(strides.size());
-  std::vector<int64_t> unsqueezed_strides(ndim_up4);
-  for (int32_t i = 1; i <= ndim; ++i) {
-    int64_t dim_stride = strides.at(ndim - i);
-    unsqueezed_strides.at(ndim_up4 - i) = dim_stride;
-  }
-
-  for (int32_t i = ndim + 1; i <= ndim_up4; ++i) {
-    unsqueezed_strides.at(ndim_up4 - i) = numel;
-  }
-  return unsqueezed_strides;
+utils::ivec4 flip_and_unsqueeze_ivec4(
+    const std::vector<int64_t>& tensor_metadata,
+    const vTensor::Attribute metadata_type,
+    const size_t numel) {
+  VK_CHECK_COND(tensor_metadata.size() <= 4);
+  std::vector<int32_t> flipped_metadata =
+      flip_and_unsqueeze<int32_t>(tensor_metadata, metadata_type, numel);
+  return {
+      flipped_metadata.at(0),
+      flipped_metadata.at(1),
+      flipped_metadata.at(2),
+      flipped_metadata.at(3),
+  };
 }
 
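Note: for intuition, the flip-and-unsqueeze transformation that the new helper relies on behaves roughly like the sketch below (illustrative only; the actual flip_and_unsqueeze<T> template lives elsewhere in this file and may differ, and example_flip_and_unsqueeze_dim_order is a hypothetical name). For example, the NCHW dim order {1, 2, 0} maps to the WHCN order {2, 0, 1, 3} after unsqueezing to 4 dimensions.

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch, not the library helper: mirrors what the removed
// create_whcn_dim_order did for a dim_order vector.
std::vector<int32_t> example_flip_and_unsqueeze_dim_order(
    const std::vector<int64_t>& dim_order) {
  const size_t ndim = dim_order.size();
  const size_t ndim_up4 = (ndim + 3) & ~size_t(3); // round up to multiple of 4
  std::vector<int32_t> whcn_order(ndim_up4);
  // Reverse the element order so the fastest moving dim comes first, and
  // convert NCHW dimension indices to WHCN indices.
  for (size_t whcn_i = 0; whcn_i < ndim; ++whcn_i) {
    const size_t nchw_i = ndim - 1 - whcn_i;
    whcn_order.at(whcn_i) =
        static_cast<int32_t>(ndim - 1 - dim_order.at(nchw_i));
  }
  // Unsqueezed dims are appended with their own index.
  for (size_t i = ndim; i < ndim_up4; ++i) {
    whcn_order.at(i) = static_cast<int32_t>(i);
  }
  return whcn_order;
}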
 std::vector<int64_t> calculate_padded_sizes(
@@ -309,7 +274,8 @@ int64_t calculate_gpu_buffer_numel(
   return numel;
 }
 
-int32_t pack_into_int32(const std::vector<int64_t>& vec, const int32_t extra) {
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
+int32_t pack_into_int32(const std::vector<T>& vec, const int32_t extra) {
   int32_t packed = static_cast<int32_t>(
       vec.at(0) + (vec.at(1) << 4) + (vec.at(2) << 8) + (vec.at(3) << 12) +
       (extra << 16));
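Note: each element of vec occupies a 4-bit nibble and extra starts at bit 16, so every packed value is assumed to fit in 4 bits. A standalone sketch of the same arithmetic (hypothetical names, not the library function):

#include <cstdint>
#include <vector>

// Packing {2, 0, 1, 3} with extra = 0 yields
// 2 + (0 << 4) + (1 << 8) + (3 << 12) = 0x3102.
int32_t example_pack_nibbles(const std::vector<int32_t>& vec, int32_t extra) {
  return static_cast<int32_t>(
      vec.at(0) + (vec.at(1) << 4) + (vec.at(2) << 8) + (vec.at(3) << 12) +
      (extra << 16));
}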
@@ -322,22 +288,24 @@ int32_t create_hashed_layout(
     const int32_t packed_dim,
     const utils::StorageType storage_type) {
   if (storage_type == utils::kBuffer) {
-    return pack_into_int32(create_whcn_dim_order(dim_order), 0);
+    return pack_into_int32(
+        flip_and_unsqueeze<int64_t>(dim_order, kTensorDimOrder, 0), 0);
   }
   return pack_into_int32(axis_map, packed_dim);
 }
 
 size_t calculate_max_ubo_nbytes(
-    const size_t nbytes_per_ubo,
+    const size_t min_nbytes_per_ubo,
     const utils::StorageType storage_type) {
-  // For texture backed tensors, the metadata fields needed are:
-  // sizes, logical limits
-  size_t max_metadata_field_count = 2u;
+  size_t ivec4_ubo_nbytes = utils::align_up(size_t(16), min_nbytes_per_ubo);
+  size_t uvec3_ubo_nbytes = utils::align_up(size_t(12), min_nbytes_per_ubo);
+  size_t int32_ubo_nbytes = utils::align_up(size_t(4), min_nbytes_per_ubo);
   if (storage_type == utils::kBuffer) {
     // sizes, strides, dim order, numel
-    max_metadata_field_count = 4u;
+    return 3 * ivec4_ubo_nbytes + int32_ubo_nbytes;
   }
-  return max_metadata_field_count * nbytes_per_ubo;
+  // sizes, logical limits
+  return ivec4_ubo_nbytes + uvec3_ubo_nbytes;
 }
 
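Note: a worked example of the sizing above, assuming (hypothetically) a 256-byte minimum UBO alignment: each metadata field rounds up to 256 bytes, so buffer-backed tensors reserve 3 * 256 + 256 = 1024 bytes (sizes, strides, dim order, numel) and texture-backed tensors reserve 256 + 256 = 512 bytes (sizes, logical limits). A standalone sketch of the same computation:

#include <cstddef>

// Hypothetical stand-in for utils::align_up and calculate_max_ubo_nbytes,
// for illustration only.
size_t example_max_ubo_nbytes(bool is_buffer, size_t min_nbytes_per_ubo) {
  auto align_up = [](size_t n, size_t a) { return (n + a - 1) / a * a; };
  const size_t ivec4_nbytes = align_up(16, min_nbytes_per_ubo); // int32 x 4
  const size_t uvec3_nbytes = align_up(12, min_nbytes_per_ubo); // uint32 x 3
  const size_t int32_nbytes = align_up(4, min_nbytes_per_ubo);
  return is_buffer
      ? 3 * ivec4_nbytes + int32_nbytes // sizes, strides, dim order, numel
      : ivec4_nbytes + uvec3_nbytes; // sizes, logical limits
}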
 //
@@ -595,8 +563,9 @@ vTensor::vTensor(
           packed_dim_,
           storage_type)),
       // Related to tensor metadata UBOs
-      nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
-      max_ubo_nbytes_{calculate_max_ubo_nbytes(nbytes_per_ubo_, storage_type)},
+      min_nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
+      max_ubo_nbytes_{
+          calculate_max_ubo_nbytes(min_nbytes_per_ubo_, storage_type)},
       uniforms_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(
@@ -607,23 +576,13 @@ vTensor::vTensor(
           sizes,
           dtype_,
           allocate_memory)) {
-  // Derived metadata
-  std::vector<int64_t> whcn_dim_order(4, 0);
-  std::vector<int64_t> unsqueezed_strides(4, 0);
-  // Only calculate derived metadata if needed for the desired storage type.
-  // Note that logical limits may be used by buffer storage as well in order to
-  // set global work group sizes for some compute shaders.
-  if (storage_type == utils::kBuffer) {
-    whcn_dim_order = create_whcn_dim_order(dim_order_);
-    unsqueezed_strides = unsqueeze_strides(strides_, numel_);
-  }
-
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      numel_,
       sizes_,
-      whcn_dim_order,
-      unsqueezed_strides,
-      calculate_logical_limits(storage_->image_extents_, axis_map_),
-      numel_});
+      dim_order_,
+      strides_,
+      calculate_logical_limits(storage_->image_extents_, axis_map_)});
+
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 }
@@ -648,18 +607,18 @@ vTensor::vTensor(
           packed_dim_,
           utils::kTexture3D)),
       // Related to tensor metadata UBOs
-      nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
+      min_nbytes_per_ubo_{context->adapter_ptr()->min_ubo_alignment()},
       max_ubo_nbytes_{
-          calculate_max_ubo_nbytes(nbytes_per_ubo_, utils::kTexture3D)},
+          calculate_max_ubo_nbytes(min_nbytes_per_ubo_, utils::kTexture3D)},
       uniforms_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(context, image)) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      numel_,
       sizes_,
       {0, 0, 0, 0},
       {0, 0, 0, 0},
-      calculate_logical_limits(storage_->image_extents_, axis_map_),
-      numel_});
+      calculate_logical_limits(storage_->image_extents_, axis_map_)});
 }
 
 vTensor::vTensor(vTensor& other)
@@ -672,7 +631,7 @@ vTensor::vTensor(vTensor& other)
       strides_(other.strides_.begin(), other.strides_.end()),
       numel_(other.numel_),
       hashed_layout_(other.hashed_layout_),
-      nbytes_per_ubo_{other.nbytes_per_ubo_},
+      min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
       // Copy Tensor storage
@@ -697,22 +656,35 @@ vTensor::vTensor(
           axis_map_,
           packed_dim_,
           other.storage_type())),
-      nbytes_per_ubo_{other.nbytes_per_ubo_},
+      min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
       // Copy Tensor storage
       storage_(other.storage_) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
+      static_cast<size_t>(utils::multiply_integers(sizes_)),
       sizes_,
-      create_whcn_dim_order(dim_order_),
-      unsqueeze_strides(strides_, numel_),
-      other.logical_limits(),
-      static_cast<size_t>(utils::multiply_integers(sizes_))});
+      dim_order_,
+      strides_,
+      other.logical_limits()});
 
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
 }
 
+vTensor::UniformData::UniformData(
+    const size_t numel_ll,
+    const std::vector<int64_t>& sizes,
+    const std::vector<int64_t>& dim_order,
+    const std::vector<int64_t>& strides,
+    const utils::uvec3& limits)
+    : numel(utils::safe_downcast<int32_t>(numel_ll)),
+      sizes_v(flip_and_unsqueeze_ivec4(sizes, kTensorSizes, numel_ll)),
+      dim_order_v(
+          flip_and_unsqueeze_ivec4(dim_order, kTensorDimOrder, numel_ll)),
+      strides_v(flip_and_unsqueeze_ivec4(strides, kTensorStrides, numel_ll)),
+      logical_limits(limits) {}
+
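Note: assuming flip_and_unsqueeze reproduces the behavior of the removed create_whcn_dim_order / unsqueeze_strides helpers, a contiguous 3-dim tensor would be flipped and padded roughly as follows (illustrative values only):

// sizes   {2, 3, 5}  -> sizes_v     {5, 3, 2, 1}   (reversed, padded with 1)
// order   {0, 1, 2}  -> dim_order_v {0, 1, 2, 3}   (WHCN order, padded with its own index)
// strides {15, 5, 1} -> strides_v   {1, 5, 15, 30} (reversed, padded with numel = 30)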
 uint32_t vTensor::UniformData::write_attribute(
     void* dst,
     const uint32_t dst_offset,
@@ -727,11 +699,11 @@ uint32_t vTensor::UniformData::write_attribute(
     return sizeof(member_name); \
   }
   switch (attr) {
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
     WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
-    WRITE_ATTRIBUTE_CASE(WHCN_DIM_ORDER, whcn_dim_order_v);
+    WRITE_ATTRIBUTE_CASE(WHCN_DIM_ORDER, dim_order_v);
     WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
     WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
-    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
     default:
       VK_THROW("Invalid Attribute");
   }
@@ -806,84 +778,25 @@ size_t vTensor::get_max_ubo_nbytes(const size_t nbytes_per_ubo) const {
 }
 
 const vkapi::BufferBindInfo vTensor::sizes_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (sizes_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    sizes_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), sizes_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(&sizes_uniform_offset_, uniform_data_->sizes_v);
 }
 
 const vkapi::BufferBindInfo vTensor::dim_order_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (dim_order_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    dim_order_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(
-        uniform_data_->whcn_dim_order_v, dim_order_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), dim_order_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(
+      &dim_order_uniform_offset_, uniform_data_->dim_order_v);
 }
 
 const vkapi::BufferBindInfo vTensor::strides_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (strides_uniform_offset == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    strides_uniform_offset = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(uniform_data_->strides_v, strides_uniform_offset);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), strides_uniform_offset, nbytes_per_ubo_);
+  return metadata_ubo_impl(&strides_uniform_offset, uniform_data_->strides_v);
 }
 
 const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    logical_limits_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), logical_limits_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(
+      &logical_limits_uniform_offset_, uniform_data_->logical_limits);
 }
 
 const vkapi::BufferBindInfo vTensor::numel_ubo() {
-  if (!uniforms_.buffer()) {
-    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
-  }
-  if (numel_uniform_offset_ == kUniformOffsetUnset) {
-    VK_CHECK_COND(
-        (uniforms_size_ + nbytes_per_ubo_) <= max_ubo_nbytes_,
-        "Uniform data allocation has exceeded Tensor uniform buffer size");
-    numel_uniform_offset_ = uniforms_size_;
-    uniforms_size_ += nbytes_per_ubo_;
-    uniforms_.update(numel(), numel_uniform_offset_);
-  }
-  return vkapi::BufferBindInfo(
-      uniforms_.buffer(), numel_uniform_offset_, nbytes_per_ubo_);
+  return metadata_ubo_impl(&numel_uniform_offset_, uniform_data_->numel);
 }
 
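Note: metadata_ubo_impl, which replaces the repeated lazy-allocation logic removed above, is not shown in this diff. Based on the removed bodies it presumably looks roughly like the sketch below; the signature, member names, and per-field padding are guesses, not the actual implementation.

// Rough sketch only, inferred from the removed per-getter bodies.
template <typename T>
const vkapi::BufferBindInfo vTensor::metadata_ubo_impl(
    uint32_t* offset_ptr,
    const T& data) {
  // Lazily allocate the shared uniform buffer on first use.
  if (!uniforms_.buffer()) {
    uniforms_ = ParamsBuffer(storage_->context_, max_ubo_nbytes_, true);
  }
  // Each field is assumed to be padded up to the minimum UBO alignment.
  const size_t field_nbytes = utils::align_up(sizeof(T), min_nbytes_per_ubo_);
  if (*offset_ptr == kUniformOffsetUnset) {
    VK_CHECK_COND(
        (uniforms_size_ + field_nbytes) <= max_ubo_nbytes_,
        "Uniform data allocation has exceeded Tensor uniform buffer size");
    *offset_ptr = uniforms_size_;
    uniforms_size_ += field_nbytes;
    uniforms_.update(data, *offset_ptr);
  }
  return vkapi::BufferBindInfo(uniforms_.buffer(), *offset_ptr, field_nbytes);
}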
 VkMemoryRequirements vTensor::get_memory_requirements() const {
@@ -936,22 +849,21 @@ void vTensor::update_metadata() {
   strides_ = calculate_strides(sizes_, dim_order_);
 
   // Update uniform data if it has been modified
-  uniform_data_->numel = numel_;
-  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
-  uniform_data_->whcn_dim_order_v =
-      utils::make_ivec4(create_whcn_dim_order(dim_order_));
-  uniform_data_->strides_v =
-      utils::make_whcn_ivec4(unsqueeze_strides(strides_, numel_));
   uniform_data_->numel = utils::safe_downcast<int32_t>(numel_);
+  uniform_data_->sizes_v =
+      flip_and_unsqueeze_ivec4(sizes_, kTensorSizes, numel_);
+  uniform_data_->dim_order_v =
+      flip_and_unsqueeze_ivec4(dim_order_, kTensorDimOrder, numel_);
+  uniform_data_->strides_v =
+      flip_and_unsqueeze_ivec4(strides_, kTensorStrides, numel_);
   uniform_data_->logical_limits.limits =
       calculate_logical_limits(sizes_, axis_map_, packed_dim_);
 
   if (sizes_uniform_offset_ != kUniformOffsetUnset) {
     uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
   }
   if (dim_order_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        uniform_data_->whcn_dim_order_v, dim_order_uniform_offset_);
+    uniforms_.update(uniform_data_->dim_order_v, dim_order_uniform_offset_);
   }
   if (strides_uniform_offset != kUniformOffsetUnset) {
     uniforms_.update(uniform_data_->strides_v, strides_uniform_offset);