
Commit 77534a6

2 parents 630eb2b + 343aa0c

Update base for Update on "[ET-VK] Adding convenience functions in Compute graph to get PushConstantDataInfo for various attributes of a tensor."

This diff adds convenience functions in the Compute graph to get PushConstantDataInfo for various attributes of a tensor.

Differential Revision: [D66853502](https://our.internmc.facebook.com/intern/diff/D66853502/)

[ghstack-poisoned]
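The convenience accessors themselves land in a later diff of this stack; this commit only updates the base it sits on, so none of their code appears below. As a sketch of the intended shape, assembled only from pieces that do appear in this diff (UniformData, vTensor::Attribute, get_uniform_data()), ComputeGraph members could look roughly like this; the names and signatures are illustrative assumptions, not code from this commit:

// Hypothetical ComputeGraph members pairing a tensor's UniformData with the
// attribute to serialize at dispatch time; PushConstantDataInfo's constructor
// is assumed here for illustration.
PushConstantDataInfo sizes_pc_of(const ValueRef idx) {
  return {get_tensor(idx)->get_uniform_data(), vTensor::Attribute::SIZES};
}
PushConstantDataInfo strides_pc_of(const ValueRef idx) {
  return {get_tensor(idx)->get_uniform_data(), vTensor::Attribute::STRIDES};
}
PushConstantDataInfo numel_pc_of(const ValueRef idx) {
  return {get_tensor(idx)->get_uniform_data(), vTensor::Attribute::NUMEL};
}
PushConstantDataInfo logical_limits_pc_of(const ValueRef idx) {
  return {
      get_tensor(idx)->get_uniform_data(), vTensor::Attribute::LOGICAL_LIMITS};
}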

File tree

21 files changed: +379, -103 lines


.ci/scripts/setup-vulkan-linux-deps.sh

Lines changed: 3 additions & 3 deletions
@@ -27,7 +27,7 @@ install_swiftshader() {
 
 install_vulkan_sdk() {
   VULKAN_SDK_VERSION=$1
-  _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
+  _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz"
 
   _vulkan_sdk_dir=/tmp/vulkansdk
   mkdir -p $_vulkan_sdk_dir
@@ -37,12 +37,12 @@ install_vulkan_sdk() {
   curl --silent --show-error --location --fail --retry 3 \
     --output "${_tmp_archive}" "${_vulkan_sdk_url}"
 
-  tar -C "${_vulkan_sdk_dir}" -xzf "${_tmp_archive}"
+  tar -C "${_vulkan_sdk_dir}" -xJf "${_tmp_archive}"
 
   export PATH="${PATH}:${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64/bin/"
 }
 
-VULKAN_SDK_VERSION="1.2.198.1"
+VULKAN_SDK_VERSION="1.3.296.0"
 
 install_swiftshader
 install_vulkan_sdk "${VULKAN_SDK_VERSION}"
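Two related changes travel together here: the SDK pin moves from 1.2.198.1 to 1.3.296.0, and since LunarG ships recent Linux SDK releases as xz-compressed tarballs, the URL suffix changes from .tar.gz to .tar.xz and the tar extraction flag from -xzf (gzip) to -xJf (xz).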

backends/arm/quantizer/quantization_annotation/generic_annotator.py

Lines changed: 1 addition & 0 deletions
@@ -53,6 +53,7 @@
     torch.ops.aten.tile.default,
     torch.ops.aten.flip.default,
     torch.ops.aten.cat.default,
+    torch.ops.aten.concatenate.default,
     torch.ops.aten.stack.default,
     torch.ops.aten.chunk.default,
     torch.ops.aten.contiguous.default,
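torch.concatenate is the NumPy-style alias of torch.cat, but it can surface in an exported graph as its own aten target (aten.concatenate.default), so the annotator needs a separate entry for it; the new test in test_generic_annotater.py below exercises exactly this path.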

backends/arm/test/quantizer/test_generic_annotater.py

Lines changed: 7 additions & 0 deletions
@@ -86,3 +86,10 @@ def test_flip(self):
         self.check_annotation(
             SingleOpModel(torch.flip, (torch.randn(2, 4),), dims=(0, 1)),
         )
+
+    def test_concat(self):
+        self.check_annotation(
+            SingleOpModel(
+                torch.concatenate, ((torch.randn(2, 3), torch.randn(2, 3)),), dim=0
+            ),
+        )

backends/cadence/fusion_g3/operators/op_quantize.cpp

Lines changed: 2 additions & 2 deletions
@@ -570,7 +570,7 @@ Tensor& quantize_per_tensor_out(
       err == torch::executor::Error::Ok,
       "Failed to resize out Tensor in quantize_per_tensor_out");
 
-  check_quantize_per_tensor_args(input, quant_min, quant_max, dtype, out);
+  // check_quantize_per_tensor_args(input, quant_min, quant_max, dtype, out);
 
   float scale_data = (float)scale;
   int zero_point_data = (int)zero_point;
@@ -696,7 +696,7 @@ Tensor& quantize_per_channel_out(
       zero_point.numel(),
       input.size(axis));
 
-  check_quantize_per_tensor_args(input, quant_min, quant_max, dtype, out);
+  // check_quantize_per_tensor_args(input, quant_min, quant_max, dtype, out);
 
   const double* scale_dt = scale.const_data_ptr<double>();
   const int64_t* zero_point_dt = zero_point.const_data_ptr<int64_t>();

backends/vulkan/runtime/api/Context.cpp

Lines changed: 12 additions & 1 deletion
@@ -119,7 +119,9 @@ void Context::register_shader_dispatch(
     const vkapi::DescriptorSet& descriptors,
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::ShaderInfo& shader_descriptor,
-    const utils::uvec3& global_workgroup_size) {
+    const utils::uvec3& global_workgroup_size,
+    const void* push_constants_data,
+    const uint32_t push_constants_size) {
   // Adjust the global workgroup size based on the output tile size
   uint32_t global_wg_w = utils::div_up(
       global_workgroup_size[0u], shader_descriptor.out_tile_size[0u]);
@@ -145,6 +147,15 @@ void Context::register_shader_dispatch(
   cmd_.bind_descriptors(descriptors.get_bind_handle());
   cmd_.insert_barrier(pipeline_barrier);
 
+  if (push_constants_size > 0 && push_constants_data != nullptr) {
+    const VkDescriptorSetLayout shader_layout =
+        shader_layout_cache().retrieve(shader_descriptor.kernel_layout);
+    const VkPipelineLayout pipeline_layout =
+        pipeline_layout_cache().retrieve(shader_layout);
+    cmd_.set_push_constants(
+        pipeline_layout, push_constants_data, push_constants_size);
+  }
+
   cmd_.dispatch(effective_global_wg);
 }
 

backends/vulkan/runtime/api/Context.h

Lines changed: 3 additions & 1 deletion
@@ -200,7 +200,9 @@ class Context final {
       const vkapi::DescriptorSet&,
       vkapi::PipelineBarrier&,
       const vkapi::ShaderInfo&,
-      const utils::uvec3&);
+      const utils::uvec3&,
+      const void* = nullptr,
+      const uint32_t = 0);
 
   void register_blit(
       vkapi::PipelineBarrier&,
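Because both new parameters are defaulted, every existing call site compiles unchanged; only dispatches that want push constants opt in. A minimal usage sketch (BlockData is a hypothetical POD used purely for illustration; real payloads must fit the device's maxPushConstantsSize, which the Vulkan spec guarantees to be at least 128 bytes):

// Sketch: opting in to push constants via the extended signature.
// BlockData is hypothetical; real callers pack tensor metadata here.
struct BlockData {
  int32_t sizes[4]; // e.g. WHCN sizes of the output tensor
  int32_t numel;
};
BlockData block = {{8, 8, 4, 1}, 256};

context->register_shader_dispatch(
    descriptors,
    pipeline_barrier,
    shader_descriptor,
    global_workgroup_size,
    &block,         // push_constants_data
    sizeof(block)); // push_constants_size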

backends/vulkan/runtime/api/containers/StagingBuffer.h

Lines changed: 7 additions & 11 deletions
@@ -23,8 +23,6 @@ class StagingBuffer final {
  private:
   Context* context_p_;
   vkapi::ScalarType dtype_;
-  size_t numel_;
-  size_t nbytes_;
   vkapi::VulkanBuffer vulkan_buffer_;
 
   void* mapped_data_;
@@ -36,10 +34,8 @@ class StagingBuffer final {
       const size_t numel)
       : context_p_(context_p),
         dtype_(dtype),
-        numel_(numel),
-        nbytes_(element_size(dtype_) * numel_),
-        vulkan_buffer_(
-            context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)),
+        vulkan_buffer_(context_p_->adapter_ptr()->vma().create_staging_buffer(
+            element_size(dtype_) * numel)),
         mapped_data_(nullptr) {}
 
   StagingBuffer(const StagingBuffer&) = delete;
@@ -68,15 +64,15 @@ class StagingBuffer final {
   }
 
   inline size_t numel() {
-    return numel_;
+    return nbytes() / element_size(dtype_);
   }
 
   inline size_t nbytes() {
-    return nbytes_;
+    return vulkan_buffer_.mem_size();
   }
 
   inline void copy_from(const void* src, const size_t nbytes) {
-    VK_CHECK_COND(nbytes <= nbytes_);
+    VK_CHECK_COND(nbytes <= this->nbytes());
     memcpy(data(), src, nbytes);
     vmaFlushAllocation(
         vulkan_buffer_.vma_allocator(),
@@ -86,7 +82,7 @@ class StagingBuffer final {
   }
 
   inline void copy_to(void* dst, const size_t nbytes) {
-    VK_CHECK_COND(nbytes <= nbytes_);
+    VK_CHECK_COND(nbytes <= this->nbytes());
     vmaInvalidateAllocation(
         vulkan_buffer_.vma_allocator(),
         vulkan_buffer_.allocation(),
@@ -96,7 +92,7 @@ class StagingBuffer final {
   }
 
   inline void set_staging_zeros() {
-    memset(data(), 0, nbytes_);
+    memset(data(), 0, nbytes());
   }
 };
 
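The removed numel_ and nbytes_ members were cached copies of state the VulkanBuffer already tracks, so both accessors are now derived on demand and cannot drift from the actual allocation. A hypothetical round-trip check (this assumes vulkan_buffer_.mem_size() reports the requested size rather than a rounded-up allocation size):

// Derivations after the refactor:
//   nbytes() == vulkan_buffer_.mem_size()
//   numel()  == nbytes() / element_size(dtype_)
StagingBuffer staging(context, vkapi::kFloat, /*numel=*/1024);
VK_CHECK_COND(staging.nbytes() == 1024 * element_size(vkapi::kFloat));
VK_CHECK_COND(staging.numel() == 1024);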

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 69 additions & 27 deletions
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/vulkan/runtime/api/containers/Tensor.h>
+#include <cstring>
 
 namespace vkcompute {
 namespace api {
@@ -446,11 +447,10 @@ vTensor::vTensor(
       dim_order_(calculate_dim_order(sizes_.size(), packed_dim_)),
       axis_map_(default_axis_map()),
       strides_(calculate_strides(sizes, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_{{0, 0, 0}},
       uniforms_(),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       uniforms_size_(0),
@@ -467,6 +467,11 @@
           padded_sizes_,
           dtype_,
           allocate_memory) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 
@@ -494,11 +499,9 @@ vTensor::vTensor(
       dim_order_(),
       axis_map_(default_axis_map()),
       strides_(),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_(calculate_padded_sizes(sizes_, packed_dim_)),
       unsqueezed_strides_(),
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(),
       uniforms_(),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       uniforms_size_(0),
@@ -508,6 +511,11 @@
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Construct Tensor storage
       storage_(context, image) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      {0, 0, 0, 0},
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
   set_logical_limits(storage_.image_extents_);
 }
 
@@ -519,13 +527,11 @@ vTensor::vTensor(vTensor& other)
       dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
       axis_map_(other.axis_map_.begin(), other.axis_map_.end()),
       strides_(other.strides_.begin(), other.strides_.end()),
-      numel_(other.numel_),
       padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()},
       unsqueezed_strides_{
           other.unsqueezed_strides_.begin(),
           other.unsqueezed_strides_.end()},
       padded_numel_(other.padded_numel_),
-      logical_limits_{other.logical_limits_},
       uniforms_(),
       // Empty initialize Utility Uniform Buffers
       uniforms_size_(0),
@@ -534,7 +540,9 @@
       numel_uniform_offset_(kUniformOffsetUnset),
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
-      storage_(other.storage_) {}
+      storage_(other.storage_) {
+  uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
+}
 
 vTensor::vTensor(
     vTensor& other,
@@ -548,11 +556,10 @@ vTensor::vTensor(
       dim_order_(dim_order.begin(), dim_order.end()),
       axis_map_(default_axis_map()),
       strides_(calculate_strides(sizes_, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(other.logical_limits_),
       uniforms_(),
       // Empty initialize Utility Uniform Buffers
       uniforms_size_(0),
@@ -562,14 +569,45 @@
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
       storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {other.logical_limits()},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
+
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
   VK_CHECK_COND(
-      offset_numel + numel_ <= other.numel(),
+      offset_numel + numel() <= other.numel(),
       "Tensor alias cannot access more elements than available in the original"
       "tensor");
 }
 
+uint32_t vTensor::UniformData::write_attribute(
+    void* dst,
+    const uint32_t dst_offset,
+    const uint32_t max_dst_size,
+    const Attribute attr) {
+#define WRITE_ATTRIBUTE_CASE(enum_name, member_name)                       \
+  case vTensor::Attribute::enum_name: {                                    \
+    VK_CHECK_COND(                                                         \
+        (dst_offset + sizeof(member_name)) <= max_dst_size,                \
+        "Attempting to write tensor attribute outside data boundary.");    \
+    memcpy((uint8_t*)dst + dst_offset, &member_name, sizeof(member_name)); \
+    return sizeof(member_name);                                            \
+  }
+  switch (attr) {
+    WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
+    WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
+    WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
+    default:
+      VK_THROW("Invalid Attribute");
+  }
+#undef WRITE_ATTRIBUTE_CASE
+  return 0;
+}
+
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
@@ -601,9 +639,9 @@
 }
 
 void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
-  logical_limits_.limits[0] = image_extents[axis_map_.at(0)];
-  logical_limits_.limits[1] = image_extents[axis_map_.at(1)];
-  logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
+  uniform_data_->logical_limits.limits[0] = image_extents[axis_map_.at(0)];
+  uniform_data_->logical_limits.limits[1] = image_extents[axis_map_.at(1)];
+  uniform_data_->logical_limits.limits[2] = image_extents[axis_map_.at(2)];
 }
 
 utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
@@ -661,7 +699,7 @@ const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
        "Uniform data allocation has exceeded Tensor uniform buffer size");
     logical_limits_uniform_offset_ = uniforms_size_;
     uniforms_size_ += kSizePerUniform;
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
   }
   return vkapi::BufferBindInfo(
       uniforms_.buffer(), logical_limits_uniform_offset_);
@@ -677,7 +715,7 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
        "Uniform data allocation has exceeded Tensor uniform buffer size");
     numel_uniform_offset_ = uniforms_size_;
     uniforms_size_ += kSizePerUniform;
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
   }
   return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
 }
@@ -687,10 +725,10 @@ size_t vTensor::staging_buffer_numel() const {
   const bool int8_supported =
       storage_.context_->adapter_ptr()->has_full_int8_buffers_support();
   if (is_int8 && !int8_supported) {
-    return utils::align_up_4(numel_);
+    return utils::align_up_4(numel());
   }
   if (storage_type() == utils::kBuffer) {
-    return numel_;
+    return numel();
   }
   return padded_numel_;
 }
@@ -720,30 +758,32 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
 
 void vTensor::update_metadata() {
   strides_ = calculate_strides(sizes_, dim_order_);
-  numel_ = utils::multiply_integers(sizes_);
+  uniform_data_->numel = utils::multiply_integers(sizes_);
 
   padded_sizes_ = calculate_padded_sizes(sizes_, packed_dim_);
-  unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
+  unsqueezed_strides_ = unsqueeze_strides(strides_, numel());
   padded_numel_ = utils::multiply_integers(padded_sizes_);
 
+  // Update uniform data if it has been modified
+  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
+  uniform_data_->strides_v = utils::make_whcn_ivec4(unsqueezed_strides_);
+
   // Calculate the image extents that would have been used to allocate a texture
   // withthe current sizes, and use that to set the logical limits.
   set_logical_limits(
       calculate_image_extents(padded_sizes_, axis_map_, packed_dim_));
 
   if (sizes_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
+    uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
   }
   if (unsqueezed_strides_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        utils::make_whcn_ivec4(unsqueezed_strides_),
-        unsqueezed_strides_offset_);
+    uniforms_.update(uniform_data_->strides_v, unsqueezed_strides_offset_);
   }
   if (numel_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
   }
   if (logical_limits_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
   }
 }
 
@@ -796,6 +836,8 @@ void vTensor::virtual_clone(const vTensor& other) {
   dim_order_ = other.dim_order_;
   axis_map_ = other.axis_map_;
   packed_dim_ = other.packed_dim_;
+
+  *uniform_data_ = *other.get_uniform_data();
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
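UniformData::write_attribute is the serialization primitive the push constant path builds on: it bounds-checks against max_dst_size, copies one attribute's raw bytes to dst + dst_offset, and returns the number of bytes written, so attributes can be packed back to back. A hedged sketch of how a dispatch site might combine it with the extended Context::register_shader_dispatch above (the scratch size and packing order are illustrative assumptions):

// Sketch: packing tensor attributes into a push constant block.
uint8_t scratch[128]; // Vulkan guarantees >= 128 bytes of push constant space
uint32_t offset = 0;
const auto data = tensor.get_uniform_data();
offset += data->write_attribute(
    scratch, offset, sizeof(scratch), vTensor::Attribute::SIZES);
offset += data->write_attribute(
    scratch, offset, sizeof(scratch), vTensor::Attribute::NUMEL);
context->register_shader_dispatch(
    descriptors, pipeline_barrier, shader_descriptor, global_workgroup_size,
    scratch, offset);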
