Skip to content

Commit 916c207

Browse files
SS-JIA and ssjia authored
[ET-VK] Introduce BufferMetadata GLSL struct to abstract tensor layout (#13595)
Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom): * #13597 * #13596 * __->__ #13595 * #13594 * #13593 * #13600 * #13599 * #13598 Differential Revision: [D80800082](https://our.internmc.facebook.com/intern/diff/D80800082) Co-authored-by: ssjia <[email protected]>
1 parent 4d095f9 commit 916c207

File tree

13 files changed

+374
-102
lines changed

13 files changed

+374
-102
lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ vTensor::vTensor(
567567
max_ubo_nbytes_{
568568
calculate_max_ubo_nbytes(min_nbytes_per_ubo_, storage_type)},
569569
uniforms_(),
570+
buffer_meta_(),
570571
// Construct Tensor storage
571572
storage_(std::make_shared<vTensorStorage>(
572573
context,
@@ -611,6 +612,7 @@ vTensor::vTensor(
611612
max_ubo_nbytes_{
612613
calculate_max_ubo_nbytes(min_nbytes_per_ubo_, utils::kTexture3D)},
613614
uniforms_(),
615+
buffer_meta_(),
614616
// Construct Tensor storage
615617
storage_(std::make_shared<vTensorStorage>(context, image)) {
616618
uniform_data_ = std::make_shared<UniformData>(UniformData{
@@ -634,6 +636,7 @@ vTensor::vTensor(vTensor& other)
634636
min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
635637
max_ubo_nbytes_{other.max_ubo_nbytes_},
636638
uniforms_(),
639+
buffer_meta_(),
637640
// Copy Tensor storage
638641
storage_(other.storage_) {
639642
uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
@@ -659,6 +662,7 @@ vTensor::vTensor(
659662
min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
660663
max_ubo_nbytes_{other.max_ubo_nbytes_},
661664
uniforms_(),
665+
buffer_meta_(),
662666
// Copy Tensor storage
663667
storage_(other.storage_) {
664668
uniform_data_ = std::make_shared<UniformData>(UniformData{
@@ -711,6 +715,38 @@ uint32_t vTensor::UniformData::write_attribute(
711715
return 0;
712716
}
713717

718+
vTensor::BufferMetadata::BufferMetadata(
719+
std::vector<int64_t>& src_sizes,
720+
std::vector<int64_t>& src_dim_order,
721+
std::vector<int64_t>& src_strides,
722+
size_t src_numel) {
723+
update(src_sizes, src_dim_order, src_strides, src_numel);
724+
}
725+
726+
void vTensor::BufferMetadata::update(
727+
std::vector<int64_t>& src_sizes,
728+
std::vector<int64_t>& src_dim_order,
729+
std::vector<int64_t>& src_strides,
730+
size_t src_numel) {
731+
int32_t fixed_ndim = utils::safe_downcast<int32_t>(kTensorDimLimit);
732+
733+
std::vector<uint32_t> fu_sizes = flip_and_unsqueeze<uint32_t>(
734+
src_sizes, kTensorSizes, src_numel, fixed_ndim);
735+
std::vector<uint32_t> fu_dim_order = flip_and_unsqueeze<uint32_t>(
736+
src_dim_order, kTensorDimOrder, src_numel, fixed_ndim);
737+
std::vector<uint32_t> fu_strides = flip_and_unsqueeze<uint32_t>(
738+
src_strides, kTensorStrides, src_numel, fixed_ndim);
739+
740+
for (int i = 0; i < fixed_ndim; ++i) {
741+
sizes[i] = fu_sizes.at(i);
742+
dim_order[i] = fu_dim_order.at(i);
743+
strides[i] = fu_strides.at(i);
744+
}
745+
746+
ndim = utils::safe_downcast<uint32_t>(src_sizes.size());
747+
numel = utils::safe_downcast<uint32_t>(src_numel);
748+
}
749+
714750
vkapi::VulkanImage& vTensor::image(
715751
vkapi::PipelineBarrier& pipeline_barrier,
716752
const vkapi::PipelineStageFlags stage) & {
@@ -799,6 +835,15 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
799835
return metadata_ubo_impl(&numel_uniform_offset_, uniform_data_->numel);
800836
}
801837

838+
const vkapi::BufferBindInfo vTensor::buffer_meta_ubo() {
839+
size_t ubo_nbytes = sizeof(BufferMetadata);
840+
if (!buffer_meta_.buffer()) {
841+
BufferMetadata data(sizes_, dim_order_, strides_, numel_);
842+
buffer_meta_ = ParamsBuffer(storage_->context_, data);
843+
}
844+
return vkapi::BufferBindInfo(buffer_meta_.buffer(), 0, ubo_nbytes);
845+
}
846+
802847
VkMemoryRequirements vTensor::get_memory_requirements() const {
803848
switch (storage_type()) {
804849
case utils::kBuffer:
@@ -875,6 +920,11 @@ void vTensor::update_metadata() {
875920
uniforms_.update(
876921
uniform_data_->logical_limits.limits, logical_limits_uniform_offset_);
877922
}
923+
924+
if (buffer_meta_.buffer()) {
925+
BufferMetadata data(sizes_, dim_order_, strides_, numel_);
926+
buffer_meta_.update(data);
927+
}
878928
}
879929

880930
void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
namespace vkcompute {
2020
namespace api {
2121

22+
static constexpr size_t kTensorDimLimit = 8;
23+
2224
/*
2325
* Given a GPUMemoryLayout value, produce a dim order vector that matches the
2426
* given memory layout. The produced dim order vector will be in the NCHW
@@ -262,6 +264,26 @@ class vTensor final {
262264
const Attribute attr);
263265
};
264266

267+
struct BufferMetadata {
268+
uint32_t sizes[kTensorDimLimit];
269+
uint32_t dim_order[kTensorDimLimit];
270+
uint32_t strides[kTensorDimLimit];
271+
uint32_t ndim;
272+
uint32_t numel;
273+
274+
BufferMetadata(
275+
std::vector<int64_t>& sizes,
276+
std::vector<int64_t>& dim_order,
277+
std::vector<int64_t>& strides,
278+
size_t numel);
279+
280+
void update(
281+
std::vector<int64_t>& sizes,
282+
std::vector<int64_t>& dim_order,
283+
std::vector<int64_t>& strides,
284+
size_t numel);
285+
};
286+
265287
private:
266288
/*
267289
* "Core" tensor metadata. They are the minimum amount of information required
@@ -332,6 +354,11 @@ class vTensor final {
332354
*/
333355
ParamsBuffer uniforms_;
334356

357+
/*
358+
* Used to store data for BufferMetadata to pass to shaders as buffer_meta_ubo
359+
*/
360+
ParamsBuffer buffer_meta_;
361+
335362
uint32_t uniforms_size_ = 0u;
336363
uint32_t sizes_uniform_offset_ = kUniformOffsetUnset;
337364
uint32_t dim_order_uniform_offset_ = kUniformOffsetUnset;
@@ -557,6 +584,8 @@ class vTensor final {
557584

558585
const vkapi::BufferBindInfo numel_ubo();
559586

587+
const vkapi::BufferBindInfo buffer_meta_ubo();
588+
560589
public:
561590
inline size_t staging_buffer_numel() const {
562591
return storage_->buffer_len();

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,10 @@ class ComputeGraph final {
357357
return values_.at(idx).toConstTensor().has_buffer_storage();
358358
}
359359

360+
inline bool is_texture_storage(const ValueRef idx) const {
361+
return !is_buffer_storage(idx);
362+
}
363+
360364
/*
361365
* Checks that the following is true:
362366
* 1. The value at `idx` is a tensor
@@ -411,6 +415,10 @@ class ComputeGraph final {
411415
return values_.at(idx).toTensor().sizes_ubo();
412416
}
413417

418+
inline vkapi::BufferBindInfo buffer_meta_ubo(const ValueRef idx) {
419+
return values_.at(idx).toTensor().buffer_meta_ubo();
420+
}
421+
414422
inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
415423
return values_.at(idx).toTensor().strides_ubo();
416424
}

backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ $if IS_COMPARISON_OP:
3434

3535
layout(std430) buffer;
3636

37+
#include "indexing.glslh"
38+
3739
$if IS_COMPARISON_OP:
3840
${layout_declare_tensor(B, "w", "t_out", "uint8", STORAGE)}
3941
$else:
@@ -43,13 +45,11 @@ ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
4345
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
4446

4547
$if STORAGE == "buffer":
48+
${layout_declare_ubo(B, "BufferMetadata", "outp")}
49+
${layout_declare_ubo(B, "BufferMetadata", "inp")}
50+
${layout_declare_ubo(B, "BufferMetadata", "other")}
51+
4652
layout(push_constant) uniform restrict Block {
47-
ivec4 in_sizes;
48-
ivec4 other_sizes;
49-
ivec4 out_strides;
50-
ivec4 in_strides;
51-
ivec4 other_strides;
52-
int out_numel;
5353
float alpha;
5454
};
5555
$else:
@@ -83,25 +83,30 @@ $else:
8383
#ifdef USING_BUFFER
8484

8585
void main() {
86-
const int out_bufi = ivec3(gl_GlobalInvocationID).x;
87-
if (out_bufi >= out_numel) {
86+
const uint out_bufi = gl_GlobalInvocationID.x;
87+
if (out_bufi >= numel(outp)) {
8888
return;
8989
}
9090

9191
// Simple case; no broadcasting
92-
if (in_sizes == other_sizes) {
92+
if (are_equal(inp, other)) {
9393
t_out[out_bufi] = T(op(t_in[out_bufi], t_other[out_bufi], T(alpha)));
9494
return;
9595
}
9696

97-
const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
98-
const ivec4 in_tidx = min(out_tidx, in_sizes - 1);
99-
const ivec4 other_tidx = min(out_tidx, other_sizes - 1);
97+
TensorIndex outp_tidx;
98+
linear_idx_to_tensor_idx(outp, out_bufi, outp_tidx);
99+
100+
TensorIndex inp_tidx = outp_tidx;
101+
clamp_tensor_idx(inp, inp_tidx);
102+
103+
TensorIndex other_tidx = outp_tidx;
104+
clamp_tensor_idx(other, other_tidx);
100105

101-
const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
102-
const int other_bufi = tidx_to_bufi(other_tidx, other_strides);
106+
uint inp_bufi = tensor_idx_to_linear_idx(inp, inp_tidx);
107+
uint other_bufi = tensor_idx_to_linear_idx(other, other_tidx);
103108

104-
t_out[out_bufi] = T(op(t_in[in_bufi], t_other[other_bufi], T(alpha)));
109+
t_out[out_bufi] = T(op(t_in[inp_bufi], t_other[other_bufi], T(alpha)));
105110
}
106111

107112
#else // USING_TEXTURE

backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,40 +4,33 @@
44

55
#define T ${buffer_scalar_type(DTYPE)}
66

7-
#include "indexing_utils.h"
8-
97
${define_required_extensions(DTYPE)}
108

119
layout(std430) buffer;
1210

13-
${layout_declare_tensor(0, "w", "nchw_buf", DTYPE, STORAGE)}
14-
${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
11+
#include "indexing.glslh"
12+
13+
${layout_declare_tensor(B, "w", "nchw_buf", DTYPE, STORAGE)}
14+
${layout_declare_tensor(B, "r", "t_inp", DTYPE, STORAGE)}
1515

16-
$if USE_PUSH_CONST:
17-
layout(push_constant) uniform restrict Block {
18-
ivec4 in_sizes;
19-
ivec4 in_strides;
20-
int numel;
21-
};
22-
$else:
23-
${layout_declare_ubo(2, "ivec4", "in_sizes")}
24-
${layout_declare_ubo(3, "ivec4", "in_strides")}
25-
${layout_declare_ubo(4, "int", "numel")}
16+
${layout_declare_ubo(B, "BufferMetadata", "inp")}
2617

2718
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2819

2920
// This constant is unused in this shader but is kept so that the signature is
3021
// consistent with image_to_nchw.
31-
layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM;
22+
${layout_declare_spec_const(C, "int", "unused", "0")}
3223

3324
void main() {
34-
int nchwi = int(gl_GlobalInvocationID.x);
35-
if (nchwi >= numel) {
25+
uint inp_bufi = gl_GlobalInvocationID.x;
26+
if (inp_bufi >= numel(inp)) {
3627
return;
3728
}
3829

39-
ivec4 in_tidx = nchwi_to_tidx(nchwi, in_sizes);
40-
const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
30+
TensorIndex inp_tidx;
31+
linear_idx_to_tensor_idx(inp, inp_bufi, inp_tidx);
32+
33+
uint nchwi = tensor_idx_to_contiguous_idx(inp, inp_tidx);
4134

42-
nchw_buf[nchwi] = t_in[in_bufi];
35+
nchw_buf[nchwi] = t_inp[inp_bufi];
4336
}

backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,3 @@ buffer_to_nchw:
1919
- VALUE: int32
2020
shader_variants:
2121
- NAME: buffer_to_nchw
22-
- NAME: buffer_to_nchw_no_pc
23-
USE_PUSH_CONST: False

0 commit comments

Comments
 (0)