Skip to content

Commit 41386c6

Browse files
authored
[ET-VK] Replace Uniform buffers with push constants for binary op
Differential Revision: D66853542
Pull Request resolved: #7230
1 parent c55c794 commit 41386c6

File tree

4 files changed

+28
-22
lines changed

4 files changed

+28
-22
lines changed

backends/vulkan/runtime/graph/ops/DispatchNode.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,15 @@ class PushConstantDataInfo {
4646
payload_.attr = attr;
4747
}
4848

49-
explicit PushConstantDataInfo(const void* data, uint32_t dataLen)
49+
explicit PushConstantDataInfo(
50+
const void* data,
51+
uint32_t dataLen,
52+
uint32_t pushConstantLen = 0)
5053
: tensorUniformData(nullptr) {
5154
VK_CHECK_COND(
5255
dataLen <= 16, "Single push constant data size must be <= 16 bytes");
53-
payload_.dataSize = dataLen;
54-
memcpy(payload_.data, data, payload_.dataSize);
56+
payload_.dataSize = pushConstantLen ? pushConstantLen : dataLen;
57+
memcpy(payload_.data, data, dataLen);
5558
}
5659

5760
/*

backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,6 @@ layout(std430) buffer;
1919
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
2020
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
2121
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
22-
${layout_declare_ubo(B, "ivec4", "out_sizes")}
23-
${layout_declare_ubo(B, "ivec4", "in_sizes")}
24-
${layout_declare_ubo(B, "ivec4", "other_sizes")}
25-
${layout_declare_ubo(B, "ivec2", "broadcast_params")}
26-
${layout_declare_ubo(B, "float", "alpha")}
2722

2823
#include "broadcasting_utils.h"
2924
#include "indexing_utils.h"
@@ -40,6 +35,14 @@ const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
4035
${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}
4136
const lowp ivec4 other_axis_map = unhash_axis_map(other_layout);
4237

38+
layout(push_constant) uniform restrict Block {
39+
ivec4 out_sizes;
40+
ivec4 in_sizes;
41+
ivec4 other_sizes;
42+
ivec2 broadcast_params;
43+
float alpha;
44+
};
45+
4346
void main() {
4447
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
4548
const ivec4 tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, packed_dim);

backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,10 @@ void add_binary_op_node(
6767
alpha_val = graph.extract_scalar<float>(alpha);
6868
}
6969

70-
const utils::ivec2 broadcast_params = create_broadcast_params(*t_in1, *t_in2);
70+
const struct BinaryOpsParams {
71+
const utils::ivec2 broadcast_params;
72+
const float alpha_val;
73+
} binary_ops_params{create_broadcast_params(*t_in1, *t_in2), alpha_val};
7174

7275
std::string kernel_name("binary_");
7376
kernel_name.reserve(kShaderNameReserve);
@@ -83,16 +86,16 @@ void add_binary_op_node(
8386
{{out, vkapi::MemoryAccessType::WRITE},
8487
{{arg1, arg2}, vkapi::MemoryAccessType::READ}},
8588
// Shader params buffers
86-
{t_out->sizes_ubo(),
87-
t_in1->sizes_ubo(),
88-
t_in2->sizes_ubo(),
89-
graph.create_params_buffer(broadcast_params),
90-
graph.create_params_buffer(alpha_val)},
89+
{},
9190
// Specialization Constants
9291
{t_out->hashed_layout(), t_in1->hashed_layout(), t_in2->hashed_layout()},
9392
// Resizing Logic
9493
resize_binary_op_node,
95-
{}));
94+
{},
95+
{{graph.sizes_pc_of(out),
96+
graph.sizes_pc_of(arg1),
97+
graph.sizes_pc_of(arg2),
98+
PushConstantDataInfo(&binary_ops_params, sizeof(binary_ops_params))}}));
9699
}
97100

98101
#define DEFINE_BINARY_OP_WITH_ALPHA_FN(op_name) \

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,9 +1601,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
16011601
auto addFn = VK_GET_OP_FN("aten.add.Tensor");
16021602
addFn(graph, {a.value, b.value, kDummyValueRef, c});
16031603

1604-
// +2: alpha UBO, broadcast UBO for arithmetic shader
1605-
// +1: t.sizes_ubo() for arithmetic shader output c
1606-
expected_vma_allocation_count += 3;
1604+
// no new allocations if binary op uses push constants
16071605
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
16081606

16091607
IOValueRef d = graph.add_input_tensor(
@@ -1624,17 +1622,16 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
16241622
auto mulFn = VK_GET_OP_FN("aten.mul.Tensor");
16251623
mulFn(graph, {c, d.value, e});
16261624

1627-
// +2: alpha UBO, broadcast UBO for arithmetic shader
1628-
// +1: t.sizes_ubo() for arithmetic shader output e
1629-
expected_vma_allocation_count += 3;
1625+
// no new allocations if binary op uses push constants
16301626
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
16311627

16321628
IOValueRef out = {};
16331629
out.value = e;
16341630
out.staging = graph.set_output_tensor(out.value);
16351631

1632+
// +1: staging buffer for the input tensor
16361633
// +1: staging buffer for the output tensor
1637-
expected_vma_allocation_count += 1;
1634+
expected_vma_allocation_count += 2;
16381635
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
16391636

16401637
graph.prepare();

0 commit comments

Comments
 (0)