Commit 5488056
[ET-VK][BE] Remove usage of vTensorPtr and get_tensor (#13167)
Note that although the volume of changes in this diff is very high, the changes themselves are extremely mechanical. This diff was written almost entirely with an LLM, but I have looked through each file and validated the changes.

## Changes

This diff updates callsites that use `graph->get_tensor(value_ref)` to instead use the `ValueRef` directly. A simple example (and the vast majority of changes in this diff) is a change such as:

```
vTensorPtr tensor = graph->get_tensor(tensor_ref);
some_fn(tensor->sizes());
```

becoming

```
std::vector<int64_t> tensor_sizes = graph->sizes_of(tensor_ref);
some_fn(tensor_sizes);
```

or

```
some_fn(graph->sizes_of(tensor_ref));
```

## Motivation

Overall, the goal is to make the `get_tensor()` API protected so that it can only be used in specific situations. Beyond the primary motivation of improving the consistency of API usage throughout the codebase, there is a practical benefit as well: `get_tensor()` has the limitation that no values can be added to the graph while the returned `vTensorPtr` is in scope. Also, forcing tensor modifications via functions like `virtual_resize()` to go through the `ComputeGraph` will allow the graph to track changes for the purposes of determining when a command buffer re-encode or resize propagation is necessary, which will result in performance benefits.

Differential Revision: [D79564594](https://our.internmc.facebook.com/intern/diff/D79564594/)
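To make the `vTensorPtr` scoping limitation concrete, here is a minimal hypothetical sketch; it is not code from this diff, and `graph->add_tensor(sizes, dtype)` is assumed here to be the API that appends a new tensor value to the graph:

```cpp
// Hypothetical sketch of the get_tensor() scoping hazard. Assumes
// graph->add_tensor(sizes, dtype) appends a new tensor value to the graph.
void sketch(ComputeGraph* graph, const ValueRef out_ref) {
  // Hazardous: while `out` is in scope, no values may be added to the graph,
  // since mutating the value list can invalidate the pointer.
  vTensorPtr out = graph->get_tensor(out_ref);
  // graph->add_tensor(out->sizes(), out->dtype());  // would violate the rule

  // Preferred after this diff: query metadata through the ComputeGraph so no
  // tensor pointer is held across graph mutations.
  const std::vector<int64_t> out_sizes = graph->sizes_of(out_ref);
  const ValueRef scratch =
      graph->add_tensor(out_sizes, graph->dtype_of(out_ref));
  (void)scratch;
}
```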
1 parent 72ef7b1 · commit 5488056


55 files changed: +740 −809 lines changed

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 14 additions & 9 deletions

```diff
@@ -390,18 +390,20 @@ bool maybe_resize_input(
     const size_t input_i,
     executorch::aten::Tensor& et_tensor) {
   ValueRef in_tensor_ref = graph->inputs()[input_i].value;
-  vTensorPtr in_tensor = graph->get_tensor(in_tensor_ref);
+
+  const std::vector<int64_t> in_tensor_vk_sizes =
+      graph->sizes_of(in_tensor_ref);
 
   ET_CHECK_MSG(
-      et_tensor.dim() == in_tensor->sizes().size(),
+      et_tensor.dim() == in_tensor_vk_sizes.size(),
       "Cannot resize input tensor: old ndim %zu does not match new ndim %zu",
-      static_cast<size_t>(in_tensor->sizes().size()),
+      static_cast<size_t>(in_tensor_vk_sizes.size()),
       static_cast<size_t>(et_tensor.dim()));
 
   bool should_resize = false;
   std::vector<int64_t> new_sizes(et_tensor.dim());
   for (size_t i = 0; i < et_tensor.dim(); i++) {
-    if (in_tensor->sizes()[i] != et_tensor.sizes()[i]) {
+    if (in_tensor_vk_sizes[i] != et_tensor.sizes()[i]) {
       should_resize = true;
     }
     new_sizes.at(i) = et_tensor.sizes()[i];
@@ -411,10 +413,11 @@ bool maybe_resize_input(
     graph->resize_input(input_i, new_sizes);
   }
 
+  const size_t in_tensor_vk_numel = graph->numel_of(in_tensor_ref);
   ET_CHECK_MSG(
-      in_tensor->numel() == et_tensor.numel(),
+      in_tensor_vk_numel == et_tensor.numel(),
       "Vulkan tensor numel %zu does not match ET tensor numel %zu",
-      static_cast<size_t>(in_tensor->numel()),
+      static_cast<size_t>(in_tensor_vk_numel),
       static_cast<size_t>(et_tensor.numel()));
 
   return should_resize;
@@ -445,12 +448,14 @@ void maybe_resize_output(
     const size_t output_i,
     executorch::aten::Tensor& et_tensor) {
   ValueRef out_tensor_ref = graph->outputs()[output_i].value;
-  vTensorPtr out_tensor = graph->get_tensor(out_tensor_ref);
+
+  const std::vector<int64_t> out_tensor_vk_sizes =
+      graph->sizes_of(out_tensor_ref);
 
   executorch::aten::SizesType new_output_size[kTensorDimensionLimit];
-  size_t ndim = out_tensor->sizes().size();
+  size_t ndim = out_tensor_vk_sizes.size();
   for (int i = 0; i < ndim; ++i) {
-    new_output_size[i] = out_tensor->sizes()[i];
+    new_output_size[i] = out_tensor_vk_sizes[i];
   }
 
   executorch::aten::ArrayRef<executorch::aten::SizesType> output_size{
```

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 43 additions & 0 deletions

```diff
@@ -704,6 +704,38 @@ utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
   return create_local_wg_size(create_global_wg_size(idx));
 }
 
+void ComputeGraph::bind_tensor_to_descriptor_set(
+    const ValueRef ref,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::MemoryAccessFlags access_type,
+    vkapi::DescriptorSet& descriptor_set,
+    const uint32_t idx) {
+  vTensorPtr tensor = get_tensor(ref);
+  if (tensor->buffer()) {
+    vkapi::VulkanBuffer& buffer = tensor->buffer(
+        pipeline_barrier, vkapi::PipelineStage::COMPUTE, access_type);
+    descriptor_set.bind(idx, buffer);
+  } else {
+    vkapi::VulkanImage& image = tensor->image(
+        pipeline_barrier, vkapi::PipelineStage::COMPUTE, access_type);
+    descriptor_set.bind(idx, image);
+  }
+}
+
+void ComputeGraph::bind_value_to_descriptor_set(
+    const ValueRef ref,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::MemoryAccessFlags access_type,
+    vkapi::DescriptorSet& descriptor_set,
+    const uint32_t idx) {
+  if (val_is_tensor(ref)) {
+    bind_tensor_to_descriptor_set(
+        ref, pipeline_barrier, access_type, descriptor_set, idx);
+  } else if (val_is_staging(ref)) {
+    descriptor_set.bind(idx, get_staging(ref)->buffer());
+  }
+}
+
 void ComputeGraph::copy_into_staging(
     const ValueRef idx,
     const void* data,
@@ -891,6 +923,17 @@ void ComputeGraph::execute() {
   execute_count_++;
 }
 
+void ComputeGraph::virtual_clone(const ValueRef dst, const ValueRef src) {
+  get_tensor(dst)->virtual_clone(*get_tensor(src));
+}
+
+void ComputeGraph::virtual_transpose(
+    const ValueRef tensor,
+    const int64_t dim0,
+    const int64_t dim1) {
+  get_tensor(tensor)->virtual_transpose(dim0, dim1);
+}
+
 void ComputeGraph::resize_input(
     const int64_t idx,
     const std::vector<int64_t>& new_sizes) {
```
backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 31 additions & 0 deletions

```diff
@@ -319,6 +319,10 @@ class ComputeGraph final {
     return values_.at(idx).toConstTensor().numel();
   }
 
+  inline size_t staging_buffer_numel_of(const ValueRef idx) const {
+    return values_.at(idx).toConstTensor().staging_buffer_numel();
+  }
+
   inline utils::StorageType storage_type_of(const ValueRef idx) const {
     return values_.at(idx).toConstTensor().storage_type();
   }
@@ -832,6 +836,20 @@ class ComputeGraph final {
    */
   utils::uvec3 create_local_wg_size(const ValueRef idx);
 
+  void bind_tensor_to_descriptor_set(
+      const ValueRef ref,
+      vkapi::PipelineBarrier& pipeline_barrier,
+      const vkapi::MemoryAccessFlags accessType,
+      vkapi::DescriptorSet& descriptor_set,
+      const uint32_t idx);
+
+  void bind_value_to_descriptor_set(
+      const ValueRef ref,
+      vkapi::PipelineBarrier& pipeline_barrier,
+      const vkapi::MemoryAccessFlags access_type,
+      vkapi::DescriptorSet& descriptor_set,
+      const uint32_t idx);
+
   //
   // Input/Output
   //
@@ -890,14 +908,27 @@ class ComputeGraph final {
 
   void execute();
 
+  //
+  // Tensor View
+  //
+
+  void virtual_clone(const ValueRef dst, const ValueRef src);
+
+  void virtual_transpose(
+      const ValueRef tensor,
+      const int64_t dim0,
+      const int64_t dim1);
+
   //
   // Dynamic Shape support
   //
 
   void resize_input(const int64_t idx, const std::vector<int64_t>& new_sizes);
+
   void virtual_resize(
       const ValueRef idx,
       const std::vector<int64_t>& new_sizes);
+
   void propagate_resize();
 
   //
```
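For context, here is a hedged sketch of how a view-style op could use the new Tensor View wrappers; the function name and argument layout are hypothetical, modeled on the resize callbacks in the op diffs that follow:

```cpp
// Hypothetical resize callback built on the new Tensor View wrappers. The
// ArgGroup/extra_args layout mirrors resize_arange_node below but is assumed.
void resize_transpose_view_node(
    ComputeGraph* graph,
    const std::vector<ArgGroup>& args,
    const std::vector<ValueRef>& extra_args) {
  const ValueRef out = args.at(0).refs.at(0);
  const ValueRef in = args.at(1).refs.at(0);
  const int64_t dim0 = graph->extract_scalar<int64_t>(extra_args.at(0));
  const int64_t dim1 = graph->extract_scalar<int64_t>(extra_args.at(1));

  // Routing view updates through ComputeGraph lets the graph track changes
  // when deciding whether a re-encode or resize propagation is needed.
  graph->virtual_clone(out, in);
  graph->virtual_transpose(out, dim0, dim1);
}
```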

backends/vulkan/runtime/graph/ops/BlitNode.cpp

Lines changed: 6 additions & 8 deletions

```diff
@@ -26,11 +26,9 @@ BlitNode::BlitNode(
 }
 
 void BlitNode::encode(ComputeGraph* graph) {
-  auto src_tensor = graph->get_tensor(src_);
-  auto dst_tensor = graph->get_tensor(dst_);
   VK_CHECK_COND(
-      src_tensor->storage_type() != utils::kBuffer &&
-          dst_tensor->storage_type() != utils::kBuffer,
+      graph->storage_type_of(src_) != utils::kBuffer &&
+          graph->storage_type_of(dst_) != utils::kBuffer,
       "BlitNode: Only texture backed tensors are supported.");
 
   api::Context* const context = graph->context();
@@ -41,18 +39,18 @@ void BlitNode::encode(ComputeGraph* graph) {
   // Hack to get timing data for non shader op
   std::string kernel_name("Blit_");
   kernel_name.reserve(32);
-  kernel_name += vkapi::to_string(src_tensor->dtype());
+  kernel_name += vkapi::to_string(graph->dtype_of(src_));
   kernel_name += "_to_";
-  kernel_name += vkapi::to_string(dst_tensor->dtype());
+  kernel_name += vkapi::to_string(graph->dtype_of(dst_));
 
   context->report_shader_dispatch_start(
       kernel_name, utils::uvec3(), utils::WorkgroupSize(), node_id_);
 
   context->register_blit(
       pipeline_barrier,
-      src_tensor->image(
+      graph->get_tensor(src_)->image(
           pipeline_barrier, vkapi::PipelineStage::TRANSFER, vkapi::kRead),
-      dst_tensor->image(
+      graph->get_tensor(dst_)->image(
          pipeline_barrier, vkapi::PipelineStage::TRANSFER, vkapi::kWrite));
 
   context->report_shader_dispatch_end();
```

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 12 additions & 14 deletions

```diff
@@ -18,9 +18,8 @@ namespace vkcompute {
 
 vkapi::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
   std::string noop_shader_name("no_op");
-  vTensorPtr t_packed = graph.get_tensor(packed);
-  add_dtype_suffix(noop_shader_name, *t_packed);
-  add_storage_type_suffix(noop_shader_name, *t_packed);
+  add_dtype_suffix(noop_shader_name, graph.dtype_of(packed));
+  add_storage_type_suffix(noop_shader_name, graph.storage_type_of(packed));
   return VK_KERNEL_FROM_STR(noop_shader_name);
 }
 
@@ -48,13 +47,13 @@ PrepackNode::PrepackNode(
 }
 
 api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
-  vTensorPtr packed = graph->get_tensor(packed_);
-
-  // If no TensorRef is provided, create a staging buffer of zeros according to
-  // the vkapi::vTensor metadata.
+  // If no TensorRef is provided, create a staging buffer of zeros based on the
+  // Tensor metadata.
   if (graph->val_is_none(tref_)) {
-    size_t numel = utils::multiply_integers(packed->sizes());
-    api::StagingBuffer staging(graph->context(), packed->dtype(), numel);
+    const std::vector<int64_t> packed_sizes = graph->sizes_of(packed_);
+    size_t numel = utils::multiply_integers(packed_sizes);
+    api::StagingBuffer staging(
+        graph->context(), graph->dtype_of(packed_), numel);
     staging.set_staging_zeros();
     return staging;
   }
@@ -80,7 +79,6 @@ void PrepackNode::encode(ComputeGraph* graph) {
 
   context->check_device_capabilities(shader_);
 
-  vTensorPtr packed = graph->get_tensor(packed_);
   api::StagingBuffer staging = create_staging_buffer(graph);
 
   std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
@@ -101,8 +99,8 @@ void PrepackNode::encode(ComputeGraph* graph) {
       shader_, local_workgroup_size_, spec_vars_, push_constants_offset);
 
   uint32_t idx = 0;
-  bind_tensor_to_descriptor_set(
-      *packed,
+  graph->bind_tensor_to_descriptor_set(
+      packed_,
       pipeline_barrier,
       vkapi::MemoryAccessType::WRITE,
       descriptor_set,
@@ -128,8 +126,8 @@ void PrepackNode::encode(ComputeGraph* graph) {
   vkapi::DescriptorSet descriptor_set = context->get_descriptor_set(
       noop_shader_, utils::WorkgroupSize(1, 1, 1));
 
-  bind_tensor_to_descriptor_set(
-      *packed,
+  graph->bind_tensor_to_descriptor_set(
+      packed_,
       pipeline_barrier,
       vkapi::MemoryAccessType::READ,
       descriptor_set,
```

backends/vulkan/runtime/graph/ops/impl/Arange.cpp

Lines changed: 10 additions & 12 deletions

```diff
@@ -20,22 +20,22 @@ void resize_arange_node(
     ComputeGraph* graph,
     const std::vector<ArgGroup>& args,
     const std::vector<ValueRef>& extra_args) {
-  vTensorPtr out = graph->get_tensor(args[0].refs[0]);
+  const ValueRef out = args.at(0).refs.at(0);
 
   int start_val = 0;
   int step_val = 1;
-  if (!graph->val_is_none(extra_args[0])) {
-    start_val = graph->extract_scalar<int64_t>(extra_args[0]);
+  if (!graph->val_is_none(extra_args.at(0))) {
+    start_val = graph->extract_scalar<int64_t>(extra_args.at(0));
   }
-  int end_val = graph->extract_scalar<int64_t>(extra_args[1]);
-  if (!graph->val_is_none(extra_args[2])) {
-    step_val = graph->extract_scalar<int64_t>(extra_args[2]);
+  const int end_val = graph->extract_scalar<int64_t>(extra_args.at(1));
+  if (!graph->val_is_none(extra_args.at(2))) {
+    step_val = graph->extract_scalar<int64_t>(extra_args.at(2));
   }
 
-  std::vector<int64_t> out_sizes = {
+  const std::vector<int64_t> out_sizes = {
       utils::div_up(end_val - start_val, step_val)};
 
-  out->virtual_resize(out_sizes);
+  graph->virtual_resize(out, out_sizes);
 }
 
 void check_arange_input(
@@ -82,11 +82,9 @@ void add_arange_node(
     }
   }
 
-  vTensorPtr t_out = graph.get_tensor(out);
-
   std::string kernel_name("arange");
   kernel_name.reserve(kShaderNameReserve);
-  add_dtype_suffix(kernel_name, *t_out);
+  add_dtype_suffix(kernel_name, graph.dtype_of(out));
 
   graph.execute_nodes().emplace_back(new DispatchNode(
       graph,
@@ -96,7 +94,7 @@ void add_arange_node(
       // Inputs and Outputs
       {{out, vkapi::kWrite}},
       // Shader params buffers
-      {t_out->sizes_ubo(),
+      {graph.sizes_ubo(out),
        graph.create_params_buffer(start_val),
        graph.create_params_buffer(step_val)},
       // Push Constants
```

backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp

Lines changed: 16 additions & 18 deletions

```diff
@@ -46,44 +46,42 @@ void add_native_batch_norm_node(
     ValueRef var_ref,
     ValueRef eps_ref,
     ValueRef out_tuple_ref) {
-  std::vector<int64_t> in_sizes = graph.get_tensor(in_ref)->sizes();
-  std::vector<int64_t> out_sizes = graph.get_tensor(in_ref)->sizes();
+  const std::vector<int64_t> in_sizes = graph.sizes_of(in_ref);
+  const std::vector<int64_t> out_sizes = graph.sizes_of(in_ref);
 
   VK_CHECK_COND(in_sizes.size() == 4, "BatchNorm only support 4d tensor");
   VK_CHECK_COND(out_sizes.size() == 4, "BatchNorm only support 4d tensor");
 
   // Only the first element of the return value is propagated. The remaining 2
   // elements are zero-size dummy tensor.
-  ValueRef out_ref = graph.get_value_list(out_tuple_ref)->at(0);
+  const ValueRef out_ref = graph.get_value_list(out_tuple_ref)->at(0);
 
-  utils::StorageType stype = graph.storage_type_of(out_ref);
+  const utils::StorageType stype = graph.storage_type_of(out_ref);
 
-  int64_t num_channels = dim_at<kChannel4D>(in_sizes);
+  const int64_t num_channels = dim_at<kChannel4D>(in_sizes);
 
-  ValueRef arg_weight =
+  const ValueRef arg_weight =
       check_and_prepack_arg(graph, weight_ref, stype, num_channels, "weight");
-  ValueRef arg_bias =
+  const ValueRef arg_bias =
       check_and_prepack_arg(graph, bias_ref, stype, num_channels, "bias");
-  ValueRef arg_mean =
+  const ValueRef arg_mean =
       check_and_prepack_arg(graph, mean_ref, stype, num_channels, "mean");
-  ValueRef arg_var =
+  const ValueRef arg_var =
       check_and_prepack_arg(graph, var_ref, stype, num_channels, "var");
-  float epsilon = graph.extract_scalar<float>(eps_ref);
-
-  vTensorPtr t_in = graph.get_tensor(in_ref);
+  const float epsilon = graph.extract_scalar<float>(eps_ref);
 
   VK_CHECK_COND(!graph.val_is_tref(out_ref), "Output should not be tref");
-  vTensorPtr t_out = graph.get_tensor(out_ref);
 
+  const std::vector<int64_t> out_tensor_sizes = graph.sizes_of(out_ref);
   VK_CHECK_COND(
-      dim_at<kChannel4D>(t_out->sizes()) == num_channels,
+      dim_at<kChannel4D>(out_tensor_sizes) == num_channels,
       "out channel must match in channel");
 
   std::string kernel_name = "batchnorm";
-  add_dtype_suffix(kernel_name, *t_out);
+  add_dtype_suffix(kernel_name, graph.dtype_of(out_ref));
 
-  int32_t num_texel_per_batch =
-      utils::div_up_4((dim_at<kChannel4D>(t_in->sizes())));
+  const int32_t num_texel_per_batch =
+      utils::div_up_4((dim_at<kChannel4D>(in_sizes)));
 
   graph.execute_nodes().emplace_back(new DispatchNode(
       graph,
@@ -92,7 +90,7 @@ void add_native_batch_norm_node(
       graph.create_local_wg_size(out_ref),
       {{out_ref, vkapi::kWrite},
        {{in_ref, arg_weight, arg_bias, arg_mean, arg_var}, vkapi::kRead}},
-      {t_out->logical_limits_ubo(),
+      {graph.logical_limits_ubo(out_ref),
       graph.create_params_buffer(epsilon),
       graph.create_params_buffer(num_texel_per_batch)},
       // Push Constants
```
