Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions backends/vulkan/op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,8 @@ def register_rotary_emb_op():

@update_features(
[
exir_ops.edge.aten.clone.default,
exir_ops.edge.aten.permute.default,
exir_ops.edge.aten.permute_copy.default,
exir_ops.edge.aten.view_copy.default,
]
)
def register_view_ops():
Expand All @@ -502,6 +500,21 @@ def register_view_ops():
)


@update_features(
    [
        exir_ops.edge.aten.view_copy.default,
        exir_ops.edge.aten.squeeze_copy.dims,
        exir_ops.edge.aten.unsqueeze_copy.default,
        exir_ops.edge.aten.clone.default,
    ]
)
def register_view_ops_with_buffer_meta():
    # View-family ops (view/squeeze/unsqueeze/clone) registered with
    # ANY_STORAGE: presumably these have a buffer-storage implementation in
    # addition to the texture one — confirm against the runtime op impls.
    # supports_resize=True marks them safe under dynamic shapes.
    return OpFeatures(
        inputs_storage=utils.ANY_STORAGE,
        supports_resize=True,
    )


# Fully featured transfer operators (i.e. operators that copy data from the input
# tensor(s) to the output tensor(s)), which have memory layout agnostic implementations
# for both texture and buffer storage types.
Expand Down Expand Up @@ -562,9 +575,6 @@ def register_ported_op():
# Ops ported from PyTorch Vulkan backend. These ops are in a separate registry because they support all packed dimensions
@update_features(
[
# Shape Manipulation
exir_ops.edge.aten.squeeze_copy.dims,
exir_ops.edge.aten.unsqueeze_copy.default,
# Tensor combination
exir_ops.edge.aten.repeat.default,
exir_ops.edge.aten.split_with_sizes_copy.default,
Expand Down
44 changes: 44 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/view_buffer.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#version 450 core

#define PRECISION ${PRECISION}

#define T ${buffer_scalar_type(DTYPE)}

${define_required_extensions(DTYPE)}

layout(std430) buffer;

#include "indexing.glslh"

${layout_declare_tensor(B, "w", "t_outp", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_inp", DTYPE, STORAGE)}

${layout_declare_ubo(B, "BufferMetadata", "outp")}
${layout_declare_ubo(B, "BufferMetadata", "inp")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * View-family copy for buffer-backed tensors. The key invariant is that each
 * element's "contiguous" index is identical in the input and output tensors,
 * even though their shapes and strides differ.
 */
void main() {
  const uint out_bufi = gl_GlobalInvocationID.x;
  if (out_bufi >= numel(outp)) {
    return;
  }

  // Recover the N-dim tensor index of this output buffer element.
  TensorIndex out_tidx;
  linear_idx_to_tensor_idx(outp, out_bufi, out_tidx);

  // Convert through the shared contiguous index to locate the input element
  // occupying the same logical position.
  const uint contig = tensor_idx_to_contiguous_idx(outp, out_tidx);

  TensorIndex in_tidx;
  contiguous_idx_to_tensor_idx(inp, contig, in_tidx);

  const uint in_bufi = tensor_idx_to_linear_idx(inp, in_tidx);
  t_outp[out_bufi] = t_inp[in_bufi];
}
20 changes: 20 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/view_buffer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Codegen spec for the view_buffer compute shader (view_buffer.glsl).
# Generates one variant per supported dtype, buffer storage only.
view_buffer:
  parameter_names_with_default_values:
    DTYPE: float
    STORAGE: buffer
  generate_variant_forall:
    DTYPE:
      - VALUE: half
      - VALUE: float
      - VALUE: double
      - VALUE: int8
      - VALUE: uint8
      - VALUE: int32
  shader_variants:
    - NAME: view_buffer
6 changes: 5 additions & 1 deletion backends/vulkan/runtime/graph/ops/impl/Clone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,11 @@ void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
if (src_storage == utils::kBuffer && dst_storage == utils::kTexture3D) {
return add_buffer_to_image_node(graph, src, dst);
}
VK_THROW("Buffer to buffer memory layout transition not supported yet!");

std::vector<ValueRef> extra_args = {};
// Buffer to buffer copy
return add_view_copy_buffer_node(
graph, src, dst, extra_args, resize_clone_node);
}

// Clone node is not the most efficient implementation for the aten.clone
Expand Down
44 changes: 43 additions & 1 deletion backends/vulkan/runtime/graph/ops/impl/Squeeze.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <executorch/backends/vulkan/runtime/graph/ops/impl/Clone.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/Permute.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

Expand Down Expand Up @@ -55,8 +56,49 @@ void add_squeeze_copy_dims_node(
}
}

/*
 * Resize function for squeeze_copy.dims: computes the output sizes by dropping
 * every dimension listed in extra_args[0] whose size is 1 (matching
 * aten.squeeze semantics — non-unit dims listed are kept).
 *
 * Works entirely on local copies. The previous implementation erased from a
 * sizes copy while decrementing entries of the graph-owned dims IntList in
 * place; since resize functions run on every dynamic-shape update, that
 * corrupted the dims list for all subsequent invocations. Negative dim
 * indices are also normalized here, which the old code did not handle.
 */
void resize_squeeze_node(
    ComputeGraph* graph,
    const std::vector<ArgGroup>& args,
    const std::vector<ValueRef>& extra_args) {
  const ValueRef out = args.at(0).refs.at(0);
  const ValueRef in = args.at(1).refs.at(0);
  const ValueRef dims_ref = extra_args.at(0);

  const IntListPtr dims = graph->get_int_list(dims_ref);

  const std::vector<int64_t> in_sizes = graph->sizes_of(in);
  const int64_t ndim = static_cast<int64_t>(in_sizes.size());

  // Keep each input axis unless it is listed in dims and has size 1.
  std::vector<int64_t> out_sizes;
  out_sizes.reserve(in_sizes.size());
  for (int64_t axis = 0; axis < ndim; ++axis) {
    bool squeeze_this_axis = false;
    for (const int64_t d : *dims) {
      const int64_t normalized = d < 0 ? d + ndim : d;
      if (normalized == axis && in_sizes[axis] == 1) {
        squeeze_this_axis = true;
        break;
      }
    }
    if (!squeeze_this_axis) {
      out_sizes.push_back(in_sizes[axis]);
    }
  }

  graph->virtual_resize(out, out_sizes);
}

/*
 * Entry point for aten.squeeze_copy.dims. args = {in, dims, out}.
 *
 * Note: the pasted body retained a residual pre-change line
 * (`return add_squeeze_copy_dims_node(graph, args[0], args[1], args[2]);`)
 * ahead of the new dispatch logic, so the buffer-storage branch was
 * unreachable; that stale line is removed here.
 */
void squeeze_copy_dims(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  const ValueRef in = args.at(0);
  const ValueRef dims = args.at(1);
  const ValueRef out = args.at(2);

  const std::vector<ValueRef> resize_args = {dims};

  // Squeeze preserves each element's contiguous index, so buffer tensors can
  // reuse the generic view_copy buffer shader with squeeze-specific resizing.
  if (graph.is_buffer_storage(in)) {
    return add_view_copy_buffer_node(
        graph, in, out, resize_args, resize_squeeze_node);
  }
  return add_squeeze_copy_dims_node(graph, in, dims, out);
}

REGISTER_OPERATORS {
Expand Down
37 changes: 36 additions & 1 deletion backends/vulkan/runtime/graph/ops/impl/Unsqueeze.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/Permute.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

Expand Down Expand Up @@ -45,8 +46,42 @@ void add_unsqueeze_node(
add_permute_node(graph, in, permute_dims_ref, out);
}

/*
 * Resize function for unsqueeze_copy: inserts a singleton dimension for each
 * entry of extra_args[0] into the input sizes, then resizes the output.
 * Negative indices are interpreted relative to the rank after insertion,
 * i.e. in the range [-(rank + 1), rank].
 */
void resize_unsqueeze_node(
    ComputeGraph* graph,
    const std::vector<ArgGroup>& args,
    const std::vector<ValueRef>& extra_args) {
  const ValueRef out = args.at(0).refs.at(0);
  const ValueRef in = args.at(1).refs.at(0);
  const IntListPtr dims = graph->get_int_list(extra_args.at(0));

  std::vector<int64_t> new_sizes = graph->sizes_of(in);

  // Dims are applied sequentially, so each insertion sees the rank produced
  // by the previous one.
  for (const int64_t dim : *dims) {
    int64_t insert_at = dim;
    if (insert_at < 0) {
      insert_at += static_cast<int64_t>(new_sizes.size()) + 1;
    }
    new_sizes.insert(new_sizes.begin() + insert_at, 1);
  }

  graph->virtual_resize(out, new_sizes);
}

/*
 * Entry point for aten.unsqueeze_copy. args = {in, dims, out}.
 *
 * Note: the pasted body retained a residual pre-change line
 * (`return add_unsqueeze_node(graph, args[0], args[1], args[2]);`) ahead of
 * the new dispatch logic, so the buffer-storage branch was unreachable; that
 * stale line is removed here.
 */
void unsqueeze(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  const ValueRef in = args.at(0);
  const ValueRef dims = args.at(1);
  const ValueRef out = args.at(2);

  const std::vector<ValueRef> resize_args = {dims};

  // Unsqueeze preserves each element's contiguous index, so buffer tensors
  // can reuse the generic view_copy buffer shader.
  if (graph.is_buffer_storage(in)) {
    return add_view_copy_buffer_node(
        graph, in, out, resize_args, resize_unsqueeze_node);
  }
  return add_unsqueeze_node(graph, in, dims, out);
}

REGISTER_OPERATORS {
Expand Down
41 changes: 40 additions & 1 deletion backends/vulkan/runtime/graph/ops/impl/View.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,47 @@ void add_view_node(
resize_view_node));
}

/*
 * Dispatches the view_buffer shader, which copies elements from `in` to `out`
 * such that corresponding elements share the same contiguous index. The
 * caller supplies resize_args/resize_fn so one shader serves view, squeeze,
 * unsqueeze, and clone.
 */
void add_view_copy_buffer_node(
    ComputeGraph& graph,
    ValueRef in,
    ValueRef out,
    const std::vector<ValueRef>& resize_args,
    const ExecuteNode::ResizeFunction& resize_fn) {
  // Shader variant is selected by output dtype, e.g. "view_buffer_float".
  std::string shader_name = "view_buffer";
  add_dtype_suffix(shader_name, graph.dtype_of(out));

  // BufferMetadata UBOs, declared in the shader as outp then inp.
  const auto outp_meta = graph.buffer_meta_ubo(out);
  const auto inp_meta = graph.buffer_meta_ubo(in);

  graph.execute_nodes().emplace_back(new DynamicDispatchNode(
      graph,
      VK_KERNEL_FROM_STR(shader_name),
      default_pick_global_wg_size,
      default_pick_local_wg_size,
      // Inputs and Outputs
      {{out, vkapi::kWrite}, {in, vkapi::kRead}},
      // Parameter Buffers
      {outp_meta, inp_meta},
      // Push Constants
      {},
      // Specialization Constants
      {},
      // Resize Args
      resize_args,
      // Resizing Logic
      resize_fn));
}

/*
 * Entry point for aten.view_copy. args = {in, sizes, out}.
 *
 * Note: the pasted body retained a residual pre-change line
 * (`return add_view_node(graph, args[0], args[1], args[2]);`) ahead of the
 * new dispatch logic, so the buffer-storage branch was unreachable; that
 * stale line is removed here.
 */
void view(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  const ValueRef in = args.at(0);
  const ValueRef sizes = args.at(1);
  const ValueRef out = args.at(2);

  const std::vector<ValueRef> resize_args = {sizes};

  // Buffer-backed outputs use the generic contiguous-index copy shader.
  if (graph.is_buffer_storage(out)) {
    return add_view_copy_buffer_node(
        graph, in, out, resize_args, resize_view_node);
  }
  return add_view_node(graph, in, sizes, out);
}

REGISTER_OPERATORS {
Expand Down
12 changes: 12 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/View.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@

namespace vkcompute {

/*
 * Dispatches the view_buffer compute shader, which copies data such that
 * corresponding input/output elements share the same "contiguous" index.
 * This can be used to implement any op that preserves contiguous element
 * order between input and output — view_copy, squeeze_copy, unsqueeze_copy,
 * clone, etc. — by supplying op-specific resize_args and resize_fn (the
 * resize function that recomputes the output sizes under dynamic shapes).
 * NOTE(review): callers appear to guard on buffer storage before calling;
 * both tensors are presumably buffer-backed — confirm at call sites.
 */
void add_view_copy_buffer_node(
    ComputeGraph& graph,
    ValueRef in,
    ValueRef out,
    const std::vector<ValueRef>& resize_args,
    const ExecuteNode::ResizeFunction& resize_fn);

void add_view_node(
ComputeGraph& graph,
ValueRef in,
Expand Down
Loading
Loading