Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ ValueRef ComputeGraph::add_tensor_like(
// Forwards to add_tensor() using the sizes and dtype of the existing value
// `idx`, together with the caller-supplied `memory_layout`, and returns the
// resulting ValueRef.
// NOTE(review): two return statements appear back to back below, so only the
// first one can execute as written. This looks like the pre-change line and
// its replacement from a rendered diff (the second additionally forwards
// storage_type_of(idx)) - confirm which one the real file contains.
ValueRef ComputeGraph::add_tensor_like(
const ValueRef idx,
const utils::GPUMemoryLayout memory_layout) {
return add_tensor(sizes_of(idx), dtype_of(idx), memory_layout);
return add_tensor(
sizes_of(idx), dtype_of(idx), storage_type_of(idx), memory_layout);
}

ValueRef ComputeGraph::add_tensor(
Expand Down
29 changes: 17 additions & 12 deletions backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@

namespace vkcompute {

ValueRef prepack_arg(
ValueRef check_and_prepack_arg(
ComputeGraph& graph,
ValueRef arg_ref,
const utils::StorageType stype,
int64_t num_channels,
const std::string& debug_name) {
VK_CHECK_COND(
Expand All @@ -33,7 +34,7 @@ ValueRef prepack_arg(
// batch_norm's params are broadcast along the channel dimension.
// In this implementation, we pack the weights along the x dimension, and
// in the shader, we look them up along the x dimension.
return prepack_if_tensor_ref(graph, arg_ref, utils::kWidthPacked);
return prepack_standard(graph, arg_ref, stype, utils::kWidthPacked);
}

void add_native_batch_norm_node(
Expand All @@ -51,22 +52,26 @@ void add_native_batch_norm_node(
VK_CHECK_COND(in_sizes.size() == 4, "BatchNorm only support 4d tensor");
VK_CHECK_COND(out_sizes.size() == 4, "BatchNorm only support 4d tensor");

// Only the first element of the return value is propagated. The remaining 2
// elements are zero-size dummy tensors.
ValueRef out_ref = graph.get_value_list(out_tuple_ref)->at(0);

utils::StorageType stype = graph.storage_type_of(out_ref);

int64_t num_channels = dim_at<kChannel4D>(in_sizes);

ValueRef arg_weight = prepack_arg(graph, weight_ref, num_channels, "weight");
ValueRef arg_bias = prepack_arg(graph, bias_ref, num_channels, "bias");
ValueRef arg_mean = prepack_arg(graph, mean_ref, num_channels, "mean");
ValueRef arg_var = prepack_arg(graph, var_ref, num_channels, "var");
ValueRef arg_weight =
check_and_prepack_arg(graph, weight_ref, stype, num_channels, "weight");
ValueRef arg_bias =
check_and_prepack_arg(graph, bias_ref, stype, num_channels, "bias");
ValueRef arg_mean =
check_and_prepack_arg(graph, mean_ref, stype, num_channels, "mean");
ValueRef arg_var =
check_and_prepack_arg(graph, var_ref, stype, num_channels, "var");
float epsilon = graph.extract_scalar<float>(eps_ref);

vTensorPtr t_in = graph.get_tensor(in_ref);

// Only the first element of the return value is propagated. The remaining 2
// elements are zero-size dummy tensors.
const auto out_tuple_val = graph.get_value_list(out_tuple_ref);

ValueRef out_ref = out_tuple_val->at(0);

VK_CHECK_COND(!graph.val_is_tref(out_ref), "Output should not be tref");
vTensorPtr t_out = graph.get_tensor(out_ref);

Expand Down
5 changes: 2 additions & 3 deletions backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ void add_binary_op_node(
const ValueRef alpha,
const ValueRef out,
const std::string& op_name) {
ValueRef arg1 = prepack_if_tensor_ref(graph, in1);
ValueRef arg2 =
prepack_if_tensor_ref(graph, in2, graph.estimate_memory_layout_of(arg1));
ValueRef arg1 = prepack_standard_like(graph, in1, out, true);
ValueRef arg2 = prepack_standard_like(graph, in2, out, true);

vTensorPtr t_in1 = graph.get_tensor(arg1);
vTensorPtr t_in2 = graph.get_tensor(arg2);
Expand Down
35 changes: 19 additions & 16 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ utils::uvec3 create_conv2d_global_wg_size(
void add_conv2d_node(
ComputeGraph& graph,
const ValueRef in,
const ValueRef weight,
const ValueRef weight_data,
const ValueRef bias,
const ValueRef stride,
const ValueRef padding,
Expand All @@ -330,19 +330,18 @@ void add_conv2d_node(
const int64_t groups_val = graph.get_int(groups);

const Conv2dMethod method =
get_conv2d_method(graph, weight, groups_val, transposed_val);
get_conv2d_method(graph, weight_data, groups_val, transposed_val);

ValueRef arg_in = prepack_if_tensor_ref(graph, in);
ValueRef arg_weight = prepack_weights(graph, weight, method);
ValueRef arg_weight = prepack_weights(graph, weight_data, method);
ValueRef arg_bias = prepack_biases(
graph,
bias,
weight,
weight_data,
transposed_val,
/* storage_type = */ utils::kTexture2D,
/* memory_layout = */ utils::kWidthPacked);

vTensorPtr t_in = graph.get_tensor(arg_in);
vTensorPtr t_in = graph.get_tensor(in);
vTensorPtr t_out = graph.get_tensor(out);
if (t_in->sizes().at(0) > 1) {
VK_THROW("conv2d: input batch size > 1 is not supported yet!");
Expand All @@ -351,20 +350,25 @@ void add_conv2d_node(

Kernel2dParams kernel_params = create_kernel2d_params(
graph,
weight,
weight_data,
/*kernel_size_only = */ false,
stride,
padding,
dilation);
Conv2dParams extra_params =
create_conv2d_params(graph, weight, kernel_params, transposed_val);
create_conv2d_params(graph, weight_data, kernel_params, transposed_val);

OutputParams out_params = {out_min_val, out_max_val};

check_conv2d_params(kernel_params, transposed_val);

vkapi::ShaderInfo shader = get_conv2d_shader(
graph, *t_out, /*prepack_weights = */ false, method, weight, clamp_out);
graph,
*t_out,
/*prepack_weights = */ false,
method,
weight_data,
clamp_out);

graph.execute_nodes().emplace_back(new DispatchNode(
graph,
Expand All @@ -373,7 +377,7 @@ void add_conv2d_node(
graph.create_local_wg_size(out),
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
Expand All @@ -386,7 +390,7 @@ void add_conv2d_node(
{},
// Resizing Logic
resize_conv2d_node,
{weight, stride, padding, dilation, transposed, output_padding}));
{weight_data, stride, padding, dilation, transposed, output_padding}));
}

void add_conv1d_node(
Expand All @@ -402,9 +406,8 @@ void add_conv1d_node(
const ValueRef out_max,
const ValueRef out,
const bool clamp_out) {
ValueRef arg_in = prepack_if_tensor_ref(graph, in);
ValueRef arg_weight =
prepack_if_tensor_ref(graph, weight, utils::kWidthPacked);
ValueRef arg_weight = prepack_standard(
graph, weight, graph.storage_type_of(out), utils::kWidthPacked);
ValueRef arg_bias = prepack_biases(
graph,
bias,
Expand All @@ -422,7 +425,7 @@ void add_conv1d_node(
out_max_val = graph.extract_scalar<float>(out_max);
}

vTensorPtr t_in = graph.get_tensor(arg_in);
vTensorPtr t_in = graph.get_tensor(in);
vTensorPtr t_weight = graph.get_tensor(arg_weight);
vTensorPtr t_bias = graph.get_tensor(arg_bias);
vTensorPtr t_out = graph.get_tensor(out);
Expand Down Expand Up @@ -471,7 +474,7 @@ void add_conv1d_node(
local_size,
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/Embedding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ void add_embedding_node(
}

// Operator entry point for embedding: reads its operands out of `args`
// (args[0] -> weight, args[1] -> in, args[5] -> out) and forwards them to
// add_embedding_node().
// NOTE(review): `weight` and `in` are each declared twice in this body as
// shown, which would not compile. This looks like the pre-change lines
// (prepack_if_tensor_ref) and their replacements (direct use of args[1] and
// prepack_standard_like against `out`) from a rendered diff - confirm which
// set the real file contains.
void embedding(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef weight = prepack_if_tensor_ref(graph, args[0]);
ValueRef in = prepack_if_tensor_ref(graph, args[1]);
ValueRef in = args[1];
ValueRef out = args[5];
ValueRef weight = prepack_standard_like(graph, args[0], out);

add_embedding_node(graph, weight, in, out);
}
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ int64_t get_dim_idx(ComputeGraph& graph, ValueRef in, ValueRef dim_ref) {
}

void index_select(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef in = prepack_if_tensor_ref(graph, args[0]);
ValueRef in = args[0];
ValueRef dim_ref = args[1];
ValueRef idx = prepack_if_tensor_ref(graph, args[2]);
ValueRef idx = args[2];
ValueRef out = args[3];

const int64_t dim_idx = get_dim_idx(graph, in, dim_ref);
Expand Down
19 changes: 12 additions & 7 deletions backends/vulkan/runtime/graph/ops/impl/Linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,11 @@ void add_addmm_naive_node(
const ValueRef out,
const Params& params,
const ValueRef mat2_is_transposed) {
ValueRef self = prepack_if_tensor_ref(graph, self_data, utils::kWidthPacked);
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
utils::StorageType stype = graph.storage_type_of(out);
ValueRef self = prepack_standard(
graph, self_data, stype, utils::kWidthPacked, /*passthrough = */ true);
ValueRef mat2 = prepack_standard(
graph, mat2_data, stype, utils::kHeightPacked, /*passthrough = */ true);

std::string kernel_name =
graph.get_bool(mat2_is_transposed) ? "linear_naive" : "addmm_naive";
Expand Down Expand Up @@ -145,9 +148,11 @@ void add_addmm_optimized_node(
const ValueRef out,
const Params& params,
const ValueRef mat2_is_transposed) {
ValueRef self =
prepack_if_tensor_ref(graph, self_data, utils::kChannelsPacked);
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
utils::StorageType stype = graph.storage_type_of(out);
ValueRef self = prepack_standard(
graph, self_data, stype, utils::kChannelsPacked, /*passthrough=*/true);
ValueRef mat2 = prepack_standard(
graph, mat2_data, stype, utils::kHeightPacked, /*passthrough=*/true);

// Ensure mat1 is width packed
ValueRef mat1_W_packed = graph.add_tensor_like(mat1, utils::kWidthPacked);
Expand Down Expand Up @@ -276,8 +281,8 @@ void linear(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef weight_data = args.at(1);
ValueRef bias = args.at(2);
ValueRef out = args.at(3);
ValueRef weight =
prepack_if_tensor_ref(graph, weight_data, utils::kWidthPacked);
ValueRef weight = prepack_standard(
graph, weight_data, graph.storage_type_of(out), utils::kWidthPacked);
ValueRef mat2_is_transposed = graph.add_scalar(true);
if (graph.val_is_none(bias)) {
return add_matmul_node(graph, input, weight, out, mat2_is_transposed);
Expand Down
21 changes: 18 additions & 3 deletions backends/vulkan/runtime/graph/ops/impl/MatMul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ void add_matmul_naive_buffer_node(
const ValueRef mat2_data,
const ValueRef out,
const ValueRef mat2_is_transposed) {
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
ValueRef mat2 = prepack_standard(
graph,
mat2_data,
graph.storage_type_of(out),
utils::kHeightPacked,
/*passthrough = */ true);

std::string kernel_name = "matmul_naive_buffer";
add_dtype_suffix(kernel_name, graph.dtype_of(out));
Expand Down Expand Up @@ -103,7 +108,12 @@ void add_matmul_naive_texture3d_node(
const ValueRef mat2_data,
const ValueRef out,
const ValueRef mat2_is_transposed) {
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
ValueRef mat2 = prepack_standard(
graph,
mat2_data,
graph.storage_type_of(out),
utils::kHeightPacked,
/*passthrough = */ true);

std::string kernel_name = graph.get_bool(mat2_is_transposed)
? "matmul_transposed_naive"
Expand Down Expand Up @@ -146,7 +156,12 @@ void add_matmul_optimized_node(
const ValueRef mat2_data,
const ValueRef out,
const ValueRef mat2_is_transposed) {
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
ValueRef mat2 = prepack_standard(
graph,
mat2_data,
graph.storage_type_of(out),
utils::kHeightPacked,
/*passthrough = */ true);

// Ensure mat1 is width packed
ValueRef mat1_W_packed = graph.add_tensor_like(mat1, utils::kWidthPacked);
Expand Down
17 changes: 7 additions & 10 deletions backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ void add_native_layer_norm_node(
ComputeGraph& graph,
const ValueRef in,
const ValueRef normalized_shape,
const ValueRef weight,
const ValueRef bias,
const ValueRef weight_data,
const ValueRef bias_data,
const ValueRef eps,
const ValueRef out) {
const auto normalized_shape_dim =
Expand All @@ -67,19 +67,16 @@ void add_native_layer_norm_node(
VK_THROW("native_layer_norm only supports normalized_shape with dim == 1");
}

if (graph.val_is_none(weight)) {
if (graph.val_is_none(weight_data)) {
VK_THROW("native_layer_norm requires weight to be non-None");
}

if (graph.val_is_none(bias)) {
if (graph.val_is_none(bias_data)) {
VK_THROW("native_layer_norm requires bias to be non-None");
}

ValueRef arg_in = prepack_if_tensor_ref(graph, in);
ValueRef arg_weight = prepack_if_tensor_ref(
graph, weight, graph.estimate_memory_layout_of(arg_in));
ValueRef arg_bias = prepack_if_tensor_ref(
graph, bias, graph.estimate_memory_layout_of(arg_in));
ValueRef arg_weight = prepack_standard_like(graph, weight_data, in);
ValueRef arg_bias = prepack_standard_like(graph, bias_data, in);

const auto out_val = graph.get_value_list(out);
vTensorPtr t_out = graph.get_tensor(out_val->at(0));
Expand Down Expand Up @@ -107,7 +104,7 @@ void add_native_layer_norm_node(
// Inputs and Outputs
{{{out_val->at(0), out_val->at(1), out_val->at(2)},
vkapi::MemoryAccessType::WRITE},
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{t_out->logical_limits_ubo(),
t_out->sizes_ubo(),
Expand Down
10 changes: 4 additions & 6 deletions backends/vulkan/runtime/graph/ops/impl/Pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ void add_max_pool2d_node(
const ValueRef dilation,
const ValueRef ceil_mode,
const ValueRef out) {
ValueRef arg = prepack_if_tensor_ref(graph, in);
vTensorPtr t_in = graph.get_tensor(arg);
vTensorPtr t_in = graph.get_tensor(in);

const auto out_val = graph.get_value_list(out);
vTensorPtr t_out = graph.get_tensor(out_val->at(0));
Expand Down Expand Up @@ -100,7 +99,7 @@ void add_max_pool2d_node(
local_size,
// Inputs and Outputs
{{{out_val->at(0), out_val->at(1)}, vkapi::MemoryAccessType::WRITE},
{arg, vkapi::MemoryAccessType::READ}},
{in, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
Expand Down Expand Up @@ -149,8 +148,7 @@ void add_avg_pool2d_node(
const ValueRef count_include_pad,
const ValueRef divisor_override,
const ValueRef out) {
ValueRef arg = prepack_if_tensor_ref(graph, in);
vTensorPtr t_in = graph.get_tensor(arg);
vTensorPtr t_in = graph.get_tensor(in);
vTensorPtr t_out = graph.get_tensor(out);

check_pool2d_args(*t_in, *t_out);
Expand All @@ -174,7 +172,7 @@ void add_avg_pool2d_node(
local_size,
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{arg, vkapi::MemoryAccessType::READ}},
{in, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{t_out->logical_limits_ubo(),
t_in->sizes_ubo(),
Expand Down
Loading
Loading