Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ ValueRef ComputeGraph::add_tensor_like(
ValueRef ComputeGraph::add_tensor_like(
const ValueRef idx,
const utils::GPUMemoryLayout memory_layout) {
return add_tensor(sizes_of(idx), dtype_of(idx), memory_layout);
return add_tensor(
sizes_of(idx), dtype_of(idx), storage_type_of(idx), memory_layout);
}

ValueRef ComputeGraph::add_tensor(
Expand Down
29 changes: 17 additions & 12 deletions backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@

namespace vkcompute {

ValueRef prepack_arg(
ValueRef check_and_prepack_arg(
ComputeGraph& graph,
ValueRef arg_ref,
const utils::StorageType stype,
int64_t num_channels,
const std::string& debug_name) {
VK_CHECK_COND(
Expand All @@ -33,7 +34,7 @@ ValueRef prepack_arg(
// batch_norm's params are broadcast along the channel dimension.
// In this implementation, we pack the weights along the x dimension, and
// in the shader, we look them up along the x dimension.
return prepack_if_tensor_ref(graph, arg_ref, utils::kWidthPacked);
return prepack_standard(graph, arg_ref, stype, utils::kWidthPacked);
}

void add_native_batch_norm_node(
Expand All @@ -51,22 +52,26 @@ void add_native_batch_norm_node(
VK_CHECK_COND(in_sizes.size() == 4, "BatchNorm only support 4d tensor");
VK_CHECK_COND(out_sizes.size() == 4, "BatchNorm only support 4d tensor");

// Only the first element of the return value is propagated. The remaining 2
// elements are zero-size dummy tensors.
ValueRef out_ref = graph.get_value_list(out_tuple_ref)->at(0);

utils::StorageType stype = graph.storage_type_of(out_ref);

int64_t num_channels = dim_at<kChannel4D>(in_sizes);

ValueRef arg_weight = prepack_arg(graph, weight_ref, num_channels, "weight");
ValueRef arg_bias = prepack_arg(graph, bias_ref, num_channels, "bias");
ValueRef arg_mean = prepack_arg(graph, mean_ref, num_channels, "mean");
ValueRef arg_var = prepack_arg(graph, var_ref, num_channels, "var");
ValueRef arg_weight =
check_and_prepack_arg(graph, weight_ref, stype, num_channels, "weight");
ValueRef arg_bias =
check_and_prepack_arg(graph, bias_ref, stype, num_channels, "bias");
ValueRef arg_mean =
check_and_prepack_arg(graph, mean_ref, stype, num_channels, "mean");
ValueRef arg_var =
check_and_prepack_arg(graph, var_ref, stype, num_channels, "var");
float epsilon = graph.extract_scalar<float>(eps_ref);

vTensorPtr t_in = graph.get_tensor(in_ref);

// Only the first element of the return value is propagated. The remaining 2
// elements are zero-size dummy tensors.
const auto out_tuple_val = graph.get_value_list(out_tuple_ref);

ValueRef out_ref = out_tuple_val->at(0);

VK_CHECK_COND(!graph.val_is_tref(out_ref), "Output should not be tref");
vTensorPtr t_out = graph.get_tensor(out_ref);

Expand Down
5 changes: 2 additions & 3 deletions backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ void add_binary_op_node(
const ValueRef alpha,
const ValueRef out,
const std::string& op_name) {
ValueRef arg1 = prepack_if_tensor_ref(graph, in1);
ValueRef arg2 =
prepack_if_tensor_ref(graph, in2, graph.estimate_memory_layout_of(arg1));
ValueRef arg1 = prepack_standard_like(graph, in1, out, true);
ValueRef arg2 = prepack_standard_like(graph, in2, out, true);

vTensorPtr t_in1 = graph.get_tensor(arg1);
vTensorPtr t_in2 = graph.get_tensor(arg2);
Expand Down
35 changes: 19 additions & 16 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ utils::uvec3 create_conv2d_global_wg_size(
void add_conv2d_node(
ComputeGraph& graph,
const ValueRef in,
const ValueRef weight,
const ValueRef weight_data,
const ValueRef bias,
const ValueRef stride,
const ValueRef padding,
Expand All @@ -330,19 +330,18 @@ void add_conv2d_node(
const int64_t groups_val = graph.get_int(groups);

const Conv2dMethod method =
get_conv2d_method(graph, weight, groups_val, transposed_val);
get_conv2d_method(graph, weight_data, groups_val, transposed_val);

ValueRef arg_in = prepack_if_tensor_ref(graph, in);
ValueRef arg_weight = prepack_weights(graph, weight, method);
ValueRef arg_weight = prepack_weights(graph, weight_data, method);
ValueRef arg_bias = prepack_biases(
graph,
bias,
weight,
weight_data,
transposed_val,
/* storage_type = */ utils::kTexture2D,
/* memory_layout = */ utils::kWidthPacked);

vTensorPtr t_in = graph.get_tensor(arg_in);
vTensorPtr t_in = graph.get_tensor(in);
vTensorPtr t_out = graph.get_tensor(out);
if (t_in->sizes().at(0) > 1) {
VK_THROW("conv2d: input batch size > 1 is not supported yet!");
Expand All @@ -351,20 +350,25 @@ void add_conv2d_node(

Kernel2dParams kernel_params = create_kernel2d_params(
graph,
weight,
weight_data,
/*kernel_size_only = */ false,
stride,
padding,
dilation);
Conv2dParams extra_params =
create_conv2d_params(graph, weight, kernel_params, transposed_val);
create_conv2d_params(graph, weight_data, kernel_params, transposed_val);

OutputParams out_params = {out_min_val, out_max_val};

check_conv2d_params(kernel_params, transposed_val);

vkapi::ShaderInfo shader = get_conv2d_shader(
graph, *t_out, /*prepack_weights = */ false, method, weight, clamp_out);
graph,
*t_out,
/*prepack_weights = */ false,
method,
weight_data,
clamp_out);

graph.execute_nodes().emplace_back(new DispatchNode(
graph,
Expand All @@ -373,7 +377,7 @@ void add_conv2d_node(
graph.create_local_wg_size(out),
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
Expand All @@ -386,7 +390,7 @@ void add_conv2d_node(
{},
// Resizing Logic
resize_conv2d_node,
{weight, stride, padding, dilation, transposed, output_padding}));
{weight_data, stride, padding, dilation, transposed, output_padding}));
}

void add_conv1d_node(
Expand All @@ -402,9 +406,8 @@ void add_conv1d_node(
const ValueRef out_max,
const ValueRef out,
const bool clamp_out) {
ValueRef arg_in = prepack_if_tensor_ref(graph, in);
ValueRef arg_weight =
prepack_if_tensor_ref(graph, weight, utils::kWidthPacked);
ValueRef arg_weight = prepack_standard(
graph, weight, graph.storage_type_of(out), utils::kWidthPacked);
ValueRef arg_bias = prepack_biases(
graph,
bias,
Expand All @@ -422,7 +425,7 @@ void add_conv1d_node(
out_max_val = graph.extract_scalar<float>(out_max);
}

vTensorPtr t_in = graph.get_tensor(arg_in);
vTensorPtr t_in = graph.get_tensor(in);
vTensorPtr t_weight = graph.get_tensor(arg_weight);
vTensorPtr t_bias = graph.get_tensor(arg_bias);
vTensorPtr t_out = graph.get_tensor(out);
Expand Down Expand Up @@ -471,7 +474,7 @@ void add_conv1d_node(
local_size,
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/Embedding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ void add_embedding_node(
}

void embedding(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef weight = prepack_if_tensor_ref(graph, args[0]);
ValueRef in = prepack_if_tensor_ref(graph, args[1]);
ValueRef in = args[1];
ValueRef out = args[5];
ValueRef weight = prepack_standard_like(graph, args[0], out);

add_embedding_node(graph, weight, in, out);
}
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ int64_t get_dim_idx(ComputeGraph& graph, ValueRef in, ValueRef dim_ref) {
}

void index_select(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef in = prepack_if_tensor_ref(graph, args[0]);
ValueRef in = args[0];
ValueRef dim_ref = args[1];
ValueRef idx = prepack_if_tensor_ref(graph, args[2]);
ValueRef idx = args[2];
ValueRef out = args[3];

const int64_t dim_idx = get_dim_idx(graph, in, dim_ref);
Expand Down
19 changes: 12 additions & 7 deletions backends/vulkan/runtime/graph/ops/impl/Linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,11 @@ void add_addmm_naive_node(
const ValueRef out,
const Params& params,
const ValueRef mat2_is_transposed) {
ValueRef self = prepack_if_tensor_ref(graph, self_data, utils::kWidthPacked);
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
utils::StorageType stype = graph.storage_type_of(out);
ValueRef self = prepack_standard(
graph, self_data, stype, utils::kWidthPacked, /*passthrough = */ true);
ValueRef mat2 = prepack_standard(
graph, mat2_data, stype, utils::kHeightPacked, /*passthrough = */ true);

std::string kernel_name =
graph.get_bool(mat2_is_transposed) ? "linear_naive" : "addmm_naive";
Expand Down Expand Up @@ -145,9 +148,11 @@ void add_addmm_optimized_node(
const ValueRef out,
const Params& params,
const ValueRef mat2_is_transposed) {
ValueRef self =
prepack_if_tensor_ref(graph, self_data, utils::kChannelsPacked);
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
utils::StorageType stype = graph.storage_type_of(out);
ValueRef self = prepack_standard(
graph, self_data, stype, utils::kChannelsPacked, /*passthrough=*/true);
ValueRef mat2 = prepack_standard(
graph, mat2_data, stype, utils::kHeightPacked, /*passthrough=*/true);

// Ensure mat1 is width packed
ValueRef mat1_W_packed = graph.add_tensor_like(mat1, utils::kWidthPacked);
Expand Down Expand Up @@ -276,8 +281,8 @@ void linear(ComputeGraph& graph, const std::vector<ValueRef>& args) {
ValueRef weight_data = args.at(1);
ValueRef bias = args.at(2);
ValueRef out = args.at(3);
ValueRef weight =
prepack_if_tensor_ref(graph, weight_data, utils::kWidthPacked);
ValueRef weight = prepack_standard(
graph, weight_data, graph.storage_type_of(out), utils::kWidthPacked);
ValueRef mat2_is_transposed = graph.add_scalar(true);
if (graph.val_is_none(bias)) {
return add_matmul_node(graph, input, weight, out, mat2_is_transposed);
Expand Down
21 changes: 18 additions & 3 deletions backends/vulkan/runtime/graph/ops/impl/MatMul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ void add_matmul_naive_buffer_node(
const ValueRef mat2_data,
const ValueRef out,
const ValueRef mat2_is_transposed) {
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
ValueRef mat2 = prepack_standard(
graph,
mat2_data,
graph.storage_type_of(out),
utils::kHeightPacked,
/*passthrough = */ true);

std::string kernel_name = "matmul_naive_buffer";
add_dtype_suffix(kernel_name, graph.dtype_of(out));
Expand Down Expand Up @@ -103,7 +108,12 @@ void add_matmul_naive_texture3d_node(
const ValueRef mat2_data,
const ValueRef out,
const ValueRef mat2_is_transposed) {
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
ValueRef mat2 = prepack_standard(
graph,
mat2_data,
graph.storage_type_of(out),
utils::kHeightPacked,
/*passthrough = */ true);

std::string kernel_name = graph.get_bool(mat2_is_transposed)
? "matmul_transposed_naive"
Expand Down Expand Up @@ -146,7 +156,12 @@ void add_matmul_optimized_node(
const ValueRef mat2_data,
const ValueRef out,
const ValueRef mat2_is_transposed) {
ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, utils::kHeightPacked);
ValueRef mat2 = prepack_standard(
graph,
mat2_data,
graph.storage_type_of(out),
utils::kHeightPacked,
/*passthrough = */ true);

// Ensure mat1 is width packed
ValueRef mat1_W_packed = graph.add_tensor_like(mat1, utils::kWidthPacked);
Expand Down
17 changes: 7 additions & 10 deletions backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ void add_native_layer_norm_node(
ComputeGraph& graph,
const ValueRef in,
const ValueRef normalized_shape,
const ValueRef weight,
const ValueRef bias,
const ValueRef weight_data,
const ValueRef bias_data,
const ValueRef eps,
const ValueRef out) {
const auto normalized_shape_dim =
Expand All @@ -67,19 +67,16 @@ void add_native_layer_norm_node(
VK_THROW("native_layer_norm only supports normalized_shape with dim == 1");
}

if (graph.val_is_none(weight)) {
if (graph.val_is_none(weight_data)) {
VK_THROW("native_layer_norm requires weight to be non-None");
}

if (graph.val_is_none(bias)) {
if (graph.val_is_none(bias_data)) {
VK_THROW("native_layer_norm requires bias to be non-None");
}

ValueRef arg_in = prepack_if_tensor_ref(graph, in);
ValueRef arg_weight = prepack_if_tensor_ref(
graph, weight, graph.estimate_memory_layout_of(arg_in));
ValueRef arg_bias = prepack_if_tensor_ref(
graph, bias, graph.estimate_memory_layout_of(arg_in));
ValueRef arg_weight = prepack_standard_like(graph, weight_data, in);
ValueRef arg_bias = prepack_standard_like(graph, bias_data, in);

const auto out_val = graph.get_value_list(out);
vTensorPtr t_out = graph.get_tensor(out_val->at(0));
Expand Down Expand Up @@ -107,7 +104,7 @@ void add_native_layer_norm_node(
// Inputs and Outputs
{{{out_val->at(0), out_val->at(1), out_val->at(2)},
vkapi::MemoryAccessType::WRITE},
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{t_out->logical_limits_ubo(),
t_out->sizes_ubo(),
Expand Down
10 changes: 4 additions & 6 deletions backends/vulkan/runtime/graph/ops/impl/Pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ void add_max_pool2d_node(
const ValueRef dilation,
const ValueRef ceil_mode,
const ValueRef out) {
ValueRef arg = prepack_if_tensor_ref(graph, in);
vTensorPtr t_in = graph.get_tensor(arg);
vTensorPtr t_in = graph.get_tensor(in);

const auto out_val = graph.get_value_list(out);
vTensorPtr t_out = graph.get_tensor(out_val->at(0));
Expand Down Expand Up @@ -100,7 +99,7 @@ void add_max_pool2d_node(
local_size,
// Inputs and Outputs
{{{out_val->at(0), out_val->at(1)}, vkapi::MemoryAccessType::WRITE},
{arg, vkapi::MemoryAccessType::READ}},
{in, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
Expand Down Expand Up @@ -149,8 +148,7 @@ void add_avg_pool2d_node(
const ValueRef count_include_pad,
const ValueRef divisor_override,
const ValueRef out) {
ValueRef arg = prepack_if_tensor_ref(graph, in);
vTensorPtr t_in = graph.get_tensor(arg);
vTensorPtr t_in = graph.get_tensor(in);
vTensorPtr t_out = graph.get_tensor(out);

check_pool2d_args(*t_in, *t_out);
Expand All @@ -174,7 +172,7 @@ void add_avg_pool2d_node(
local_size,
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{arg, vkapi::MemoryAccessType::READ}},
{in, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{t_out->logical_limits_ubo(),
t_in->sizes_ubo(),
Expand Down
Loading
Loading