13 changes: 6 additions & 7 deletions backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
@@ -48,19 +48,18 @@ $else:

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}

$if STORAGE == "buffer":
${layout_declare_spec_const(C, "int", "out_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "in_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "other_packed_dim", "DEFAULT_LAYOUT")}
const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);
$else:
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int packed_dim = unhash_packed_dim(out_layout);

${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);

${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 other_axis_map = unhash_axis_map(other_layout);

#ifdef USING_BUFFER
@@ -77,7 +76,7 @@ void main() {
return;
}

const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim);
const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
const ivec4 in_tidx = min(out_tidx, in_sizes - 1);
const ivec4 other_tidx = min(out_tidx, other_sizes - 1);

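Note: the layout specialization constants above are hashes that pack one value per 4-bit nibble; the buffer branch unpacks a dim order from them, while the texture branch unpacks an axis map and a packed dim. Below is a minimal C++ sketch of that decoding, mirroring the unhash_* macros and default constants from indexing_utils.h; it is an illustration for checking the math, not code from this PR.

#include <array>
#include <cstdint>
#include <cstdio>

using ivec4 = std::array<int, 4>;

// Each nibble of the hash stores one entry; the packed dim lives in the
// fifth nibble (bits 16..19), matching unhash_axis_map / unhash_packed_dim.
ivec4 unhash_nibbles(uint32_t h) {
  return {int(h & 0xf), int((h >> 4) & 0xf), int((h >> 8) & 0xf),
          int((h >> 12) & 0xf)};
}
int unhash_packed_dim(uint32_t h) { return int((h >> 16) & 0xf); }

int main() {
  const uint32_t kDefaultLayout = 0x02210;    // DEFAULT_LAYOUT
  const uint32_t kDefaultDimOrder = 0x03210;  // DEFAULT_DIM_ORDER
  const ivec4 axis_map = unhash_nibbles(kDefaultLayout);     // (0, 1, 2, 2)
  const ivec4 dim_order = unhash_nibbles(kDefaultDimOrder);  // (0, 1, 2, 3)
  std::printf("axis_map=(%d,%d,%d,%d) packed_dim=%d\n", axis_map[0],
              axis_map[1], axis_map[2], axis_map[3],
              unhash_packed_dim(kDefaultLayout));
  std::printf("dim_order=(%d,%d,%d,%d)\n", dim_order[0], dim_order[1],
              dim_order[2], dim_order[3]);
  return 0;
}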
56 changes: 25 additions & 31 deletions backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h
@@ -68,21 +68,6 @@
*/
#define mod4(x) ((x) & 3)

/*
* Find the packed dimension of a tensor given its strides. The packed dimension
* is the "fastest moving" dimension which will have a stride of 1.
*/
int find_packed_dim(const ivec4 strides) {
int packed_dim = 0;
for (int i = 0; i <= 3; i++) {
if (strides[i] == 1) {
packed_dim = i;
break;
}
}
return packed_dim;
}

/*
* Get the staging buffer indices that contain the data of the texel that
* corresponds to the provided tensor index. Since the texel has 4 elements,
@@ -129,27 +114,26 @@ int tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes) {
tidx.x;
}

// TODO(ssjia): make this function use dim order so that it can work with any
// dim order. Currently it assumes that the dim order is contiguous, except for
// the packed dim.
ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const int packed_dim) {
ivec4 bufi_to_tidx(int bufi, const ivec4 strides, const ivec4 dim_order) {
ivec4 idx;
for (int i = 3; i >= 0; i--) {
if (i != packed_dim) {
idx[i] = bufi / strides[i];
bufi %= strides[i];
}
int dim = dim_order[i];
idx[dim] = bufi / strides[dim];
bufi %= strides[dim];
}
idx[packed_dim] = bufi;
return idx;
}

// Convenience overload of the above function, which will determine the packed
// dim from the strides automatically so it doesn't have to be passed in as a
// function argument.
ivec4 bufi_to_tidx(const int bufi, const ivec4 strides) {
int packed_dim = find_packed_dim(strides);
return bufi_to_tidx(bufi, strides, packed_dim);
/*
* bufi_to_tidx but assumes that the tensor is contiguous
*/
ivec4 contiguous_bufi_to_tidx(int bufi, const ivec4 strides) {
ivec4 idx;
for (int i = 3; i >= 0; i--) {
idx[i] = bufi / strides[i];
bufi %= strides[i];
}
return idx;
}

int tidx_to_bufi(const ivec4 tidx, ivec4 strides) {
@@ -269,12 +253,22 @@ ivec3 lpos_to_pos(const ivec3 lpos, const ivec4 axis_map) {
* e.g. 0x11021, 1 -> ivec4(1, 2, 0, 1)
*/
#define unhash_axis_map(hash) \
ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf))
(ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf)))

/*
 * Convert a hashed dim order into an ivec4 dim order, where each nibble of
 * the hash stores one dimension index.
 * e.g. 0x03210 -> ivec4(0, 1, 2, 3)
 */
#define unhash_dim_order(hash) \
(ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf)))

#define unhash_packed_dim(hash) int(hash >> 16 & 0xf)

#define DEFAULT_LAYOUT 0x02210

#define DEFAULT_DIM_ORDER 0x03210

#define DEFAULT_DIM_ORDER_IVEC4 ivec4(0, 1, 2, 3)

/************************
* Deprecated Functions *
************************/
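For reference, here is a C++ transcription of the new bufi_to_tidx and contiguous_bufi_to_tidx together with a small worked example. It assumes the WHCN convention used elsewhere in this backend, with dim_order[0] holding the fastest-moving dim (consistent with DEFAULT_DIM_ORDER and the loop direction above); the sizes and strides are made up for illustration and are not from the PR.

#include <array>
#include <cstdio>

using ivec4 = std::array<int, 4>;

// New bufi_to_tidx: peel strides off from the slowest-moving dim
// (dim_order[3]) down to the fastest-moving dim (dim_order[0]).
ivec4 bufi_to_tidx(int bufi, const ivec4& strides, const ivec4& dim_order) {
  ivec4 idx{};
  for (int i = 3; i >= 0; i--) {
    const int dim = dim_order[i];
    idx[dim] = bufi / strides[dim];
    bufi %= strides[dim];
  }
  return idx;
}

// contiguous_bufi_to_tidx: only valid when the strides are already ordered so
// that strides[3] >= strides[2] >= strides[1] >= strides[0].
ivec4 contiguous_bufi_to_tidx(int bufi, const ivec4& strides) {
  ivec4 idx{};
  for (int i = 3; i >= 0; i--) {
    idx[i] = bufi / strides[i];
    bufi %= strides[i];
  }
  return idx;
}

int main() {
  // Hypothetical tensor with WHCN sizes (5, 4, 3, 2).
  // Contiguous (width-packed): strides (1, 5, 20, 60), dim order (0, 1, 2, 3).
  const ivec4 c_strides = {1, 5, 20, 60};
  const ivec4 c_order = {0, 1, 2, 3};
  const ivec4 a = bufi_to_tidx(87, c_strides, c_order);  // (2, 1, 1, 1)

  // Channels-packed: C has stride 1, so strides are (3, 15, 1, 60); visited
  // from slowest to fastest the dims are N, H, W, C, i.e. dim order (2, 0, 1, 3).
  const ivec4 cp_strides = {3, 15, 1, 60};
  const ivec4 cp_order = {2, 0, 1, 3};
  const ivec4 b = bufi_to_tidx(87, cp_strides, cp_order);  // (4, 1, 0, 1)

  std::printf("(%d,%d,%d,%d) (%d,%d,%d,%d)\n", a[0], a[1], a[2], a[3], b[0],
              b[1], b[2], b[3]);
  return 0;
}

The removed overload that recovered the packed dim from the strides could only invert layouts that were contiguous apart from the packed dim; iterating over an explicit dim order removes that restriction, while contiguous_bufi_to_tidx keeps the cheap path for shaders such as linear_qcsnw, which now assert contiguity on the C++ side.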
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw.glsl
@@ -62,7 +62,7 @@ void main() {
return;
}

const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, 0);
const ivec4 out_tidx = contiguous_bufi_to_tidx(out_bufi, out_strides);

const FLOAT_T scale = t_scales[out_tidx.x];

17 changes: 8 additions & 9 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
@@ -10,8 +10,8 @@ ${define_required_extensions(DTYPE)}

layout(std430) buffer;

${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "nchw_in", DTYPE, STORAGE)}
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "nchw_in", DTYPE, STORAGE)}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
@@ -20,15 +20,14 @@ $if USE_PUSH_CONST:
int numel;
};
$else:
${layout_declare_ubo(2, "ivec4", "out_sizes")}
${layout_declare_ubo(3, "ivec4", "out_strides")}
${layout_declare_ubo(4, "int", "numel")}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "out_strides")}
${layout_declare_ubo(B, "int", "numel")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// This constant is unused in this shader but is kept so that the signature is
// consistent with nchw_to_image.
${layout_declare_spec_const(C, "int", "UNUSED_layout", "0")}
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_DIM_ORDER")}
const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);
${layout_declare_spec_const(C, "int", "transpose_hw", "0")}

void main() {
@@ -37,7 +36,7 @@ void main() {
return;
}

ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides);
ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);

ivec4 sizes = out_sizes;
if (transpose_hw == 1) {
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/select.glslh
@@ -9,6 +9,8 @@
#ifndef SELECT_GLSLH
#define SELECT_GLSLH

#ifndef USING_BUFFER

/*
* Enable the fast path if a texel loaded from the input texture can be used as
* is to store to the output texture. The following conditions must be met:
@@ -29,6 +31,8 @@ bool can_use_fast_path() {
return true;
}

#endif // USING_BUFFER

/*
* Given an output tensor index, return the corresponding input tensor index for
* the select operator. This is done by "inserting" the select index at the
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/slice.glslh
@@ -9,6 +9,8 @@
#ifndef SLICE_GLSLH
#define SLICE_GLSLH

#ifndef USING_BUFFER

/**
* Enable the fast path if a texel loaded from the input texture can be used as
* is to store to the output texture. The following conditions must be met:
@@ -26,6 +28,8 @@ bool can_use_fast_path() {
return true;
}

#endif // USING_BUFFER

/*
* Converts output tensor indices to input tensor indices for the slice operation.
* This function maps the output indices to the corresponding input indices based on
8 changes: 5 additions & 3 deletions backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl
@@ -37,8 +37,10 @@ layout(push_constant) uniform restrict Block {
int selected_dim;
};

${layout_declare_spec_const(C, "int", "out_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "in_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}

const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

@@ -50,7 +52,7 @@ void main() {
return;
}

const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim);
const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);

const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
28 changes: 8 additions & 20 deletions backends/vulkan/runtime/graph/ops/glsl/where.glsl
@@ -37,40 +37,28 @@ $if STORAGE == "buffer":
${layout_declare_ubo(B, "ivec4", "cond_strides")}
${layout_declare_ubo(B, "ivec4", "self_strides")}
${layout_declare_ubo(B, "ivec4", "other_strides")}

${layout_declare_spec_const(C, "int", "out_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "cond_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "self_packed_dim", "DEFAULT_LAYOUT")}
${layout_declare_spec_const(C, "int", "other_packed_dim", "DEFAULT_LAYOUT")}
$else:
${layout_declare_ubo(B, "ivec3", "out_limits")}

${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_DIM_ORDER")}

const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

#ifdef USING_BUFFER

void main() {
int out_bufi = int(gl_GlobalInvocationID.x);
// ivec4 tidx = ivec4(gl_GlobalInvocationID, 0);
// int out_bufi = tidx_to_bufi(tidx, out_strides);
// int cond_bufi = tidx_to_bufi(tidx, cond_strides);
// int self_bufi = tidx_to_bufi(tidx, self_strides);
// int other_bufi = tidx_to_bufi(tidx, other_strides);
if (out_bufi >= out_numl) {
return;
}

const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim);
out_bufi = tidx_to_bufi(out_tidx, out_strides);

const ivec4 cond_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim);
const int cond_bufi = tidx_to_bufi(cond_tidx, cond_strides);

const ivec4 self_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim);
const int self_bufi = tidx_to_bufi(self_tidx, self_strides);
const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);

const ivec4 other_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim);
const int other_bufi = tidx_to_bufi(other_tidx, other_strides);
const int cond_bufi = tidx_to_bufi(out_tidx, cond_strides);
const int self_bufi = tidx_to_bufi(out_tidx, self_strides);
const int other_bufi = tidx_to_bufi(out_tidx, other_strides);

COND_T cond = t_condition[cond_bufi] ;
T v_self = t_self[self_bufi];
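After this change the buffer path recovers the output tensor index once and then projects it onto each operand's strides, instead of re-deriving a tensor index per operand. A compact C++ sketch of that per-element flow follows; it assumes tidx_to_bufi is the usual tidx-dot-strides product (its body sits outside this hunk), and the struct and function names are illustrative only.

#include <array>

using ivec4 = std::array<int, 4>;

// Assumed equivalent of tidx_to_bufi: dot product of tensor index and strides.
int tidx_to_bufi(const ivec4& tidx, const ivec4& strides) {
  return tidx[0] * strides[0] + tidx[1] * strides[1] + tidx[2] * strides[2] +
         tidx[3] * strides[3];
}

// Per output element: one tensor index, three stride projections.
struct WhereIndices {
  int cond_bufi;
  int self_bufi;
  int other_bufi;
};

WhereIndices where_buffer_indices(
    const ivec4& out_tidx,
    const ivec4& cond_strides,
    const ivec4& self_strides,
    const ivec4& other_strides) {
  return {
      tidx_to_bufi(out_tidx, cond_strides),
      tidx_to_bufi(out_tidx, self_strides),
      tidx_to_bufi(out_tidx, other_strides)};
}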
6 changes: 3 additions & 3 deletions backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
@@ -143,9 +143,9 @@ void add_binary_op_buffer_node(
PushConstantDataInfo(&alpha_val, sizeof(float)),
}},
// Specialization Constants
{graph.packed_dim_of(out),
graph.packed_dim_of(in1),
graph.packed_dim_of(in2)},
{graph.hashed_layout_of(out),
graph.hashed_layout_of(in1),
graph.hashed_layout_of(in2)},
// Resize Args
{},
// Resizing Logic
@@ -43,6 +43,10 @@ void check_linear_qcsnw_args(
VK_CHECK_COND(
utils::val_at(-1, scales_sizes) == utils::val_at(-2, qmat2_sizes));
}

if (graph.is_buffer_storage(out)) {
VK_CHECK_COND(graph.is_contiguous(out));
}
}

void resize_linear_qcsnw_node(
16 changes: 5 additions & 11 deletions backends/vulkan/runtime/graph/ops/impl/Transfer.cpp
@@ -55,7 +55,6 @@ void add_transfer_copy_node(
} transfer_params{static_cast<int32_t>(dim_whcn)};

std::vector<PushConstantDataInfo> push_constants;
vkapi::SpecVarList spec_vars;

if (graph.is_buffer_storage(out)) {
push_constants = {
@@ -64,23 +63,18 @@
graph.strides_pc_of(in),
graph.numel_pc_of(out),
PushConstantDataInfo(&transfer_params, sizeof(transfer_params))};

spec_vars = {
graph.packed_dim_of(out),
graph.packed_dim_of(in),
};
} else {
push_constants = {
graph.sizes_pc_of(out),
graph.sizes_pc_of(in),
PushConstantDataInfo(&transfer_params, sizeof(transfer_params))};

spec_vars = {
graph.hashed_layout_of(out),
graph.hashed_layout_of(in),
};
}

vkapi::SpecVarList spec_vars = {
graph.hashed_layout_of(out),
graph.hashed_layout_of(in),
};

// Determine the shader directly
std::string kernel_name;
if (transfer_type == TransferType::SELECT) {
7 changes: 2 additions & 5 deletions backends/vulkan/runtime/graph/ops/impl/Where.cpp
@@ -54,7 +54,7 @@ void add_where_texture_node(
// Push Constants
{},
// Specialization Constants
{graph.packed_dim_of(out)},
{graph.hashed_layout_of(out)},
// Resize Arguments
{},
// Resizing Logic
@@ -96,10 +96,7 @@ void add_where_buffer_node(
// Push Constants
{},
// Specialization Constants
{graph.packed_dim_of(out),
graph.packed_dim_of(cond),
graph.packed_dim_of(self),
graph.packed_dim_of(other)},
{graph.hashed_layout_of(out)},
// Resize Arguments
{},
// Resizing Logic
6 changes: 5 additions & 1 deletion backends/vulkan/test/op_tests/cases.py
@@ -52,13 +52,17 @@ def get_binary_elementwise_inputs():
((S, S1, S2), (S, S1, 1), 2.0),
((S, S1, S2), (S, 1, S2), 2.0),
((XS, S, S1, S2), (XS, S, 1, 1), 2.0),
((3, 64, 1), (1, 64, 1)),
]
)
test_suite.layouts = [
"utils::kWidthPacked",
"utils::kChannelsPacked",
]
test_suite.storage_types = ["utils::kBuffer", "utils::kTexture3D"]
test_suite.storage_types = [
"utils::kBuffer",
"utils::kTexture3D",
]
return test_suite


3 changes: 2 additions & 1 deletion backends/vulkan/test/utils/test_utils.cpp
@@ -26,13 +26,14 @@ void record_nchw_to_buffer_op(
vkapi::VulkanBuffer& src_buffer,
api::vTensor& v_dst) {
vkapi::PipelineBarrier pipeline_barrier{};
vkapi::SpecVarList specialization_constants = {v_dst.hashed_layout()};

context->submit_compute_job(
get_nchw_to_tensor_shader(v_dst, true, false),
pipeline_barrier,
{uint32_t(v_dst.numel()), 1, 1},
{64, 1, 1},
{},
specialization_constants,
VK_NULL_HANDLE,
0,
v_dst.buffer(