diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index d3d32266d8b..8554ab9ed90 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -454,7 +454,6 @@ vTensor::vTensor(
       sizes_uniform_(),
       strides_uniform_(),
       numel_uniform_(),
-      axis_map_uniform_(),
       logical_limits_uniform_(),
       // Construct Tensor storage
       storage_(
@@ -501,7 +500,6 @@ vTensor::vTensor(
       sizes_uniform_(),
       strides_uniform_(),
       numel_uniform_(),
-      axis_map_uniform_(),
       logical_limits_uniform_(),
       // Construct Tensor storage
       storage_(context, image) {
@@ -527,7 +525,6 @@ vTensor::vTensor(vTensor& other)
       sizes_uniform_(),
       strides_uniform_(),
       numel_uniform_(),
-      axis_map_uniform_(),
       logical_limits_uniform_(),
       // Copy Tensor storage
       storage_(other.storage_) {}
@@ -553,7 +550,6 @@ vTensor::vTensor(
       sizes_uniform_(),
       strides_uniform_(),
       numel_uniform_(),
-      axis_map_uniform_(),
       logical_limits_uniform_(),
       // Copy Tensor storage
       storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
@@ -630,14 +626,6 @@ const vkapi::BufferBindInfo vTensor::strides_ubo() {
   return vkapi::BufferBindInfo(strides_uniform_.buffer());
 }
 
-const vkapi::BufferBindInfo vTensor::axis_map_ubo() {
-  if (!axis_map_uniform_.buffer()) {
-    axis_map_uniform_ =
-        ParamsBuffer(storage_.context_, utils::make_ivec4(axis_map_));
-  }
-  return vkapi::BufferBindInfo(axis_map_uniform_.buffer());
-}
-
 const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
   if (!logical_limits_uniform_.buffer()) {
     logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
@@ -710,9 +698,6 @@ void vTensor::update_metadata() {
   if (numel_uniform_.buffer()) {
     numel_uniform_.update(numel_);
   }
-  if (axis_map_uniform_.buffer()) {
-    axis_map_uniform_.update(utils::make_ivec4(axis_map_));
-  }
   if (logical_limits_uniform_.buffer()) {
     logical_limits_uniform_.update(logical_limits_);
   }
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index bd83e600385..35b74915d27 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -308,7 +308,6 @@ class vTensor final {
   ParamsBuffer sizes_uniform_;
   ParamsBuffer strides_uniform_;
   ParamsBuffer numel_uniform_;
-  ParamsBuffer axis_map_uniform_;
   ParamsBuffer logical_limits_uniform_;
 
   vTensorStorage storage_;
@@ -430,6 +429,19 @@ class vTensor final {
     return axis_map_;
   }
 
+  /*
+   * Returns the values of the axis map and the packed dimension packed into a
+   * single int32_t, such that it can be used as a specialization constant in a
+   * compute shader. This allows the SPIR-V to bytecode compilation to perform
+   * compile-time constant folding on the axis map. Each element of the axis
+   * map and the value of the packed dimension take up 4 bits in the packed
+   * int32_t.
+   */
+  inline int32_t hashed_layout() const {
+    return axis_map_.at(0) + (axis_map_.at(1) << 4) + (axis_map_.at(2) << 8) +
+        (axis_map_.at(3) << 12) + (packed_dim_ << 16);
+  }
+
   /*
    * Return true if the tensor's axis map is {0, 1, 2, concat_dim}. This means
    * that the width dim is mapped to the width axis of the texture, the height
@@ -463,12 +475,6 @@ class vTensor final {
    */
   const vkapi::BufferBindInfo strides_ubo();
 
-  /*
-   * Returns a GPU buffer containing the texture axis mapping for each dimension
-   * of the tensor, in WHCN dimension order.
- */ - const vkapi::BufferBindInfo axis_map_ubo(); - /* * Returns a GPU buffer containing the logical limits of the tensor. See the * comments for logical_limits() for more context. diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index f2d971a56b3..cabf4e7a882 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -318,6 +318,10 @@ class ComputeGraph final { return values_.at(idx).toConstTensor().estimate_memory_layout(); } + inline int32_t hashed_layout_of(const ValueRef idx) const { + return values_.at(idx).toConstTensor().hashed_layout(); + } + inline int32_t packed_dim_of(const ValueRef idx) const { return values_.at(idx).toConstTensor().packed_dim(); } @@ -338,10 +342,6 @@ class ComputeGraph final { return values_.at(idx).toTensor().numel_ubo(); } - inline vkapi::BufferBindInfo axis_map_ubo(const ValueRef idx) { - return values_.at(idx).toTensor().axis_map_ubo(); - } - inline bool has_standard_axis_map(const ValueRef idx) { return values_.at(idx).toTensor().has_standard_axis_map(); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl index 3d9bf885df6..a4ed494fe6d 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl @@ -16,8 +16,6 @@ $if MAT2_IS_TRANSPOSED: $if HAS_BIAS: #define HAS_BIAS -#include "indexing_utils.h" - ${layout_declare_tensor(B, "w", "out_tensor", DTYPE, "texture3d")} ${layout_declare_tensor(B, "r", "mat1_tensor", DTYPE, "texture3d")} ${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")} @@ -25,22 +23,32 @@ $if HAS_BIAS: ${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")} ${layout_declare_ubo(B, "ivec4", "out_sizes")} ${layout_declare_ubo(B, "ivec3", "out_limits")} -${layout_declare_ubo(B, "ivec4", "out_axis_map")} ${layout_declare_ubo(B, "ivec4", "mat1_sizes")} -${layout_declare_ubo(B, "ivec4", "mat1_axis_map")} ${layout_declare_ubo(B, "ivec4", "mat2_sizes")} -${layout_declare_ubo(B, "ivec4", "mat2_axis_map")} $if HAS_BIAS: ${layout_declare_ubo(B, "ivec4", "bias_sizes")} - ${layout_declare_ubo(B, "ivec4", "bias_axis_map")} ${layout_declare_ubo(B, "float", "alpha", "float", "beta")} +#include "indexing_utils.h" + layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int out_packed_dim = C_DIM; -layout(constant_id = 4) const int mat1_packed_dim = W_DIM; -layout(constant_id = 5) const int mat2_packed_dim = H_DIM; -layout(constant_id = 6) const int bias_packed_dim = W_DIM; +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int out_packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "mat1_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 mat1_axis_map = unhash_axis_map(mat1_layout); +const lowp int mat1_packed_dim = unhash_packed_dim(mat1_layout); + +${layout_declare_spec_const(C, "int", "mat2_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 mat2_axis_map = unhash_axis_map(mat2_layout); +const lowp int mat2_packed_dim = unhash_packed_dim(mat2_layout); + +$if HAS_BIAS: + ${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")} + const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout); + const lowp int bias_packed_dim = unhash_packed_dim(bias_layout); 
#ifdef HAS_BIAS vec4 get_bias_texel_W_packed(ivec3 logical_pos) { diff --git a/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl b/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl index ad794d6db49..05c227f302c 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl @@ -19,27 +19,35 @@ $if BATCH_MODE: $if HAS_BIAS: #define HAS_BIAS -#include "indexing_utils.h" - ${layout_declare_tensor(B, "w", "out_tensor", DTYPE, "texture3d")} ${layout_declare_tensor(B, "r", "mat1_tensor", DTYPE, "texture3d")} ${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")} $if HAS_BIAS: ${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")} ${layout_declare_ubo(B, "ivec4", "out_sizes")} -${layout_declare_ubo(B, "ivec4", "out_axis_map")} ${layout_declare_ubo(B, "ivec4", "mat1_sizes")} -${layout_declare_ubo(B, "ivec4", "mat1_axis_map")} ${layout_declare_ubo(B, "ivec4", "mat2_sizes")} -${layout_declare_ubo(B, "ivec4", "mat2_axis_map")} $if HAS_BIAS: ${layout_declare_ubo(B, "ivec4", "bias_sizes")} - ${layout_declare_ubo(B, "ivec4", "bias_axis_map")} ${layout_declare_ubo(B, "float", "alpha", "float", "beta")} +#include "indexing_utils.h" + layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int out_packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int out_packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "mat1_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 mat1_axis_map = unhash_axis_map(mat1_layout); + +${layout_declare_spec_const(C, "int", "mat2_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 mat2_axis_map = unhash_axis_map(mat2_layout); + +$if HAS_BIAS: + ${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")} + const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout); // To convince the SPIR-V compiler to unroll the loops optimally, need this // macro diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl index 3103c92aea1..be0e1bfa20a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl @@ -14,26 +14,31 @@ #define op(X, Y, A) ${OPERATOR} -#include "broadcasting_utils.h" -#include "indexing_utils.h" - layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec4", "out_sizes")} -${layout_declare_ubo(B, "ivec4", "out_axis_map")} ${layout_declare_ubo(B, "ivec4", "in_sizes")} -${layout_declare_ubo(B, "ivec4", "in_axis_map")} ${layout_declare_ubo(B, "ivec4", "other_sizes")} -${layout_declare_ubo(B, "ivec4", "other_axis_map")} ${layout_declare_ubo(B, "ivec2", "broadcast_params")} ${layout_declare_ubo(B, "float", "alpha")} +#include "broadcasting_utils.h" +#include "indexing_utils.h" + layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", 
"DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); + +${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 other_axis_map = unhash_axis_map(other_layout); void main() { const ivec3 lpos = ivec3(gl_GlobalInvocationID); diff --git a/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl index 4fd6e2f14aa..34e80b6ec11 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl @@ -21,12 +21,13 @@ layout(std430) buffer; ${layout_declare_buffer(B, "w", "nchw_out", "int")} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec4", "tensor_sizes")} -${layout_declare_ubo(B, "ivec4", "axis_map")} ${layout_declare_ubo(B, "int", "out_numel")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 axis_map = unhash_axis_map(t_layout); +const lowp int packed_dim = unhash_packed_dim(t_layout); void main() { const int out_buf_idx = int(gl_GlobalInvocationID.x); diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl index fe6d7ba7a96..e4880d8a229 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl @@ -14,8 +14,6 @@ #define op(X, A, B) ${OPERATOR} -#include "indexing_utils.h" - layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} @@ -26,17 +24,26 @@ ${layout_declare_tensor(B, "r", "bias_in", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec3", "out_limits")} ${layout_declare_ubo(B, "ivec4", "in_sizes")} -${layout_declare_ubo(B, "ivec4", "out_axis_map")} -${layout_declare_ubo(B, "ivec4", "in_axis_map")} -${layout_declare_ubo(B, "ivec4", "kernel_axis_map")} -${layout_declare_ubo(B, "ivec4", "bias_axis_map")} - ${layout_declare_ubo(B,"int", "kernel_size", "int", "stride", "int", "padding", "int", "dilation", "int", "in_group_size", "int", "out_group_size")} ${layout_declare_ubo(B, "float", "out_min", "float", "out_max")} +#include "indexing_utils.h" + layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); + +${layout_declare_spec_const(C, "int", "kernel_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 kernel_axis_map = unhash_axis_map(kernel_layout); + +${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout); + // Let us define // // input = (N, in_C, in_L), diff --git a/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl b/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl index f02049dc2a8..862ccdad304 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl @@ -14,17 +14,14 @@ layout(std430) buffer; -#include "indexing_utils.h" +${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", 
"existing_out", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} -${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)} -${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)} -${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)} +${layout_declare_ubo(B, "ivec4", "out_sizes")} +${layout_declare_ubo(B, "ivec4", "in_sizes")} -${layout_declare_ubo(3, "ivec4", "out_sizes")} -${layout_declare_ubo(4, "ivec4", "out_axis_map")} -${layout_declare_ubo(5, "ivec4", "in_sizes")} -${layout_declare_ubo(6, "ivec4", "in_axis_map")} -layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs { +layout(set = 0, binding = 5) uniform PRECISION restrict CopyArgs { // Operates on (x, y, z) logical extents. ivec3 range; // Analogus to range variable in copy. It defines the # of channel being @@ -35,9 +32,16 @@ layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs { int src_channel_offset; }; +#include "indexing_utils.h" + layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); void main() { // Note: Unlike other shaders, the range is often not equal to the destination diff --git a/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl b/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl index 7781fcb2656..3dbc59e041a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl @@ -12,19 +12,23 @@ ${define_active_storage_type(STORAGE)} -#include "indexing_utils.h" - layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec3", "range", "ivec3", "src_offset", "ivec3", "dst_offset")} -${layout_declare_ubo(B, "ivec4", "out_axis_map")} -${layout_declare_ubo(B, "ivec4", "in_axis_map")} + +#include "indexing_utils.h" layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); + void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); diff --git a/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl b/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl index 0a3eeee257f..5c3de756348 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/embedding.glsl @@ -14,19 +14,24 @@ layout(std430) buffer; -#include "indexing_utils.h" - ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "t_in", "int", STORAGE)} ${layout_declare_tensor(B, "r", "t_weight", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec4", "sizes")} -${layout_declare_ubo(B, "ivec4", "out_axis_map")} -${layout_declare_ubo(B, "ivec4", "in_axis_map")} -${layout_declare_ubo(B, "ivec4", "weight_axis_map")} + +#include "indexing_utils.h" layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; 
+${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); + +${layout_declare_spec_const(C, "int", "weight_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 weight_axis_map = unhash_axis_map(weight_layout); void main() { const ivec3 out_lpos = ivec3(gl_GlobalInvocationID); diff --git a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl index be3901799f8..f7d2770faf0 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl @@ -15,8 +15,6 @@ ${define_active_storage_type(STORAGE)} -#include "indexing_utils.h" - ${define_required_extensions(DTYPE)} layout(std430) buffer; @@ -24,11 +22,14 @@ layout(std430) buffer; ${layout_declare_buffer(B, "w", "nchw_out", DTYPE)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec4", "sizes")} -${layout_declare_ubo(B, "ivec4", "axis_map")} + +#include "indexing_utils.h" layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 axis_map = unhash_axis_map(t_layout); +const lowp int packed_dim = unhash_packed_dim(t_layout); void write_out_texel(VEC4_T texel, ivec4 tensor_idx) { const ivec4 buf_indices = tidx_to_nchwi( diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h index 26342bcd2ba..09f53fe779a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h +++ b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h @@ -232,12 +232,20 @@ ivec3 lpos_to_pos(const ivec3 lpos, const ivec4 axis_map) { imageStore(im, lpos_to_pos(lpos, axis_map), texel) #endif -// Converts hashed axis mapping and packed dim to a ivec4 -// e.g. 0x000102, 2 -> ivec4(0, 1, 2, 2) -// e.g. 0x010200, 1 -> ivec4(1, 2, 0, 1) -#define UNHASH_AXIS_MAP(hash, packed_dim) \ - ivec4(hash >> 16, (hash >> 8) & 0xFF, hash & 0xFF, packed_dim) -#define DEFAULT_AXIS_MAP_HASH 0x000102 +/* + * Converts hashed layout to a ivec4 containing the axis map data and an int + * containing the packed dim respectively. Each value takes up 4 bits in the + * packed int, and values are read from least significant half byte (right-most) + * to most significant half byte (left-most). + * e.g. 0x20122, 2 -> ivec4(0, 1, 2, 2) + * e.g. 
0x11021, 1 -> ivec4(1, 2, 0, 1) + */ +#define unhash_axis_map(hash) \ + ivec4(hash & 0xf, (hash >> 4) & 0xf, (hash >> 8 & 0xf), (hash >> 12 & 0xf)) + +#define unhash_packed_dim(hash) int(hash >> 16 & 0xf) + +#define DEFAULT_LAYOUT 0x02210 /************************ * Deprecated Functions * diff --git a/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl b/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl index 03500b2d085..f984821600b 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl @@ -31,13 +31,13 @@ ${layout_declare_ubo(B, "float", "epsilon")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -${layout_declare_spec_const(C, "int", "in_axis_map_hash", "DEFAULT_AXIS_MAP_HASH")} -${layout_declare_spec_const(C, "int", "in_packed_dim", "C_DIM")} -const ivec4 in_axis_map = UNHASH_AXIS_MAP(in_axis_map_hash, in_packed_dim); +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); +const lowp int in_packed_dim = unhash_packed_dim(in_layout); -${layout_declare_spec_const(C, "int", "out_axis_map_hash", "DEFAULT_AXIS_MAP_HASH")} -${layout_declare_spec_const(C, "int", "out_packed_dim", "C_DIM")} -const ivec4 out_axis_map = UNHASH_AXIS_MAP(out_axis_map_hash, out_packed_dim); +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int out_packed_dim = unhash_packed_dim(out_layout); void main() { const ivec3 lpos = ivec3(gl_GlobalInvocationID); diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl index 8a3ef68528f..25113887dca 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl @@ -23,11 +23,12 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_buffer(B, "r", "nchw_in", "int")} ${layout_declare_ubo(B, "ivec4", "sizes")} -${layout_declare_ubo(B, "ivec4", "axis_map")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 axis_map = unhash_axis_map(t_layout); +const lowp int packed_dim = unhash_packed_dim(t_layout); /* * Extends sign of int8 diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl index ea4e0d300cc..bf498f34d5b 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl @@ -20,7 +20,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; // This constant is unused in this shader but is kept so that the signature is // consistent with nchw_to_image. 
-layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM; +${layout_declare_spec_const(C, "int", "UNUSED_layout", "0")} void main() { int out_bufi = int(gl_GlobalInvocationID.x); diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl index b86a59fc234..bde846289ef 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl @@ -15,20 +15,21 @@ ${define_active_storage_type(STORAGE)} -#include "indexing_utils.h" - ${define_required_extensions(DTYPE)} layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} -${layout_declare_buffer(B, "r", "nchw_in", DTYPE)} +${layout_declare_buffer(B, "r", "buf_in", DTYPE)} ${layout_declare_ubo(B, "ivec4", "sizes")} -${layout_declare_ubo(B, "ivec4", "axis_map")} + +#include "indexing_utils.h" layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; +${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 axis_map = unhash_axis_map(t_layout); +const lowp int packed_dim = unhash_packed_dim(t_layout); VEC4_T read_texel(ivec4 tidx) { const ivec4 buf_indices = tidx_to_nchwi( @@ -38,16 +39,16 @@ VEC4_T read_texel(ivec4 tidx) { VEC4_T texel = VEC4_T(0); if (tidx[packed_dim] < sizes[packed_dim]) { - texel.x = SCALAR_T(nchw_in[buf_indices.x]); + texel.x = SCALAR_T(buf_in[buf_indices.x]); } if (tidx[packed_dim] + 1 < sizes[packed_dim]) { - texel.y = SCALAR_T(nchw_in[buf_indices.y]); + texel.y = SCALAR_T(buf_in[buf_indices.y]); } if (tidx[packed_dim] + 2 < sizes[packed_dim]) { - texel.z = SCALAR_T(nchw_in[buf_indices.z]); + texel.z = SCALAR_T(buf_in[buf_indices.z]); } if (tidx[packed_dim] + 3 < sizes[packed_dim]) { - texel.w = SCALAR_T(nchw_in[buf_indices.w]); + texel.w = SCALAR_T(buf_in[buf_indices.w]); } return texel; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/repeat_interleave.glsl b/backends/vulkan/runtime/graph/ops/glsl/repeat_interleave.glsl index 3ade1f10cba..1a8e677a38f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/repeat_interleave.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/repeat_interleave.glsl @@ -19,15 +19,19 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "tout", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "tin", DTYPE, STORAGE)} ${layout_declare_ubo(B, "ivec3", "tin_limits")} -${layout_declare_ubo(B, "ivec4", "tin_axis_map")} -${layout_declare_ubo(B, "ivec4", "tout_axis_map")} + +#include "indexing_utils.h" layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int nrepeats = 1; -layout(constant_id = 4) const int repeat_dim = 1; +${layout_declare_spec_const(C, "int", "tout_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 tout_axis_map = unhash_axis_map(tout_layout); -#include "indexing_utils.h" +${layout_declare_spec_const(C, "int", "tin_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 tin_axis_map = unhash_axis_map(tin_layout); + +${layout_declare_spec_const(C, "int", "nrepeats", "1")} +${layout_declare_spec_const(C, "int", "repeat_dim", "1")} void main() { const ivec3 tin_lpos = ivec3(gl_GlobalInvocationID); diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp index c055431a84b..33f73cd6dad 100644 --- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp @@ -84,15 +84,12 @@ 
void add_binary_op_node( {{arg1, arg2}, vkapi::MemoryAccessType::READ}}, // Shader params buffers {t_out->sizes_ubo(), - t_out->axis_map_ubo(), t_in1->sizes_ubo(), - t_in1->axis_map_ubo(), t_in2->sizes_ubo(), - t_in2->axis_map_ubo(), graph.create_params_buffer(broadcast_params), graph.create_params_buffer(alpha_val)}, // Specialization Constants - {SV(t_out->packed_dim())}, + {t_out->hashed_layout(), t_in1->hashed_layout(), t_in2->hashed_layout()}, // Resizing Logic resize_binary_op_node, {})); diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 43568622f84..880d48e25ed 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -106,9 +106,9 @@ ValueRef prepack_biases( graph.create_local_wg_size(v), vref, v, - {t->sizes_ubo(), t->axis_map_ubo()}, + {t->sizes_ubo()}, // Specialization constants - {SV(t->packed_dim())})); + {t->hashed_layout()})); return v; } @@ -479,15 +479,14 @@ void add_conv1d_node( { t_out->logical_limits_ubo(), t_in->sizes_ubo(), - t_out->axis_map_ubo(), - t_in->axis_map_ubo(), - t_weight->axis_map_ubo(), - t_bias->axis_map_ubo(), graph.create_params_buffer(kernel_params), graph.create_params_buffer(out_params), }, // Specialization Constants - {}, + {t_out->hashed_layout(), + t_in->hashed_layout(), + t_weight->hashed_layout(), + t_bias->hashed_layout()}, // Resizing Logic resize_conv1d_node, {weight, stride, padding, dilation})); diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 18f337cb102..15cfce2a014 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -52,17 +52,15 @@ void add_copy_offset_node( graph.create_local_wg_size(out), // Inputs and Outputs { - {out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}, + {out, vkapi::kWrite}, + {in, vkapi::kRead}, }, // Parameter buffers { graph.create_params_buffer(offset_params), - t_out->axis_map_ubo(), - t_in->axis_map_ubo(), }, // Specialization Constants - {})); + {graph.hashed_layout_of(out), graph.hashed_layout_of(in)})); } void add_copy_channel_offset_node( @@ -169,13 +167,11 @@ void add_copy_channel_offset_node( // Parameter buffers { t_out->sizes_ubo(), - t_out->axis_map_ubo(), t_in->sizes_ubo(), - t_in->axis_map_ubo(), graph.create_params_buffer(channel_offset_params), }, // Specialization Constants - {})); + {graph.hashed_layout_of(out), graph.hashed_layout_of(in)})); } } diff --git a/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp b/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp index beaeed59baa..05ebd3d1a60 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp @@ -46,14 +46,13 @@ void add_embedding_node( VK_KERNEL_FROM_STR(kernel_name), graph.create_global_wg_size(out), graph.create_local_wg_size(out), - {{out, vkapi::MemoryAccessType::WRITE}, - {{in, weight}, vkapi::MemoryAccessType::READ}}, + {{out, vkapi::kWrite}, {{in, weight}, vkapi::kRead}}, { t_out->sizes_ubo(), - t_out->axis_map_ubo(), - t_in->axis_map_ubo(), - t_weight->axis_map_ubo(), - })); + }, + {t_out->hashed_layout(), + t_in->hashed_layout(), + t_weight->hashed_layout()})); } void embedding(ComputeGraph& graph, const std::vector& args) { diff --git a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp index 
74afce1abe3..e2d6fc25519 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp @@ -119,20 +119,16 @@ void add_addmm_naive_node( { graph.sizes_ubo(out), graph.logical_limits_ubo(out), - graph.axis_map_ubo(out), graph.sizes_ubo(mat1), - graph.axis_map_ubo(mat1), graph.sizes_ubo(mat2), - graph.axis_map_ubo(mat2), graph.sizes_ubo(self), - graph.axis_map_ubo(self), graph.create_params_buffer(params), }, // Specialization Constants - {graph.packed_dim_of(out), - graph.packed_dim_of(mat1), - graph.packed_dim_of(mat2), - graph.packed_dim_of(self)}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(mat1), + graph.hashed_layout_of(mat2), + graph.hashed_layout_of(self)}, // Resizing Logic resize_addmm_node, {mat2_is_transposed})); @@ -215,17 +211,16 @@ void add_addmm_optimized_node( // Shader params buffers { graph.sizes_ubo(out), - graph.axis_map_ubo(out), graph.sizes_ubo(mat1_W_packed), - graph.axis_map_ubo(mat1_W_packed), graph.sizes_ubo(mat2_packed), - graph.axis_map_ubo(mat2_packed), graph.sizes_ubo(self), - graph.axis_map_ubo(self), graph.create_params_buffer(params), }, // Specialization Constants - {graph.packed_dim_of(out)}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(mat1_W_packed), + graph.hashed_layout_of(mat2_packed), + graph.hashed_layout_of(self)}, // Resizing Logic resize_addmm_node, {mat2_is_transposed})); diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp index 71e9033cec2..8ca9858d884 100644 --- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp @@ -135,16 +135,13 @@ void add_matmul_naive_texture3d_node( { graph.sizes_ubo(out), graph.logical_limits_ubo(out), - graph.axis_map_ubo(out), graph.sizes_ubo(mat1), - graph.axis_map_ubo(mat1), graph.sizes_ubo(mat2), - graph.axis_map_ubo(mat2), }, // Specialization Constants - {graph.packed_dim_of(out), - graph.packed_dim_of(mat1), - graph.packed_dim_of(mat2)}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(mat1), + graph.hashed_layout_of(mat2)}, // Resizing Logic resize_matmul_node, {mat2_is_transposed})); @@ -224,14 +221,13 @@ void add_matmul_optimized_node( // Shader params buffers { graph.sizes_ubo(out), - graph.axis_map_ubo(out), graph.sizes_ubo(mat1_W_packed), - graph.axis_map_ubo(mat1_W_packed), graph.sizes_ubo(mat2_packed), - graph.axis_map_ubo(mat2_packed), }, // Specialization Constants - {graph.packed_dim_of(out)}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(mat1_W_packed), + graph.hashed_layout_of(mat2_packed)}, // Resizing Logic resize_matmul_node, {mat2_is_transposed})); diff --git a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp index 1509f35014d..b1cc8c8084a 100644 --- a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp @@ -113,10 +113,8 @@ void add_native_layer_norm_node( }, // Specialization Constants { - hash_axis_map(t_input->axis_map()), - t_input->packed_dim(), - hash_axis_map(t_out->axis_map()), - t_out->packed_dim(), + t_input->hashed_layout(), + t_out->hashed_layout(), }, // Resizing Logic resize_native_layer_norm_node, diff --git a/backends/vulkan/runtime/graph/ops/impl/RepeatInterleave.cpp b/backends/vulkan/runtime/graph/ops/impl/RepeatInterleave.cpp index 16c13664561..5e4608a65bb 100644 --- 
a/backends/vulkan/runtime/graph/ops/impl/RepeatInterleave.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/RepeatInterleave.cpp @@ -63,11 +63,12 @@ void add_repeat_interleave_node( {{out, vkapi::MemoryAccessType::WRITE}, {in, vkapi::MemoryAccessType::READ}}, // Parameter buffers - {graph.logical_limits_ubo(in), - graph.axis_map_ubo(in), - graph.axis_map_ubo(out)}, + {graph.logical_limits_ubo(in)}, // Specialization Constants - {nrepeats, repeat_dim}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(in), + nrepeats, + repeat_dim}, // Resizing Logic resize_repeat_interleave_node, {num_repeats, dim})); diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 15045ccca27..80a1e706e83 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -35,7 +35,7 @@ void add_staging_to_tensor_node( graph.strides_ubo(out_tensor), graph.numel_ubo(out_tensor)}); } else { - ubos.append({graph.sizes_ubo(out_tensor), graph.axis_map_ubo(out_tensor)}); + ubos.append({graph.sizes_ubo(out_tensor)}); } graph.execute_nodes().emplace_back(new DispatchNode( @@ -44,12 +44,11 @@ void add_staging_to_tensor_node( graph.create_global_wg_size(out_tensor), graph.create_local_wg_size(out_tensor), // Input and Outputs - {{out_tensor, vkapi::MemoryAccessType::WRITE}, - {in_staging, vkapi::MemoryAccessType::READ}}, + {{out_tensor, vkapi::kWrite}, {in_staging, vkapi::kRead}}, // Parameter Buffers ubos, // Specialization Constants - {SV(graph.packed_dim_of(out_tensor))}, + {graph.hashed_layout_of(out_tensor)}, // Resizing Logic nullptr, {})); @@ -81,7 +80,7 @@ void add_tensor_to_staging_node( graph.strides_ubo(in_tensor), graph.numel_ubo(in_tensor)}); } else { - ubos.append({graph.sizes_ubo(in_tensor), graph.axis_map_ubo(in_tensor)}); + ubos.append({graph.sizes_ubo(in_tensor)}); } // Normally, the image_to_nchw shader is structured so that each thread reads @@ -104,12 +103,11 @@ void add_tensor_to_staging_node( global_wg_size, graph.create_local_wg_size(global_wg_size), // Input and Outputs - {{out_staging, vkapi::MemoryAccessType::WRITE}, - {in_tensor, vkapi::MemoryAccessType::READ}}, + {{out_staging, vkapi::kWrite}, {in_tensor, vkapi::kRead}}, // Parameter Buffers ubos, // Specialization Constants - {SV(graph.packed_dim_of(in_tensor))})); + {graph.hashed_layout_of(in_tensor)})); } void add_prepack_standard_node( @@ -126,7 +124,7 @@ void add_prepack_standard_node( graph.strides_ubo(tensor), graph.numel_ubo(tensor)}); } else { - ubos.append({graph.sizes_ubo(tensor), graph.axis_map_ubo(tensor)}); + ubos.append({graph.sizes_ubo(tensor)}); } graph.prepack_nodes().emplace_back(new PrepackNode( @@ -140,7 +138,7 @@ void add_prepack_standard_node( // Parameter Buffers ubos, // Specialization Constants - {SV(graph.packed_dim_of(tensor))})); + {graph.hashed_layout_of(tensor)})); } ValueRef prepack_standard( diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h index 508cc2538a0..c9eeb0efe08 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h +++ b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h @@ -79,18 +79,4 @@ T nchw_dim_to_whcn_dim(const T& nchw_dim, const int64_t ndim) { return ndim - 1 - nchw_dim; } -// -// Tensor axis map utilities -// - -// Converts ivec4 axis map to a single int32_t, to be able to pass it as a -// specialization constant instead of a ubo. 
This allows for the spir-v to -// bytecode compilation to perform compile-time folding on the axis map. -// Only converts the first 3 indices, as the last index is the packed dim, -// which is passed separately. -// Example: ivec4(0, 1, 2, 2) -> 0x000102 -inline int32_t hash_axis_map(const std::vector& axis_map) { - return (axis_map.at(0) << 16) + (axis_map.at(1) << 8) + axis_map.at(2); -} - } // namespace vkcompute diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index 73e2f049a33..6124f0b71e0 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -70,7 +70,7 @@ void record_nchw_to_image_op( vkapi::VulkanBuffer& src_buffer, api::vTensor& v_dst) { vkapi::PipelineBarrier pipeline_barrier{}; - vkapi::SpecVarList specialization_constants = {SV(v_dst.packed_dim())}; + vkapi::SpecVarList specialization_constants = {v_dst.hashed_layout()}; context->submit_compute_job( get_nchw_to_tensor_shader( @@ -86,8 +86,7 @@ void record_nchw_to_image_op( vkapi::PipelineStage::COMPUTE, vkapi::MemoryAccessType::WRITE), src_buffer, - v_dst.sizes_ubo(), - v_dst.axis_map_ubo()); + v_dst.sizes_ubo()); } void record_image_to_nchw_op( @@ -95,7 +94,7 @@ void record_image_to_nchw_op( api::vTensor& v_src, vkapi::VulkanBuffer& dst_buffer) { vkapi::PipelineBarrier pipeline_barrier{}; - vkapi::SpecVarList specialization_constants = {SV(v_src.packed_dim())}; + vkapi::SpecVarList specialization_constants = {v_src.hashed_layout()}; context->submit_compute_job( get_tensor_to_nchw_shader(v_src), @@ -107,8 +106,7 @@ void record_image_to_nchw_op( 0, dst_buffer, v_src.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE), - v_src.sizes_ubo(), - v_src.axis_map_ubo()); + v_src.sizes_ubo()); } void record_bitw8_image_to_nchw_nobitw8buffer_op( @@ -128,13 +126,12 @@ void record_bitw8_image_to_nchw_nobitw8buffer_op( pipeline_barrier, global_wg_size, adaptive_work_group_size(global_wg_size), - {v_src.packed_dim()}, + {v_src.hashed_layout()}, VK_NULL_HANDLE, 0, dst_buffer.buffer(), v_src.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE), v_src.sizes_ubo(), - v_src.axis_map_ubo(), v_src.numel_ubo()); } @@ -337,7 +334,7 @@ void record_matmul_texture3d( pipeline_barrier, global_wg_size, {8, 8, 1}, - {out.packed_dim(), mat1.packed_dim(), mat2.packed_dim()}, + {out.hashed_layout(), mat1.hashed_layout(), mat2.hashed_layout()}, VK_NULL_HANDLE, 0, out.image( @@ -348,11 +345,8 @@ void record_matmul_texture3d( mat2.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE), out.sizes_ubo(), out.logical_limits_ubo(), - out.axis_map_ubo(), mat1.sizes_ubo(), - mat1.axis_map_ubo(), - mat2.sizes_ubo(), - mat2.axis_map_ubo()); + mat2.sizes_ubo()); } // diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 87cafd10a7e..1d40fe1bb59 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1588,9 +1588,8 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { /*shared_object_idx = */ 4); // +2: t.sizes_ubo() for each staging shader - // +2: t.axis_map_ubo() for each staging shader // +2: staging buffer for each input tensor - expected_vma_allocation_count += 6; + expected_vma_allocation_count += 4; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); ValueRef c = graph.add_tensor( @@ -1603,8 +1602,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { // +2: alpha UBO, 
broadcast UBO for arithmetic shader // +1: t.sizes_ubo() for arithmetic shader output c - // +1: t.axis_map_ubo() for arithmetic shader output c - expected_vma_allocation_count += 4; + expected_vma_allocation_count += 3; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); IOValueRef d = graph.add_input_tensor( @@ -1613,9 +1611,8 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { /*shared_object_idx = */ 2); // +1: t.sizes_ubo() uniform buffer for staging shader - // +1: t.axis_map_ubo() uniform buffer for staging shader // +1: staging buffer for the input tensor - expected_vma_allocation_count += 3; + expected_vma_allocation_count += 2; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); ValueRef e = graph.add_tensor( @@ -1628,8 +1625,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { // +2: alpha UBO, broadcast UBO for arithmetic shader // +1: t.sizes_ubo() for arithmetic shader output e - // +1: t.axis_map_ubo() for arithmetic shader output e - expected_vma_allocation_count += 4; + expected_vma_allocation_count += 3; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); IOValueRef out = {};
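
Reviewer note: below is a minimal, self-contained C++ sketch of the layout
hashing scheme introduced by this patch, illustrating the round trip between
vTensor::hashed_layout() and the unhash_axis_map / unhash_packed_dim macros in
indexing_utils.h. The free functions here (hash_layout and friends) are
hypothetical stand-ins written for this note only; they are not part of the
patch.

// Each axis map element and the packed dim occupy 4 bits of the hashed
// layout, packed from the least significant nibble upwards.
#include <array>
#include <cassert>
#include <cstdint>

int32_t hash_layout(const std::array<int32_t, 4>& axis_map, int32_t packed_dim) {
  // Same packing as vTensor::hashed_layout().
  return axis_map[0] + (axis_map[1] << 4) + (axis_map[2] << 8) +
      (axis_map[3] << 12) + (packed_dim << 16);
}

std::array<int32_t, 4> unhash_axis_map(int32_t hash) {
  // Same unpacking as the unhash_axis_map GLSL macro.
  return {hash & 0xf, (hash >> 4) & 0xf, (hash >> 8) & 0xf, (hash >> 12) & 0xf};
}

int32_t unhash_packed_dim(int32_t hash) {
  // Same unpacking as the unhash_packed_dim GLSL macro.
  return (hash >> 16) & 0xf;
}

int main() {
  // Standard axis map {0, 1, 2, 2} on a channels-packed tensor (packed dim 2).
  const std::array<int32_t, 4> axis_map = {0, 1, 2, 2};
  const int32_t packed_dim = 2;

  const int32_t hashed = hash_layout(axis_map, packed_dim);
  assert(hashed == 0x22210);
  assert(unhash_axis_map(hashed) == axis_map);
  assert(unhash_packed_dim(hashed) == packed_dim);
  return 0;
}

For reference, DEFAULT_LAYOUT (0x02210) unhashes to the standard axis map
ivec4(0, 1, 2, 2) with packed dim 0 (width-packed); it is only the declared
default for the specialization constants, since the dispatch sites above always
pass the tensor's actual hashed layout.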