diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp index 0712062a37e..62b53f9a76c 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.cpp +++ b/backends/vulkan/runtime/api/containers/Tensor.cpp @@ -260,6 +260,26 @@ vkapi::VulkanImage allocate_image( return vkapi::VulkanImage(); } + // TODO(ssjia): change to always check that the image extents do not exceed + // physical limits. Adding the check now based on `maxImageDimension3D` will + // cause some existing models to break. Anecdotally, on Adreno and + // SwiftShader devices, using 3D textures that exceed `maxImageDimension3D` + // appears to be ok. So we need to figure out if it is undefined behaviour + // or if there's a better way to figure out what the limit is. For now, only + // check during debug build so that we can detect when exceeding physical + // limits could be a potential cause for model outputs to be wrong. In the + // meantime, the threshold for using texture storage can be configured at + // export time. +#ifdef VULKAN_DEBUG + uint32_t max_extent = storage_type == utils::kTexture3D + ? 
adapter_ptr->max_texture3d_dim() + : adapter_ptr->max_texture2d_dim(); + + VK_CHECK_COND( + image_extents[0] <= max_extent && image_extents[1] <= max_extent && + image_extents[2] <= max_extent); +#endif + VkSampler sampler = adapter_ptr->sampler_cache().retrieve(sampler_props); return adapter_ptr->vma().create_image( @@ -291,6 +311,8 @@ vkapi::VulkanBuffer allocate_buffer( return vkapi::VulkanBuffer(); } + VK_CHECK_COND(numel <= context_ptr->adapter_ptr()->max_buffer_numel()); + return adapter_ptr->vma().create_storage_buffer( element_size(dtype) * numel, allocate_memory); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.glsl b/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.glsl index c7fcdcc775a..a1309fb9198 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.glsl @@ -109,8 +109,8 @@ void main() { in_vals[r][0] = get_first(in_val_packed); in_vals[r][1] = get_second(in_val_packed); } else { - in_vals[r][0] = uint8_t(254); - in_vals[r][1] = uint8_t(254); + in_vals[r][0] = uint8_t(0); + in_vals[r][1] = uint8_t(0); } } @@ -131,6 +131,6 @@ void main() { t_qmat2[packed_pos.y * stride + packed_pos.x] = out_tex_1; t_qmat2[(packed_pos.y + 1) * stride + packed_pos.x] = out_tex_2; $else: - imageStore(t_qmat2, ivec3(packed_pos.xy, 0), out_tex_1); - imageStore(t_qmat2, ivec3(packed_pos.x, packed_pos.y + 1, 0), out_tex_2); + imageStore(t_qmat2, packed_pos.xy, out_tex_1); + imageStore(t_qmat2, ivec2(packed_pos.x, packed_pos.y + 1), out_tex_2); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.yaml b/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.yaml index 168b18fffe4..dffd260b3d8 100644 --- 
a/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/pack_int4_linear_weight_transposed_interleaved.yaml @@ -6,8 +6,10 @@ pack_int4_linear_weight_transposed_interleaved: parameter_names_with_default_values: - STORAGE: texture3d + STORAGE: texture2d + generate_variant_forall: + STORAGE: + - VALUE: texture2d + - VALUE: buffer shader_variants: - - NAME: pack_int4_linear_weight_transposed_interleaved_texture3d - - NAME: pack_int4_linear_weight_transposed_interleaved_buffer - STORAGE: buffer + - NAME: pack_int4_linear_weight_transposed_interleaved diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl index 29f2934f957..56261e5a040 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl @@ -21,7 +21,7 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, OUT_STORAGE, is_scalar_array=False)} ${layout_declare_tensor(B, "r", "t_mat1", DTYPE, IN_STORAGE, is_scalar_array=False)} ${layout_declare_tensor(B, "r", "t_qmat2", "uint8", WEIGHT_STORAGE, is_scalar_array=False)} -${layout_declare_tensor(B, "r", "t_qparams", DTYPE, "texture3D")} +${layout_declare_tensor(B, "r", "t_qparams", DTYPE, "buffer", is_scalar_array=False)} layout(push_constant) uniform restrict Block { ivec4 out_sizes; @@ -79,13 +79,23 @@ void main() { $if WEIGHT_STORAGE == "buffer": const int qmat2_stride = qmat2_sizes.x >> 2; + $if PARAMS_STORAGE == "buffer": + const int qparams_y_stride = out_sizes.x >> 2; + const int qparams_z_stride = qparams_y_stride * 2; for (int block_idx = 0; block_idx < num_blocks; ++block_idx) { - scales[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 0, block_idx), 0); - zeros[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 1, block_idx), 0); + $if PARAMS_STORAGE == "buffer": + scales[0] = t_qparams[block_idx * qparams_z_stride 
+ out_col_texel_idx]; + zeros[0] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx + qparams_y_stride]; - scales[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 0, block_idx), 0); - zeros[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 1, block_idx), 0); + scales[1] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx + 1]; + zeros[1] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx + 1 + qparams_y_stride]; + $else: + scales[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 0, block_idx), 0); + zeros[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 1, block_idx), 0); + + scales[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 0, block_idx), 0); + zeros[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 1, block_idx), 0); for (int g_idx = 0; g_idx < group_size; g_idx += 4) { const int k = block_idx * group_size + g_idx; @@ -101,7 +111,7 @@ void main() { $else: const uvec4 packed_weight_tex = texelFetch( t_qmat2, - ivec3(gl_GlobalInvocationID.x, k + comp, 0), + ivec2(gl_GlobalInvocationID.x, k + comp), 0); const uvec4 weight_tex_1 = (packed_weight_tex & 0xF0) >> 4; diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.yaml b/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.yaml index fac9c25c220..b58b5d2cf55 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.yaml @@ -9,12 +9,11 @@ q_4w_linear: DTYPE: float OUT_STORAGE: texture3d IN_STORAGE: texture3d - WEIGHT_STORAGE: texture3d + WEIGHT_STORAGE: texture2d + PARAMS_STORAGE: buffer shader_variants: - - NAME: q_4w_linear_texture3d_texture3d_texture3d_float - - NAME: q_4w_linear_texture3d_buffer_texture3d_float - IN_STORAGE: buffer - - NAME: q_4w_linear_buffer_buffer_texture3d_float + - NAME: q_4w_linear_texture3d_texture3d_texture2d_float + - NAME: q_4w_linear_buffer_buffer_texture2d_float OUT_STORAGE: buffer IN_STORAGE: buffer - NAME: 
q_4w_linear_buffer_buffer_buffer_float diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp index b795e574291..dc84b945f39 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp @@ -83,10 +83,9 @@ ValueRef prepack_int4_linear_weight_transposed_interleaved( const int64_t N = qmat2_orig_sizes.at(ndim - 2); const int64_t N_div2 = N / int64_t(2); - utils::StorageType storage_type = utils::kTexture3D; - utils::uvec3 max_extents = - graph.context()->adapter_ptr()->max_texture_extents(); - if (N_div2 > max_extents[0] * 4 || K > max_extents[1]) { + utils::StorageType storage_type = utils::kTexture2D; + uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim(); + if (N_div2 > max_extent * 4 || K > max_extent) { storage_type = utils::kBuffer; } @@ -133,7 +132,7 @@ void add_q_4w_linear_node( prepack_int4_linear_weight_transposed_interleaved(graph, mat2_data); ValueRef scales_and_zeros = prepack_standard_hw_transposed( - graph, scales_and_zeros_data, utils::kTexture3D, utils::kWidthPacked); + graph, scales_and_zeros_data, utils::kBuffer, utils::kWidthPacked); std::string kernel_name = "q_4w_linear"; add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); diff --git a/backends/vulkan/runtime/vk_api/Adapter.h b/backends/vulkan/runtime/vk_api/Adapter.h index 68371d3eebf..d73ed1bc0ce 100644 --- a/backends/vulkan/runtime/vk_api/Adapter.h +++ b/backends/vulkan/runtime/vk_api/Adapter.h @@ -211,11 +211,16 @@ class Adapter final { return physical_device_.min_ubo_alignment; } - inline utils::uvec3 max_texture_extents() const { - return { - physical_device_.properties.limits.maxImageDimension1D, - physical_device_.properties.limits.maxImageDimension2D, - physical_device_.properties.limits.maxImageDimension3D}; + inline uint32_t max_texture2d_dim() 
const { + return physical_device_.properties.limits.maxImageDimension2D; + } + + inline uint32_t max_texture3d_dim() const { + return physical_device_.properties.limits.maxImageDimension3D; + } + + inline uint32_t max_buffer_numel() const { + return physical_device_.properties.limits.maxStorageBufferRange; } // Command Buffer Submission