Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,26 @@ vkapi::VulkanImage allocate_image(
return vkapi::VulkanImage();
}

// TODO(ssjia): change to always check that the image extents do not exceed
// physical limits. Adding the check now based on `maxImageDimension3D` will
// cause some existing models to break. Anecdotally, on Adreno and
// SwiftShader devices, using 3D textures that exceed `maxImageDimension3D`
// appears to be ok. So we need to figure out if it is undefined behaviour
// or if there's a better way to figure out what the limit is. For now, only
// check during debug build so that we can detect when exceeding physical
// limits could be a potential cause for model outputs to be wrong. In the
// meantime, the threshold for using texture storage can be configured at
// export time.
#ifdef VULKAN_DEBUG
uint32_t max_extent = storage_type == utils::kTexture3D
? adapter_ptr->max_texture3d_dim()
: adapter_ptr->max_texture2d_dim();

VK_CHECK_COND(
image_extents[0] <= max_extent && image_extents[1] <= max_extent &&
image_extents[2] <= max_extent);
#endif

VkSampler sampler = adapter_ptr->sampler_cache().retrieve(sampler_props);

return adapter_ptr->vma().create_image(
Expand Down Expand Up @@ -291,6 +311,8 @@ vkapi::VulkanBuffer allocate_buffer(
return vkapi::VulkanBuffer();
}

VK_CHECK_COND(numel <= context_ptr->adapter_ptr()->max_buffer_numel());

return adapter_ptr->vma().create_storage_buffer(
element_size(dtype) * numel, allocate_memory);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ void main() {
in_vals[r][0] = get_first(in_val_packed);
in_vals[r][1] = get_second(in_val_packed);
} else {
in_vals[r][0] = uint8_t(254);
in_vals[r][1] = uint8_t(254);
in_vals[r][0] = uint8_t(0);
in_vals[r][1] = uint8_t(0);
}
}

Expand All @@ -131,6 +131,6 @@ void main() {
t_qmat2[packed_pos.y * stride + packed_pos.x] = out_tex_1;
t_qmat2[(packed_pos.y + 1) * stride + packed_pos.x] = out_tex_2;
$else:
imageStore(t_qmat2, ivec3(packed_pos.xy, 0), out_tex_1);
imageStore(t_qmat2, ivec3(packed_pos.x, packed_pos.y + 1, 0), out_tex_2);
imageStore(t_qmat2, packed_pos.xy, out_tex_1);
imageStore(t_qmat2, ivec2(packed_pos.x, packed_pos.y + 1), out_tex_2);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

pack_int4_linear_weight_transposed_interleaved:
parameter_names_with_default_values:
STORAGE: texture3d
STORAGE: texture2d
generate_variant_forall:
STORAGE:
- VALUE: texture2d
- VALUE: buffer
shader_variants:
- NAME: pack_int4_linear_weight_transposed_interleaved_texture3d
- NAME: pack_int4_linear_weight_transposed_interleaved_buffer
STORAGE: buffer
- NAME: pack_int4_linear_weight_transposed_interleaved
22 changes: 16 additions & 6 deletions backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ layout(std430) buffer;
${layout_declare_tensor(B, "w", "t_out", DTYPE, OUT_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "r", "t_mat1", DTYPE, IN_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "r", "t_qmat2", "uint8", WEIGHT_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "r", "t_qparams", DTYPE, "texture3D")}
${layout_declare_tensor(B, "r", "t_qparams", DTYPE, "buffer", is_scalar_array=False)}

layout(push_constant) uniform restrict Block {
ivec4 out_sizes;
Expand Down Expand Up @@ -79,13 +79,23 @@ void main() {

$if WEIGHT_STORAGE == "buffer":
const int qmat2_stride = qmat2_sizes.x >> 2;
$if PARAMS_STORAGE == "buffer":
const int qparams_y_stride = out_sizes.x >> 2;
const int qparams_z_stride = qparams_y_stride * 2;

for (int block_idx = 0; block_idx < num_blocks; ++block_idx) {
scales[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 0, block_idx), 0);
zeros[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 1, block_idx), 0);
$if PARAMS_STORAGE == "buffer":
scales[0] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx];
zeros[0] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx + qparams_y_stride];

scales[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 0, block_idx), 0);
zeros[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 1, block_idx), 0);
scales[1] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx + 1];
zeros[1] = t_qparams[block_idx * qparams_z_stride + out_col_texel_idx + 1 + qparams_y_stride];
$else:
scales[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 0, block_idx), 0);
zeros[0] = texelFetch(t_qparams, ivec3(out_col_texel_idx, 1, block_idx), 0);

scales[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 0, block_idx), 0);
zeros[1] = texelFetch(t_qparams, ivec3(out_col_texel_idx + 1, 1, block_idx), 0);

for (int g_idx = 0; g_idx < group_size; g_idx += 4) {
const int k = block_idx * group_size + g_idx;
Expand All @@ -101,7 +111,7 @@ void main() {
$else:
const uvec4 packed_weight_tex = texelFetch(
t_qmat2,
ivec3(gl_GlobalInvocationID.x, k + comp, 0),
ivec2(gl_GlobalInvocationID.x, k + comp),
0);

const uvec4 weight_tex_1 = (packed_weight_tex & 0xF0) >> 4;
Expand Down
9 changes: 4 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/q_4w_linear.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ q_4w_linear:
DTYPE: float
OUT_STORAGE: texture3d
IN_STORAGE: texture3d
WEIGHT_STORAGE: texture3d
WEIGHT_STORAGE: texture2d
PARAMS_STORAGE: buffer
shader_variants:
- NAME: q_4w_linear_texture3d_texture3d_texture3d_float
- NAME: q_4w_linear_texture3d_buffer_texture3d_float
IN_STORAGE: buffer
- NAME: q_4w_linear_buffer_buffer_texture3d_float
- NAME: q_4w_linear_texture3d_texture3d_texture2d_float
- NAME: q_4w_linear_buffer_buffer_texture2d_float
OUT_STORAGE: buffer
IN_STORAGE: buffer
- NAME: q_4w_linear_buffer_buffer_buffer_float
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,9 @@ ValueRef prepack_int4_linear_weight_transposed_interleaved(
const int64_t N = qmat2_orig_sizes.at(ndim - 2);
const int64_t N_div2 = N / int64_t(2);

utils::StorageType storage_type = utils::kTexture3D;
utils::uvec3 max_extents =
graph.context()->adapter_ptr()->max_texture_extents();
if (N_div2 > max_extents[0] * 4 || K > max_extents[1]) {
utils::StorageType storage_type = utils::kTexture2D;
uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim();
if (N_div2 > max_extent * 4 || K > max_extent) {
storage_type = utils::kBuffer;
}

Expand Down Expand Up @@ -133,7 +132,7 @@ void add_q_4w_linear_node(
prepack_int4_linear_weight_transposed_interleaved(graph, mat2_data);

ValueRef scales_and_zeros = prepack_standard_hw_transposed(
graph, scales_and_zeros_data, utils::kTexture3D, utils::kWidthPacked);
graph, scales_and_zeros_data, utils::kBuffer, utils::kWidthPacked);

std::string kernel_name = "q_4w_linear";
add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
Expand Down
15 changes: 10 additions & 5 deletions backends/vulkan/runtime/vk_api/Adapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,16 @@ class Adapter final {
return physical_device_.min_ubo_alignment;
}

inline utils::uvec3 max_texture_extents() const {
return {
physical_device_.properties.limits.maxImageDimension1D,
physical_device_.properties.limits.maxImageDimension2D,
physical_device_.properties.limits.maxImageDimension3D};
// Returns the physical device's `maxImageDimension2D` limit as reported in
// its properties; a single scalar rather than a per-axis extent.
inline uint32_t max_texture2d_dim() const {
return physical_device_.properties.limits.maxImageDimension2D;
}

// Returns the physical device's `maxImageDimension3D` limit as reported in
// its properties; a single scalar rather than a per-axis extent.
inline uint32_t max_texture3d_dim() const {
return physical_device_.properties.limits.maxImageDimension3D;
}

// Returns the physical device's `maxStorageBufferRange` limit as reported in
// its properties.
// NOTE(review): the Vulkan spec describes `maxStorageBufferRange` as a range
// in bytes for storage buffer descriptors, while this accessor's name suggests
// an element count. Callers comparing it against a number of elements should
// presumably scale by the element size first - confirm against call sites.
inline uint32_t max_buffer_numel() const {
return physical_device_.properties.limits.maxStorageBufferRange;
}

// Command Buffer Submission
Expand Down
Loading