Skip to content

Commit 3f57c82

Browse files
author
Italo Nicola
committed
Vulkan: Add Q4_K Adreno variant for mul_mat_vec
1 parent d9f4efe commit 3f57c82

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,25 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint v_im,
2424

2525
const uint32_t scale_0_4_l = (scale4_u32 << 16) | scale0_u32;
2626
const uint32_t scale_0_4_h = (scale_0_4_l & 0xC0C0C0C0) >> 2;
27+
28+
#if defined(ADRENO)
29+
const vec4 scale_0_4_l_f = vec4(
30+
float((scale_0_4_l >> 0) & 0x3Fu),
31+
float((scale_0_4_l >> 8) & 0x3Fu),
32+
float((scale_0_4_l >> 16) & 0x3Fu),
33+
float((scale_0_4_l >> 24) & 0x3Fu)
34+
);
35+
36+
const vec4 scale8_f = vec4(
37+
float(((((scale8_u32 << 12) | scale8_u32) & 0x0F0F0F0Fu) | scale_0_4_h) >> 0 & 0xFFu),
38+
float(((((scale8_u32 << 12) | scale8_u32) & 0x0F0F0F0Fu) | scale_0_4_h) >> 8 & 0xFFu),
39+
float(((((scale8_u32 << 12) | scale8_u32) & 0x0F0F0F0Fu) | scale_0_4_h) >> 16 & 0xFFu),
40+
float(((((scale8_u32 << 12) | scale8_u32) & 0x0F0F0F0Fu) | scale_0_4_h) >> 24 & 0xFFu)
41+
);
42+
#else
2743
const vec4 scale_0_4_l_f = vec4(unpack8(scale_0_4_l & 0x3F3F3F3F));
2844
const vec4 scale8_f = vec4(unpack8((((scale8_u32 << 12) | scale8_u32) & 0x0F0F0F0F) | scale_0_4_h));
45+
#endif
2946

3047
const FLOAT_TYPE sc0 = scale_0_4_l_f.x;
3148
const FLOAT_TYPE sc1 = scale_0_4_l_f.y;
@@ -44,10 +61,17 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint v_im,
4461
const uint32_t qs64_u32_lo4 = qs64_u32 & 0x0F0F0F0F;
4562
const uint32_t qs64_u32_hi4 = (qs64_u32 >> 4) & 0x0F0F0F0F;
4663

64+
#if defined(ADRENO)
65+
const vec4 qs0_lo4 = vec4(float(qs0_u32_lo4 & 0xFFu), float((qs0_u32_lo4 >> 8) & 0xFFu), float((qs0_u32_lo4 >> 16) & 0xFFu), float((qs0_u32_lo4 >> 24) & 0xFFu));
66+
const vec4 qs64_lo4 = vec4(float(qs64_u32_lo4 & 0xFFu), float((qs64_u32_lo4 >> 8) & 0xFFu), float((qs64_u32_lo4 >> 16) & 0xFFu), float((qs64_u32_lo4 >> 24) & 0xFFu));
67+
const vec4 qs0_hi4 = vec4(float(qs0_u32_hi4 & 0xFFu), float((qs0_u32_hi4 >> 8) & 0xFFu), float((qs0_u32_hi4 >> 16) & 0xFFu), float((qs0_u32_hi4 >> 24) & 0xFFu));
68+
const vec4 qs64_hi4 = vec4(float(qs64_u32_hi4 & 0xFFu), float((qs64_u32_hi4 >> 8) & 0xFFu), float((qs64_u32_hi4 >> 16) & 0xFFu), float((qs64_u32_hi4 >> 24) & 0xFFu));
69+
#else
4770
const vec4 qs0_lo4 = vec4(unpack8(qs0_u32_lo4));
4871
const vec4 qs64_lo4 = vec4(unpack8(qs64_u32_lo4));
4972
const vec4 qs0_hi4 = vec4(unpack8(qs0_u32_hi4));
5073
const vec4 qs64_hi4 = vec4(unpack8(qs64_u32_hi4));
74+
#endif
5175

5276
const FLOAT_TYPE q4_0 = qs0_lo4.x;
5377
const FLOAT_TYPE q4_1 = qs0_lo4.y;
@@ -66,7 +90,11 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint v_im,
6690
const FLOAT_TYPE q4_14 = qs64_hi4.z;
6791
const FLOAT_TYPE q4_15 = qs64_hi4.w;
6892

93+
#if defined(ADRENO)
94+
for (uint j = 0; j < NUM_COLS; ++j) {
95+
#else
6996
[[unroll]] for (uint j = 0; j < NUM_COLS; ++j) {
97+
#endif
7098
vec4 by10 = vec4(data_b_v4[(j*p.batch_stride_b + b_offset + y1_idx) / 4 ]);
7199
vec4 by132 = vec4(data_b_v4[(j*p.batch_stride_b + b_offset + y1_idx) / 4 + 8]);
72100
vec4 by20 = vec4(data_b_v4[(j*p.batch_stride_b + b_offset + y2_idx) / 4 ]);

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -774,7 +774,7 @@ void process_shaders() {
774774
#ifdef GGML_VULKAN_BUILD_ADRENO_SHADERS
775775
std::cout << "ggml_vulkan: Generating Adreno-supported shaders" << std::endl;
776776

777-
std::vector<std::string> adreno_shader_types = {"f32", "f16", "q4_0", "q4_1", "q6_k", "q8_0"};
777+
std::vector<std::string> adreno_shader_types = {"f32", "f16", "q4_0", "q4_k", "q4_1", "q5_k", "q6_k", "q8_0"};
778778
std::string device_suffix = "adreno_";
779779
auto adreno_base_dict = merge_maps(base_dict, {{"ADRENO", "1"}});
780780

0 commit comments

Comments
 (0)