Skip to content

Commit d15d177

Browse files
authored
vulkan: faster q6_k matmul (ggml-org#17813)
* q6_k faster mul mat
* 8 values
* fix comment
* switch to two at a time
* start ci for .glsl files
1 parent 77ad854 commit d15d177

File tree

3 files changed: 13 additions and 7 deletions.

.github/workflows/build.yml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@ on:
2020
'**/*.swift',
2121
'**/*.m',
2222
'**/*.metal',
23-
'**/*.comp'
23+
'**/*.comp',
24+
'**/*.glsl'
2425
]
2526

2627
pull_request:
@@ -40,7 +41,8 @@ on:
4041
'**/*.swift',
4142
'**/*.m',
4243
'**/*.metal',
43-
'**/*.comp'
44+
'**/*.comp',
45+
'**/*.glsl'
4446
]
4547

4648
concurrency:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
/out/
5555
/tmp/
5656
/autogen-*.md
57+
/common/build-info.cpp
5758

5859
# Deprecated
5960

ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -244,17 +244,20 @@ void load_a_to_shmem(const uint pos_a, const uint row, const uint col, const uin
244244
const uint iqs = idx % 128; // 0..127
245245

246246
const uint n = iqs / 64; // 0,1
247-
const uint b = (iqs % 64) / 32; // 0,1
247+
const uint b = ((iqs % 64) / 32) * 4; // 0,4
248248
const uint is_b = (iqs % 16) / 8; // 0,1
249249
const uint qhshift = ((iqs % 64) / 16) * 2; // 0,2,4,6
250250
const uint is = 8 * n + qhshift + is_b; // 0..15
251-
const uint qsi = n * 64 + (iqs % 32) * 2; // 0,2,4..126
252-
const uint qhi = n * 32 + (iqs % 16) * 2; // 0,2,4..62
251+
const uint qsi = n * 32 + (iqs % 32); // 0..63
252+
const uint qhi = n * 16 + (iqs % 16); // 0..31
253253

254254
const float dscale = float(data_a[ib].d) * float(data_a[ib].scales[is]);
255255

256-
buf_a[buf_idx] = FLOAT_TYPE_VEC2(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32),
257-
dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));
256+
const uint ql = (uint(data_a_packed16[ib].ql[qsi]) >> b) & 0x0F0F;
257+
const uint qh = (uint(data_a_packed16[ib].qh[qhi]) >> qhshift) & 0x0303;
258+
const vec2 q = (vec2(unpack8(ql | (qh << 4)).xy) - 32) * dscale;
259+
260+
buf_a[buf_idx] = FLOAT_TYPE_VEC2(q.x, q.y);
258261
#elif defined(DATA_A_IQ1_S)
259262
const uint idx = pos_a + col * p.stride_a / LOAD_VEC_A + row;
260263
const uint buf_idx = col * SHMEM_STRIDE + row * LOAD_VEC_A / 2;

0 commit comments

Comments
 (0)