Skip to content

Commit a5f4119

Browse files
committed
vulkan: fix mul_mat_vec_iq1_s formatting
1 parent 254098a commit a5f4119

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,44 +10,44 @@ FLOAT_TYPE temp[NUM_COLS][NUM_ROWS];
1010
void calc_superblock(const uint a_offset, const uint b_offset, const uint ib32, const uint i,
1111
const uint num_blocks_per_row, const uint first_row, const uint num_rows) {
1212
const uint y_idx_base = i * QUANT_K + 32 * ib32;
13-
[[unroll]] for (uint j = 0; j < NUM_COLS; ++j) {
13+
[[unroll]] for (uint j = 0; j < NUM_COLS; ++j) {
1414
const uint base_b_idx = (j * p.batch_stride_b + b_offset + y_idx_base) / 4;
15-
[[unroll]] for (uint l = 0; l < 4; ++l) {
15+
[[unroll]] for (uint l = 0; l < 4; ++l) {
1616
const vec4 b_val_0 = vec4(data_b_v4[base_b_idx + 2 * l]);
1717
const vec4 b_val_1 = vec4(data_b_v4[base_b_idx + 2 * l + 1]);
1818

1919
// index for data_a
2020
uint ibi = a_offset / QUANT_K + first_row * num_blocks_per_row + i;
2121

22-
[[unroll]] for (uint n = 0; n < num_rows; ++n) {
22+
[[unroll]] for (uint n = 0; n < num_rows; ++n) {
2323
const float d = float(data_a[ibi].d);
2424
const uint qh = data_a[ibi].qh[ib32];
2525

2626
const float dl = d * float(2 * bitfieldExtract(qh, 12, 3) + 1);
2727
const uint qs = data_a[ibi].qs[4 * ib32 + l];
28-
const uint idxhi = bitfieldExtract(qh, 3 * int(l), 3);
28+
const uint idxhi = bitfieldExtract(qh, 3 * int(l), 3);
2929
const uint16_t grid = uint16_t(iq1s_grid[qs | (idxhi << 8)]);
3030

3131
const float delta_val = ((qh & 0x8000) != 0) ? -IQ1S_DELTA : IQ1S_DELTA;
32-
const vec4 delta_v = vec4(delta_val);
32+
const vec4 delta_v = vec4(delta_val);
3333
const vec4 fbits0 = vec4(
3434
float(bitfieldExtract(grid, 0, 2)),
3535
float(bitfieldExtract(grid, 2, 2)),
3636
float(bitfieldExtract(grid, 4, 2)),
3737
float(bitfieldExtract(grid, 6, 2))
38-
);
38+
);
3939
const vec4 fbits1 = vec4(
4040
float(bitfieldExtract(grid, 8, 2)),
4141
float(bitfieldExtract(grid, 10, 2)),
4242
float(bitfieldExtract(grid, 12, 2)),
4343
float(bitfieldExtract(grid, 14, 2))
4444
);
45-
45+
4646
vec4 sum_v = fma(b_val_0, fbits0 + delta_v, vec4(0.0));
4747
sum_v = fma(b_val_1, fbits1 + delta_v, sum_v);
48-
FLOAT_TYPE sum = dot(sum_v, vec4(1.0));
49-
50-
temp[j][n] = fma(dl, sum, temp[j][n]);
48+
FLOAT_TYPE sum = dot(sum_v, vec4(1.0));
49+
50+
temp[j][n] = fma(dl, sum, temp[j][n]);
5151
ibi += num_blocks_per_row;
5252
}
5353
}

0 commit comments

Comments
 (0)