@@ -304,6 +304,42 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
304
304
}
305
305
#endif
306
306
307
+ #if defined(DATA_A_IQ4_XS)
308
// Decode two consecutive 4-bit quants of an IQ4_XS block.
//   ib       - block index; the block read is data_a[a_offset + ib]
//   iqs      - element index inside the block: iqs / 32 picks the sub-block,
//              iqs % 16 picks the byte within that sub-block's 16 bytes, and
//              bit 4 of iqs picks the high (set) or low (clear) nibble
//   a_offset - base offset added to ib when indexing data_a
// Returns the two kvalues_iq4nl lookups multiplied by the sub-block scale.
// The block's `d` field is not read here, so the result is not scaled by it.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint block_idx = a_offset + ib;
    const uint sub       = iqs >> 5;                 // same as iqs / 32
    const uint byte_idx  = (sub << 4) + (iqs & 15);  // 16 * sub + (iqs % 16)

    // Bit 4 of iqs selects the upper nibble: shift is 4 when set, else 0.
    const uint nibble_shift = (iqs & 16) >> 2;

    // 6-bit scale: low 4 bits come from scales_l (two sub-blocks per entry),
    // high 2 bits come from scales_h (2 bits per sub-block).
    const uint scale_lo = (data_a[block_idx].scales_l[sub / 2] >> (4 * (sub & 1))) & 0xF;
    const uint scale_hi = (data_a[block_idx].scales_h >> (2 * sub)) & 3;
    const float sub_scale = float(int(scale_lo | (scale_hi << 4)) - 32);

    const u8vec2 packed = u8vec2(
        data_a[block_idx].qs[byte_idx],
        data_a[block_idx].qs[byte_idx + 1]
    );
    const u8vec2 q = (packed >> nibble_shift) & uint8_t(0xF);

    return sub_scale * vec2(kvalues_iq4nl[q.x], kvalues_iq4nl[q.y]);
}
321
// Decode four consecutive 4-bit quants of an IQ4_XS block.
//   ib       - block index; the block read is data_a[a_offset + ib]
//   iqs      - element index inside the block: iqs / 32 picks the sub-block,
//              iqs % 16 picks the first byte within that sub-block's 16 bytes,
//              and bit 4 of iqs picks the high (set) or low (clear) nibble
//   a_offset - base offset added to ib when indexing data_a
// Returns the four kvalues_iq4nl lookups multiplied by the sub-block scale.
// The block's `d` field is not read here, so the result is not scaled by it.
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
    const uint block_idx = a_offset + ib;
    const uint sub       = iqs >> 5;                 // same as iqs / 32
    const uint byte_idx  = (sub << 4) + (iqs & 15);  // 16 * sub + (iqs % 16)

    // Bit 4 of iqs selects the upper nibble: shift is 4 when set, else 0.
    const uint nibble_shift = (iqs & 16) >> 2;

    // 6-bit scale: low 4 bits come from scales_l (two sub-blocks per entry),
    // high 2 bits come from scales_h (2 bits per sub-block).
    const uint scale_lo = (data_a[block_idx].scales_l[sub / 2] >> (4 * (sub & 1))) & 0xF;
    const uint scale_hi = (data_a[block_idx].scales_h >> (2 * sub)) & 3;
    const float sub_scale = float(int(scale_lo | (scale_hi << 4)) - 32);

    const u8vec4 packed = u8vec4(
        data_a[block_idx].qs[byte_idx + 0],
        data_a[block_idx].qs[byte_idx + 1],
        data_a[block_idx].qs[byte_idx + 2],
        data_a[block_idx].qs[byte_idx + 3]
    );
    const u8vec4 q = (packed >> nibble_shift) & uint8_t(0xF);

    return sub_scale * vec4(
        kvalues_iq4nl[q.x],
        kvalues_iq4nl[q.y],
        kvalues_iq4nl[q.z],
        kvalues_iq4nl[q.w]
    );
}
341
+ #endif
342
+
307
343
#if defined(DATA_A_IQ4_NL)
308
344
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
309
345
const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
@@ -321,7 +357,7 @@ vec2 get_dm(uint ib, uint a_offset) {
321
357
}
322
358
#endif
323
359
324
- #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
360
+ #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined( DATA_A_IQ4_NL)
325
361
// Fetch the (d, m) pair for block data_a[a_offset + ib].
// For the formats selected by the enclosing #if, only `d` is read from the
// block and the second component is always 0.
vec2 get_dm(uint ib, uint a_offset) {
    const uint block_idx = a_offset + ib;
    const float block_d = float(data_a[block_idx].d);
    return vec2(block_d, 0);
}
0 commit comments