@@ -304,6 +304,42 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
304304}
305305#endif
306306
307+ #if defined(DATA_A_IQ4_XS)
// IQ4_XS: decode two adjacent 4-bit codes of block `ib` (relative to
// `a_offset`) starting at element position `iqs`.
//
// Every group of 32 elements has its own 6-bit scale: the low 4 bits are a
// nibble of scales_l, the high 2 bits are a bit pair of scales_h; the value
// is re-centred by subtracting 32. The block-wide `d` factor is not applied
// here.
//
// Returns the two codes mapped through kvalues_iq4nl and multiplied by that
// group scale.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint block = a_offset + ib;

    // Which 32-element group `iqs` falls into, and the first of the two
    // bytes to read (each group occupies 16 bytes of qs).
    const uint group    = iqs >> 5;
    const uint byte_idx = (group << 4) + (iqs & 15);

    // Bit 4 of iqs selects the nibble inside each byte: 0 -> low, 4 -> high.
    const uint nibble_shift = (iqs & 16) >> 2;

    // Assemble the 6-bit group scale from its two storage locations.
    const uint scale_lo = (data_a[block].scales_l[group/2] >> (4 * (group & 1))) & 0xF;
    const uint scale_hi = (data_a[block].scales_h >> (2 * group)) & 3;
    const uint scale6   = scale_lo | (scale_hi << 4);
    const float dl = float(int(scale6) - 32);

    const u8vec2 packed = u8vec2(data_a[block].qs[byte_idx], data_a[block].qs[byte_idx + 1]);
    const u8vec2 code   = (packed >> nibble_shift) & uint8_t(0xF);

    return dl * vec2(kvalues_iq4nl[code.x], kvalues_iq4nl[code.y]);
}
// IQ4_XS, four-wide variant: decode four adjacent 4-bit codes of block `ib`
// (relative to `a_offset`) starting at element position `iqs`.
//
// The 6-bit scale of the enclosing 32-element group is rebuilt from
// scales_l (low 4 bits) and scales_h (high 2 bits) and offset by -32.
// The block-wide `d` factor is not applied here.
//
// Returns the four codes mapped through kvalues_iq4nl and multiplied by that
// group scale.
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
    const uint block = a_offset + ib;

    // 32-element group index and the first of the four bytes to read
    // (16 bytes of qs per group).
    const uint group = iqs >> 5;
    const uint first = (group << 4) + (iqs & 15);

    // Group scale: 4 low bits | 2 high bits, then remove the bias of 32.
    const uint lo_bits = (data_a[block].scales_l[group/2] >> (4 * (group & 1))) & 0xF;
    const uint hi_bits = (data_a[block].scales_h >> (2 * group)) & 3;
    const float dl = float(int(lo_bits | (hi_bits << 4)) - 32);

    // Bit 4 of iqs picks the low (shift 0) or high (shift 4) nibble.
    const uint nibble_shift = (iqs & 16) >> 2;
    const u8vec4 packed = u8vec4(
        data_a[block].qs[first + 0],
        data_a[block].qs[first + 1],
        data_a[block].qs[first + 2],
        data_a[block].qs[first + 3]
    );
    const u8vec4 code = (packed >> nibble_shift) & uint8_t(0xF);

    return dl * vec4(
        kvalues_iq4nl[code.x], kvalues_iq4nl[code.y],
        kvalues_iq4nl[code.z], kvalues_iq4nl[code.w]);
}
341+ #endif
342+
307343#if defined(DATA_A_IQ4_NL)
308344vec2 dequantize(uint ib, uint iqs, uint a_offset) {
309345 const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
@@ -321,7 +357,7 @@ vec2 get_dm(uint ib, uint a_offset) {
321357}
322358#endif
323359
324- #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
360+ #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined( DATA_A_IQ4_NL)
// Returns the `d` field of block `ib` (relative to `a_offset`) converted to
// float in .x; .y is always 0 in this variant.
vec2 get_dm(uint ib, uint a_offset) {
    const float d = float(data_a[a_offset + ib].d);
    return vec2(d, 0);
}
0 commit comments