|  | 
|  | 1 | + | 
|  | 2 | +#include "types.comp" | 
|  | 3 | + | 
|  | 4 | +layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ4_0 { | 
|  | 5 | +   block_q4_0_packed16 block; | 
|  | 6 | +}; | 
|  | 7 | + | 
|  | 8 | +float16_t dequantFuncQ4_0(const in decodeBufQ4_0 bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 9 | +{ | 
|  | 10 | +    const float16_t d = bl.block.d; | 
|  | 11 | +    const uint idx = coordInBlock[1]; | 
|  | 12 | +    const uint shift = (idx & 0x10) >> 2; | 
|  | 13 | +    uint32_t qs = unpack8(uint32_t(bl.block.qs[(idx & 0xE) >> 1]))[idx & 1]; | 
|  | 14 | +    qs >>= shift; | 
|  | 15 | +    qs &= 0xF; | 
|  | 16 | +    float16_t ret = (float16_t(qs) - float16_t(8)) * d; | 
|  | 17 | +    return ret; | 
|  | 18 | +} | 
|  | 19 | + | 
|  | 20 | +layout(buffer_reference, std430, buffer_reference_align = 4) buffer decodeBufQ4_1 { | 
|  | 21 | +   block_q4_1 block; | 
|  | 22 | +}; | 
|  | 23 | + | 
|  | 24 | +float16_t dequantFuncQ4_1(const in decodeBufQ4_1 bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 25 | +{ | 
|  | 26 | +    const float16_t d = bl.block.d; | 
|  | 27 | +    const float16_t m = bl.block.m; | 
|  | 28 | +    const uint idx = coordInBlock[1]; | 
|  | 29 | +    const uint iqs = idx & 0xF; | 
|  | 30 | +    const uint shift = (idx & 0x10) >> 2; | 
|  | 31 | +    uint32_t qs = bl.block.qs[iqs]; | 
|  | 32 | +    qs >>= shift; | 
|  | 33 | +    qs &= 0xF; | 
|  | 34 | +    float16_t ret = float16_t(qs) * d + m; | 
|  | 35 | +    return ret; | 
|  | 36 | +} | 
|  | 37 | + | 
|  | 38 | +layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ5_0 { | 
|  | 39 | +   block_q5_0 block; | 
|  | 40 | +}; | 
|  | 41 | + | 
|  | 42 | +float16_t dequantFuncQ5_0(const in decodeBufQ5_0 bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 43 | +{ | 
|  | 44 | +    const float16_t d = bl.block.d; | 
|  | 45 | +    const uint idx = coordInBlock[1]; | 
|  | 46 | +    const uint iqs = idx & 0xF; | 
|  | 47 | + | 
|  | 48 | +    const uint uint_qh = uint(bl.block.qh[1]) << 16 | bl.block.qh[0]; | 
|  | 49 | +    const uint qh = ((uint_qh >> idx) << 4) & 0x10; | 
|  | 50 | + | 
|  | 51 | +    const uint shift = (idx & 0x10) >> 2; | 
|  | 52 | +    uint32_t qs = bl.block.qs[iqs]; | 
|  | 53 | +    qs >>= shift; | 
|  | 54 | +    qs &= 0xF; | 
|  | 55 | + | 
|  | 56 | +    float16_t ret = (float16_t(qs | qh) - float16_t(16)) * d; | 
|  | 57 | +    return ret; | 
|  | 58 | +} | 
|  | 59 | + | 
|  | 60 | +layout(buffer_reference, std430, buffer_reference_align = 8) buffer decodeBufQ5_1 { | 
|  | 61 | +   block_q5_1 block; | 
|  | 62 | +}; | 
|  | 63 | + | 
|  | 64 | +float16_t dequantFuncQ5_1(const in decodeBufQ5_1 bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 65 | +{ | 
|  | 66 | +    const float16_t d = bl.block.d; | 
|  | 67 | +    const float16_t m = bl.block.m; | 
|  | 68 | +    const uint idx = coordInBlock[1]; | 
|  | 69 | +    const uint iqs = idx & 0xF; | 
|  | 70 | + | 
|  | 71 | +    const uint uint_qh = bl.block.qh; | 
|  | 72 | +    const uint qh = ((uint_qh >> idx) << 4) & 0x10; | 
|  | 73 | + | 
|  | 74 | +    const uint shift = (idx & 0x10) >> 2; | 
|  | 75 | +    uint32_t qs = bl.block.qs[iqs]; | 
|  | 76 | +    qs >>= shift; | 
|  | 77 | +    qs &= 0xF; | 
|  | 78 | + | 
|  | 79 | +    float16_t ret = float16_t(qs | qh) * d + m; | 
|  | 80 | +    return ret; | 
|  | 81 | +} | 
|  | 82 | + | 
|  | 83 | +layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ8_0 { | 
|  | 84 | +   block_q8_0_packed16 block; | 
|  | 85 | +}; | 
|  | 86 | + | 
|  | 87 | +float16_t dequantFuncQ8_0(const in decodeBufQ8_0 bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 88 | +{ | 
|  | 89 | +    const float16_t d = bl.block.d; | 
|  | 90 | +    const uint idx = coordInBlock[1]; | 
|  | 91 | +    const uint iqs = idx; | 
|  | 92 | + | 
|  | 93 | +    // Load 16b and select the byte for this element | 
|  | 94 | +    int32_t qs = unpack8(int32_t(bl.block.qs[(iqs & 0x1E) >> 1]))[iqs & 1]; | 
|  | 95 | +    float16_t ret = float16_t(qs) * d; | 
|  | 96 | +    return ret; | 
|  | 97 | +} | 
|  | 98 | + | 
|  | 99 | +layout(buffer_reference, std430, buffer_reference_align = 4) buffer decodeBufQ2_K { | 
|  | 100 | +   block_q2_K block; | 
|  | 101 | +}; | 
|  | 102 | + | 
|  | 103 | +float16_t dequantFuncQ2_K(const in decodeBufQ2_K bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 104 | +{ | 
|  | 105 | +    const f16vec2 d = bl.block.d; | 
|  | 106 | +    const uint idx = coordInBlock[1]; | 
|  | 107 | +    const uint iqs = idx; | 
|  | 108 | + | 
|  | 109 | +    const uint qsi = (iqs / 128) * 32 + (iqs % 32);     // 0..31 | 
|  | 110 | +    const uint scalesi = iqs / 16;                      // 0..15 | 
|  | 111 | +    const uint qsshift = ((iqs % 128) / 32) * 2;        // 0,2,4,6 | 
|  | 112 | + | 
|  | 113 | +    uint32_t qs = bl.block.qs[qsi]; | 
|  | 114 | +    const uint scales = bl.block.scales[scalesi]; | 
|  | 115 | +    float16_t ret = d.x * float16_t(scales & 0xF) * float16_t((qs >> qsshift) & 3) - d.y * float16_t(scales >> 4); | 
|  | 116 | +    return ret; | 
|  | 117 | +} | 
|  | 118 | + | 
|  | 119 | +layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ3_K { | 
|  | 120 | +   block_q3_K block; | 
|  | 121 | +}; | 
|  | 122 | + | 
|  | 123 | +float16_t dequantFuncQ3_K(const in decodeBufQ3_K bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 124 | +{ | 
|  | 125 | +    const uint idx = coordInBlock[1]; | 
|  | 126 | +    const uint iqs = idx; | 
|  | 127 | + | 
|  | 128 | +    const uint n = iqs / 128;                    // 0,1 | 
|  | 129 | +    const uint qsi = n * 32 + (iqs % 32);        // 0..63 | 
|  | 130 | +    const uint hmi =          (iqs % 32);        // 0..31 | 
|  | 131 | +    const uint j = (iqs % 128) / 8;              // 0..15 | 
|  | 132 | +    const uint is = iqs / 16;                    // 0..15 | 
|  | 133 | +    const uint halfsplit = ((iqs % 128) / 32);   // 0,1,2,3 | 
|  | 134 | +    const uint qsshift = halfsplit * 2;          // 0,2,4,6 | 
|  | 135 | +    const uint m = 1 << (4 * n + halfsplit);     // 1,2,4,8,16,32,64,128 | 
|  | 136 | + | 
|  | 137 | +    uint32_t scaleidx0 = (is < 8) ? is : (is-8); | 
|  | 138 | +    uint32_t scaleidx0shift = (is < 8) ? 0 : 4; | 
|  | 139 | +    uint32_t scaleidx1 = is + 8 - (is/4)*4; | 
|  | 140 | +    uint32_t scaleidx1shift = (is/4)*2; | 
|  | 141 | + | 
|  | 142 | +    const int8_t us = int8_t(((bl.block.scales[scaleidx0] >> scaleidx0shift) & 0xF) | (((bl.block.scales[scaleidx1] >> scaleidx1shift) & 3) << 4)); | 
|  | 143 | + | 
|  | 144 | +    const float16_t dl = bl.block.d * float16_t(us - 32); | 
|  | 145 | + | 
|  | 146 | +    float16_t ret = dl * float16_t(int8_t((bl.block.qs[qsi    ] >> qsshift) & 3) - (((bl.block.hmask[hmi    ] & m) != 0) ? 0 : 4)); | 
|  | 147 | + | 
|  | 148 | +    return ret; | 
|  | 149 | +} | 
|  | 150 | + | 
|  | 151 | +layout(buffer_reference, std430, buffer_reference_align = 16) buffer decodeBufQ4_K { | 
|  | 152 | +   block_q4_K block; | 
|  | 153 | +}; | 
|  | 154 | + | 
|  | 155 | +float16_t dequantFuncQ4_K(const in decodeBufQ4_K bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 156 | +{ | 
|  | 157 | +    const uint idx = coordInBlock[1]; | 
|  | 158 | +    const uint iqs = idx; | 
|  | 159 | + | 
|  | 160 | +    const uint n = iqs / 64;                   // 0,1,2,3 | 
|  | 161 | +    const uint b = (iqs % 64) / 32;            // 0,1 | 
|  | 162 | +    const uint is = (idx & 0xE0) >> 5;         // 0..7 | 
|  | 163 | +    const uint qsi = n * 32 + (iqs % 32);      // 0..127 | 
|  | 164 | + | 
|  | 165 | +    const f16vec2 loadd = bl.block.d; | 
|  | 166 | + | 
|  | 167 | +    uint32_t sc; | 
|  | 168 | +    uint32_t mbyte; | 
|  | 169 | + | 
|  | 170 | +    uint32_t scidx0 = (is < 4) ? is : (is + 4); | 
|  | 171 | +    uint32_t scidx1 = (is < 4) ? is : (is - 4); | 
|  | 172 | +    uint32_t scidxmask1 = (is < 4) ? 0x30 : 0xC0; | 
|  | 173 | +    uint32_t scidxshift1 = (is < 4) ? 0 : 2; | 
|  | 174 | +    uint32_t mbidx0 = is + 4; | 
|  | 175 | +    uint32_t mbidx1 = (is < 4) ? is + 4 : is; | 
|  | 176 | +    uint32_t mbidxmask0 = (is < 4) ? 0xF : 0xF0; | 
|  | 177 | +    uint32_t mbidxshift0 = (is < 4) ? 0 : 4; | 
|  | 178 | +    uint32_t mbidxmask1 = (is < 4) ? 0x30 : 0xC0; | 
|  | 179 | +    uint32_t mbidxshift1 = (is < 4) ? 0 : 2; | 
|  | 180 | + | 
|  | 181 | +    sc    = uint8_t((bl.block.scales[scidx0] & 0xF)                         | ((bl.block.scales[scidx1] & scidxmask1) >> scidxshift1)); | 
|  | 182 | +    mbyte = uint8_t(((bl.block.scales[mbidx0] & mbidxmask0) >> mbidxshift0) | ((bl.block.scales[mbidx1] & mbidxmask1) >> mbidxshift1)); | 
|  | 183 | + | 
|  | 184 | +    const float16_t d = loadd.x * float16_t(sc); | 
|  | 185 | +    const float16_t m = loadd.y * float16_t(mbyte); | 
|  | 186 | + | 
|  | 187 | +    uint32_t dmask = 0xF << (b * 4); | 
|  | 188 | + | 
|  | 189 | +    float16_t ret = d * float16_t((bl.block.qs[qsi    ] & dmask) >> (b * 4)) - m; | 
|  | 190 | + | 
|  | 191 | +    return ret; | 
|  | 192 | +} | 
|  | 193 | + | 
|  | 194 | +layout(buffer_reference, std430, buffer_reference_align = 16) buffer decodeBufQ5_K { | 
|  | 195 | +   block_q5_K block; | 
|  | 196 | +}; | 
|  | 197 | + | 
|  | 198 | +float16_t dequantFuncQ5_K(const in decodeBufQ5_K bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 199 | +{ | 
|  | 200 | +    const uint idx = coordInBlock[1]; | 
|  | 201 | +    const uint iqs = idx; | 
|  | 202 | + | 
|  | 203 | +    const uint n = iqs / 64;                   // 0,1,2,3 | 
|  | 204 | +    const uint b = (iqs % 64) / 32;            // 0,1 | 
|  | 205 | +    const uint is = (idx & 0xE0) >> 5;         // 0..7 | 
|  | 206 | +    const uint qsi = n * 32 + (iqs % 32);      // 0..127 | 
|  | 207 | +    const uint qhi = (iqs % 32);               // 0..31 | 
|  | 208 | + | 
|  | 209 | +    const uint8_t hm = uint8_t(1 << (iqs / 32)); | 
|  | 210 | + | 
|  | 211 | +    const f16vec2 loadd = bl.block.d; | 
|  | 212 | + | 
|  | 213 | +    uint32_t sc; | 
|  | 214 | +    uint32_t mbyte; | 
|  | 215 | + | 
|  | 216 | +    uint32_t scidx0 = (is < 4) ? is : (is + 4); | 
|  | 217 | +    uint32_t scidx1 = (is < 4) ? is : (is - 4); | 
|  | 218 | +    uint32_t scidxmask1 = (is < 4) ? 0x30 : 0xC0; | 
|  | 219 | +    uint32_t scidxshift1 = (is < 4) ? 0 : 2; | 
|  | 220 | +    uint32_t mbidx0 = is + 4; | 
|  | 221 | +    uint32_t mbidx1 = (is < 4) ? is + 4 : is; | 
|  | 222 | +    uint32_t mbidxmask0 = (is < 4) ? 0xF : 0xF0; | 
|  | 223 | +    uint32_t mbidxshift0 = (is < 4) ? 0 : 4; | 
|  | 224 | +    uint32_t mbidxmask1 = (is < 4) ? 0x30 : 0xC0; | 
|  | 225 | +    uint32_t mbidxshift1 = (is < 4) ? 0 : 2; | 
|  | 226 | + | 
|  | 227 | +    sc    = uint8_t((bl.block.scales[scidx0] & 0xF)                         | ((bl.block.scales[scidx1] & scidxmask1) >> scidxshift1)); | 
|  | 228 | +    mbyte = uint8_t(((bl.block.scales[mbidx0] & mbidxmask0) >> mbidxshift0) | ((bl.block.scales[mbidx1] & mbidxmask1) >> mbidxshift1)); | 
|  | 229 | + | 
|  | 230 | +    const float16_t d = loadd.x * float16_t(sc); | 
|  | 231 | +    const float16_t m = loadd.y * float16_t(mbyte); | 
|  | 232 | + | 
|  | 233 | +    uint32_t dmask = 0xF << (b * 4); | 
|  | 234 | + | 
|  | 235 | +    float16_t ret = d * (float16_t((bl.block.qs[qsi    ] & dmask) >> (b * 4)) + float16_t((bl.block.qh[qhi    ] & hm) != 0 ? 16 : 0)) - m; | 
|  | 236 | + | 
|  | 237 | +    return ret; | 
|  | 238 | +} | 
|  | 239 | + | 
|  | 240 | +layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ6_K { | 
|  | 241 | +   block_q6_K block; | 
|  | 242 | +}; | 
|  | 243 | + | 
|  | 244 | +float16_t dequantFuncQ6_K(const in decodeBufQ6_K bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 245 | +{ | 
|  | 246 | +    const uint idx = coordInBlock[1]; | 
|  | 247 | +    const uint iqs = idx; | 
|  | 248 | + | 
|  | 249 | +    const uint n = iqs / 128;                   // 0,1 | 
|  | 250 | +    const uint b = (iqs % 128) / 64;            // 0,1 | 
|  | 251 | +    const uint is_b = (iqs % 32) / 16;          // 0,1 | 
|  | 252 | +    const uint qhshift = ((iqs % 128) / 32) * 2;// 0,2,4,6 | 
|  | 253 | +    const uint is = 8 * n + qhshift + is_b;     // 0..15 | 
|  | 254 | +    const uint qsi = n * 64 + (iqs % 64);       // 0..127 | 
|  | 255 | +    const uint qhi = n * 32 + (iqs % 32);       // 0..63 | 
|  | 256 | + | 
|  | 257 | +    const float16_t dscale = bl.block.d * float16_t(bl.block.scales[is]); | 
|  | 258 | + | 
|  | 259 | +    float16_t ret = dscale * float16_t(int8_t(((bl.block.ql[qsi    ] >> (b * 4)) & 0xF) | (((bl.block.qh[qhi    ] >> qhshift) & 3) << 4)) - 32); | 
|  | 260 | + | 
|  | 261 | +    return ret; | 
|  | 262 | +} | 
|  | 263 | + | 
|  | 264 | +#if defined(DATA_A_IQ4_NL) | 
|  | 265 | +layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ4_NL { | 
|  | 266 | +   block_iq4_nl block; | 
|  | 267 | +}; | 
|  | 268 | + | 
|  | 269 | +float16_t dequantFuncIQ4_NL(const in decodeBufIQ4_NL bl, const in uint blockCoords[2], const in uint coordInBlock[2]) | 
|  | 270 | +{ | 
|  | 271 | +    const float16_t d = bl.block.d; | 
|  | 272 | +    const uint idx = coordInBlock[1]; | 
|  | 273 | +    const uint iqs = idx & 0xF; | 
|  | 274 | +    const uint shift = (idx & 0x10) >> 2; | 
|  | 275 | +    uint32_t qs = bl.block.qs[iqs]; | 
|  | 276 | +    qs >>= shift; | 
|  | 277 | +    qs &= 0xF; | 
|  | 278 | +    float16_t ret = float16_t(kvalues_iq4nl[qs]) * d; | 
|  | 279 | +    return ret; | 
|  | 280 | +} | 
|  | 281 | +#endif | 
|  | 282 | + | 
|  | 283 | +#if defined(DATA_A_Q4_0) | 
|  | 284 | +#define dequantFuncA dequantFuncQ4_0 | 
|  | 285 | +#elif defined(DATA_A_Q4_1) | 
|  | 286 | +#define dequantFuncA dequantFuncQ4_1 | 
|  | 287 | +#elif defined(DATA_A_Q5_0) | 
|  | 288 | +#define dequantFuncA dequantFuncQ5_0 | 
|  | 289 | +#elif defined(DATA_A_Q5_1) | 
|  | 290 | +#define dequantFuncA dequantFuncQ5_1 | 
|  | 291 | +#elif defined(DATA_A_Q8_0) | 
|  | 292 | +#define dequantFuncA dequantFuncQ8_0 | 
|  | 293 | +#elif defined(DATA_A_Q2_K) | 
|  | 294 | +#define dequantFuncA dequantFuncQ2_K | 
|  | 295 | +#elif defined(DATA_A_Q3_K) | 
|  | 296 | +#define dequantFuncA dequantFuncQ3_K | 
|  | 297 | +#elif defined(DATA_A_Q4_K) | 
|  | 298 | +#define dequantFuncA dequantFuncQ4_K | 
|  | 299 | +#elif defined(DATA_A_Q5_K) | 
|  | 300 | +#define dequantFuncA dequantFuncQ5_K | 
|  | 301 | +#elif defined(DATA_A_Q6_K) | 
|  | 302 | +#define dequantFuncA dequantFuncQ6_K | 
|  | 303 | +#elif defined(DATA_A_IQ4_NL) | 
|  | 304 | +#define dequantFuncA dequantFuncIQ4_NL | 
|  | 305 | +#endif | 
0 commit comments