|
| 1 | +// Defines CLOCK_MONOTONIC on Linux |
| 2 | +#define _POSIX_C_SOURCE 199309L |
| 3 | + |
1 | 4 | #include "ggml.h" |
2 | 5 |
|
3 | 6 | #if defined(_MSC_VER) || defined(__MINGW32__) |
@@ -400,16 +403,63 @@ static inline __m128i packNibbles( __m256i bytes ) |
400 | 403 | // method 5 |
401 | 404 | // blocks of QK elements |
402 | 405 | // represented with a single float (delta) and QK/2 8-bit ints (i.e QK 4-bit signed integer factors) |
| 406 | + |
| 407 | +// reference implementation for deterministic creation of model files |
| 408 | +static void quantize_row_q4_0_reference(const float * restrict x, void * restrict y, int k) { |
| 409 | + assert(k % QK == 0); |
| 410 | + const int nb = k / QK; |
| 411 | + |
| 412 | + const size_t bs = sizeof(float) + QK/2; |
| 413 | + |
| 414 | + uint8_t * restrict pd = ((uint8_t *)y + 0*bs); |
| 415 | + uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float)); |
| 416 | + |
| 417 | + uint8_t pp[QK/2]; |
| 418 | + |
| 419 | + for (int i = 0; i < nb; i++) { |
| 420 | + float amax = 0.0f; // absolute max |
| 421 | + |
| 422 | + for (int l = 0; l < QK; l++) { |
| 423 | + const float v = x[i*QK + l]; |
| 424 | + amax = MAX(amax, fabsf(v)); |
| 425 | + } |
| 426 | + |
| 427 | + const float d = amax / ((1 << 3) - 1); |
| 428 | + const float id = d ? 1.0f/d : 0.0f; |
| 429 | + |
| 430 | + *(float *)pd = d; |
| 431 | + pd += bs; |
| 432 | + |
| 433 | + for (int l = 0; l < QK; l += 2) { |
| 434 | + const float v0 = x[i*QK + l + 0]*id; |
| 435 | + const float v1 = x[i*QK + l + 1]*id; |
| 436 | + |
| 437 | + const uint8_t vi0 = ((int8_t) (round(v0))) + 8; |
| 438 | + const uint8_t vi1 = ((int8_t) (round(v1))) + 8; |
| 439 | + |
| 440 | + assert(vi0 >= 0 && vi0 < 16); |
| 441 | + assert(vi1 >= 0 && vi1 < 16); |
| 442 | + |
| 443 | + pp[l/2] = vi0 | (vi1 << 4); |
| 444 | + } |
| 445 | + |
| 446 | + memcpy(pb, pp, sizeof(pp)); |
| 447 | + pb += bs; |
| 448 | + } |
| 449 | +} |
| 450 | + |
403 | 451 | void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { |
404 | 452 | assert(k % QK == 0); |
405 | 453 |
|
| 454 | +#if __ARM_NEON || defined(__AVX2__) || defined(__wasm_simd128__) |
406 | 455 | const int nb = k / QK; |
407 | 456 | const size_t bs = sizeof(float) + QK/2; |
408 | 457 |
|
409 | 458 | uint8_t * restrict pd = ((uint8_t *)y + 0*bs); |
410 | 459 | uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float)); |
411 | 460 |
|
412 | 461 | uint8_t pp[QK/2]; |
| 462 | +#endif |
413 | 463 |
|
414 | 464 | #if __ARM_NEON |
415 | 465 | #if QK == 32 |
@@ -566,36 +616,7 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { |
566 | 616 | #endif |
567 | 617 | #else |
568 | 618 | // scalar |
569 | | - for (int i = 0; i < nb; i++) { |
570 | | - float amax = 0.0f; // absolute max |
571 | | - |
572 | | - for (int l = 0; l < QK; l++) { |
573 | | - const float v = x[i*QK + l]; |
574 | | - amax = MAX(amax, fabsf(v)); |
575 | | - } |
576 | | - |
577 | | - const float d = amax / ((1 << 3) - 1); |
578 | | - const float id = d ? 1.0f/d : 0.0f; |
579 | | - |
580 | | - *(float *)pd = d; |
581 | | - pd += bs; |
582 | | - |
583 | | - for (int l = 0; l < QK; l += 2) { |
584 | | - const float v0 = x[i*QK + l + 0]*id; |
585 | | - const float v1 = x[i*QK + l + 1]*id; |
586 | | - |
587 | | - const uint8_t vi0 = ((int8_t) (round(v0))) + 8; |
588 | | - const uint8_t vi1 = ((int8_t) (round(v1))) + 8; |
589 | | - |
590 | | - assert(vi0 >= 0 && vi0 < 16); |
591 | | - assert(vi1 >= 0 && vi1 < 16); |
592 | | - |
593 | | - pp[l/2] = vi0 | (vi1 << 4); |
594 | | - } |
595 | | - |
596 | | - memcpy(pb, pp, sizeof(pp)); |
597 | | - pb += bs; |
598 | | - } |
| 619 | + quantize_row_q4_0_reference(x, y, k); |
599 | 620 | #endif |
600 | 621 | } |
601 | 622 |
|
@@ -10702,119 +10723,60 @@ enum ggml_opt_result ggml_opt( |
10702 | 10723 |
|
10703 | 10724 | //////////////////////////////////////////////////////////////////////////////// |
10704 | 10725 |
|
10705 | | -size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) { |
| 10726 | +size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int qk, int64_t * hist) { |
10706 | 10727 | const int nb = k / qk; |
10707 | 10728 | const size_t bs = (sizeof(float) + sizeof(uint8_t)*qk/2); |
10708 | 10729 | const size_t row_size = nb*bs; |
10709 | 10730 |
|
10710 | 10731 | assert(k % qk == 0); |
10711 | 10732 |
|
10712 | | - const size_t pp_size = qk / 2; |
10713 | | - uint8_t * pp = (uint8_t *) alloca(pp_size); |
10714 | | - |
10715 | 10733 | char * pdst = (char *) dst; |
10716 | 10734 |
|
10717 | 10735 | for (int j = 0; j < n; j += k) { |
10718 | 10736 | uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); |
10719 | 10737 | uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); |
10720 | 10738 |
|
10721 | | - for (int i = 0; i < nb; i++) { |
10722 | | - float amax = 0.0f; // absolute max |
10723 | | - |
10724 | | - { |
10725 | | - for (int l = 0; l < qk; l++) { |
10726 | | - const float v = src[j + i*qk + l]; |
10727 | | - amax = MAX(amax, fabsf(v)); |
10728 | | - } |
10729 | | - |
10730 | | - const float d = amax / ((1 << 3) - 1); |
10731 | | - const float id = d ? 1.0f/d : 0.0f; |
10732 | | - |
10733 | | - *(float *) pd = d; |
10734 | | - pd += bs; |
| 10739 | + quantize_row_q4_0_reference(src + j, pd, k); |
10735 | 10740 |
|
10736 | | - for (int l = 0; l < qk; l += 2) { |
10737 | | - const float v0 = (src[j + i*qk + l + 0])*id; |
10738 | | - const float v1 = (src[j + i*qk + l + 1])*id; |
10739 | | - |
10740 | | - const uint8_t vi0 = ((int8_t) (round(v0))) + 8; |
10741 | | - const uint8_t vi1 = ((int8_t) (round(v1))) + 8; |
10742 | | - |
10743 | | - assert(vi0 >= 0 && vi0 < 16); |
10744 | | - assert(vi1 >= 0 && vi1 < 16); |
10745 | | - |
10746 | | - hist[vi0]++; |
10747 | | - hist[vi1]++; |
10748 | | - |
10749 | | - pp[l/2] = vi0 | (vi1 << 4); |
10750 | | - } |
| 10741 | + for (int i = 0; i < nb; i++) { |
| 10742 | + for (int l = 0; l < qk; l += 2) { |
| 10743 | + const uint8_t vi0 = pb[l/2] & 0xF; |
| 10744 | + const uint8_t vi1 = pb[l/2] >> 4; |
10751 | 10745 |
|
10752 | | - memcpy(pb, pp, pp_size); |
10753 | | - pb += bs; |
| 10746 | + hist[vi0]++; |
| 10747 | + hist[vi1]++; |
10754 | 10748 | } |
| 10749 | + pb += bs; |
10755 | 10750 | } |
10756 | 10751 | } |
10757 | 10752 |
|
10758 | 10753 | return (n/k)*row_size; |
10759 | 10754 | } |
10760 | 10755 |
|
10761 | | -size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist) { |
| 10756 | +size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int qk, int64_t * hist) { |
10762 | 10757 | const int nb = k / qk; |
10763 | 10758 | const size_t bs = (2*sizeof(float) + sizeof(uint8_t)*qk/2); |
10764 | 10759 | const size_t row_size = nb*bs; |
10765 | 10760 |
|
10766 | 10761 | assert(k % qk == 0); |
10767 | 10762 |
|
10768 | | - const size_t pp_size = qk / 2; |
10769 | | - uint8_t * pp = (uint8_t *) alloca(pp_size); |
10770 | | - |
10771 | 10763 | char * pdst = (char *) dst; |
10772 | 10764 |
|
10773 | 10765 | for (int j = 0; j < n; j += k) { |
10774 | 10766 | uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); |
10775 | | - uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); |
10776 | 10767 | uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float)); |
10777 | 10768 |
|
10778 | | - //printf("n = %d, k = %d, nb = %d, row_size = %d, j = %d, pm = %p, pd = %p, pb = %p\n", n, k, nb, row_size, j, pm, pd, pb); |
| 10769 | + quantize_row_q4_1(src + j, pd, k); |
10779 | 10770 |
|
10780 | 10771 | for (int i = 0; i < nb; i++) { |
10781 | | - float min = FLT_MAX; |
10782 | | - float max = -FLT_MAX; |
10783 | | - |
10784 | | - { |
10785 | | - for (int l = 0; l < qk; l++) { |
10786 | | - const float v = src[j + i*qk + l]; |
10787 | | - if (v < min) min = v; |
10788 | | - if (v > max) max = v; |
10789 | | - } |
10790 | | - |
10791 | | - const float d = (max - min) / ((1 << 4) - 1); |
10792 | | - const float id = d ? 1.0f/d : 0.0f; |
10793 | | - |
10794 | | - *(float *) pd = d; |
10795 | | - *(float *) pm = min; |
10796 | | - pd += bs; |
10797 | | - pm += bs; |
10798 | | - |
10799 | | - for (int l = 0; l < qk; l += 2) { |
10800 | | - const float v0 = (src[j + i*qk + l + 0] - min)*id; |
10801 | | - const float v1 = (src[j + i*qk + l + 1] - min)*id; |
10802 | | - |
10803 | | - const uint8_t vi0 = round(v0); |
10804 | | - const uint8_t vi1 = round(v1); |
10805 | | - |
10806 | | - assert(vi0 >= 0 && vi0 < 16); |
10807 | | - assert(vi1 >= 0 && vi1 < 16); |
10808 | | - |
10809 | | - hist[vi0]++; |
10810 | | - hist[vi1]++; |
10811 | | - |
10812 | | - pp[l/2] = vi0 | (vi1 << 4); |
10813 | | - } |
| 10772 | + for (int l = 0; l < qk; l += 2) { |
| 10773 | + const uint8_t vi0 = pb[l/2] & 0xF; |
| 10774 | + const uint8_t vi1 = pb[l/2] >> 4; |
10814 | 10775 |
|
10815 | | - memcpy(pb, pp, pp_size); |
10816 | | - pb += bs; |
| 10776 | + hist[vi0]++; |
| 10777 | + hist[vi1]++; |
10817 | 10778 | } |
| 10779 | + pb += bs; |
10818 | 10780 | } |
10819 | 10781 | } |
10820 | 10782 |
|
|
0 commit comments