@@ -1606,28 +1606,28 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
16061606 .nrows = 1,
16071607 .row_meta_size = 0,
16081608 },
1609- [GGML_TYPE_IQ4_XS_R4 ] = {
1610- .type_name = "iq4_xs_r4 ",
1609+ [GGML_TYPE_IQ4_XS_R8 ] = {
1610+ .type_name = "iq4_xs_r8 ",
16111611 .blck_size = QK_K,
16121612 .type_size = sizeof(block_iq4_xs),
16131613 .is_quantized = true,
1614- .to_float = (ggml_to_float_t) dequantize_row_iq4_xs_r4 ,
1615- .from_float = quantize_row_iq4_xs_r4 ,
1616- .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_r4_ref ,
1617- .vec_dot = vec_dot_iq4_xs_r4_q8_k ,
1614+ .to_float = (ggml_to_float_t) dequantize_row_iq4_xs_r8 ,
1615+ .from_float = quantize_row_iq4_xs_r8 ,
1616+ .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_r8_ref ,
1617+ .vec_dot = vec_dot_iq4_xs_r8_q8_k ,
16181618 .vec_dot_type = GGML_TYPE_Q8_K32,
16191619 .nrows = 1,
16201620 .row_meta_size = 0,
16211621 },
1622- [GGML_TYPE_Q4_0_R4 ] = {
1623- .type_name = "q4_0_r4 ",
1622+ [GGML_TYPE_Q4_0_R8 ] = {
1623+ .type_name = "q4_0_r8 ",
16241624 .blck_size = QK4_NL,
16251625 .type_size = sizeof(block_iq4_nl),
16261626 .is_quantized = true,
1627- .to_float = (ggml_to_float_t) dequantize_row_q4_0_r4 ,
1628- .from_float = quantize_row_q4_0_r4 ,
1629- .from_float_ref = (ggml_from_float_t)quantize_row_q4_0_r4_ref ,
1630- .vec_dot = vec_dot_q4_0_r4_q8_0 ,
1627+ .to_float = (ggml_to_float_t) dequantize_row_q4_0_r8 ,
1628+ .from_float = quantize_row_q4_0_r8 ,
1629+ .from_float_ref = (ggml_from_float_t)quantize_row_q4_0_r8_ref ,
1630+ .vec_dot = vec_dot_q4_0_r8_q8_0 ,
16311631#if GGML_USE_IQK_MULMAT
16321632#if defined __AVX2__
16331633 .vec_dot_type = GGML_TYPE_Q8_1_X4,
@@ -1640,15 +1640,15 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
16401640 .nrows = 1,
16411641 .row_meta_size = 0,
16421642 },
1643- [GGML_TYPE_Q8_0_R4 ] = {
1644- .type_name = "q8_0_r4 ",
1643+ [GGML_TYPE_Q8_0_R8 ] = {
1644+ .type_name = "q8_0_r8 ",
16451645 .blck_size = QK8_0,
16461646 .type_size = sizeof(block_q8_0),
16471647 .is_quantized = true,
1648- .to_float = (ggml_to_float_t) dequantize_row_q8_0_r4 ,
1649- .from_float = quantize_row_q8_0_r4 ,
1650- .from_float_ref = (ggml_from_float_t)quantize_row_q8_0_r4_ref ,
1651- .vec_dot = vec_dot_q8_0_r4_q8_0 ,
1648+ .to_float = (ggml_to_float_t) dequantize_row_q8_0_r8 ,
1649+ .from_float = quantize_row_q8_0_r8 ,
1650+ .from_float_ref = (ggml_from_float_t)quantize_row_q8_0_r8_ref ,
1651+ .vec_dot = vec_dot_q8_0_r8_q8_0 ,
16521652#if GGML_USE_IQK_MULMAT
16531653#if defined __AVX2__
16541654 .vec_dot_type = GGML_TYPE_Q8_1_X4,
@@ -4390,11 +4390,11 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
43904390 case GGML_FTYPE_MOSTLY_IQ2_BN_R4: wtype = GGML_TYPE_IQ2_BN_R4;break;
43914391 case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
43924392 case GGML_FTYPE_MOSTLY_IQ4_NL_R4: wtype = GGML_TYPE_IQ4_NL_R4;break;
4393- case GGML_FTYPE_MOSTLY_IQ4_XS_R4 : wtype = GGML_TYPE_IQ4_XS_R4 ;break;
4394- case GGML_FTYPE_MOSTLY_Q4_0_R4 : wtype = GGML_TYPE_Q4_0_R4 ; break;
4393+ case GGML_FTYPE_MOSTLY_IQ4_XS_R8 : wtype = GGML_TYPE_IQ4_XS_R8 ;break;
4394+ case GGML_FTYPE_MOSTLY_Q4_0_R8 : wtype = GGML_TYPE_Q4_0_R8 ; break;
43954395 case GGML_FTYPE_MOSTLY_Q5_0_R4: wtype = GGML_TYPE_Q5_0_R4; break;
43964396 case GGML_FTYPE_MOSTLY_Q6_0_R4: wtype = GGML_TYPE_Q6_0_R4; break;
4397- case GGML_FTYPE_MOSTLY_Q8_0_R4 : wtype = GGML_TYPE_Q8_0_R4 ; break;
4397+ case GGML_FTYPE_MOSTLY_Q8_0_R8 : wtype = GGML_TYPE_Q8_0_R8 ; break;
43984398 case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
43994399 case GGML_FTYPE_MOSTLY_IQ4_KS: wtype = GGML_TYPE_IQ4_KS; break;
44004400 case GGML_FTYPE_MOSTLY_IQ4_KS_R4: wtype = GGML_TYPE_IQ4_KS_R4;break;
@@ -10938,12 +10938,12 @@ static void ggml_compute_forward_add(
1093810938 case GGML_TYPE_IQ2_BN_R4:
1093910939 case GGML_TYPE_IQ4_NL:
1094010940 case GGML_TYPE_IQ4_NL_R4:
10941- case GGML_TYPE_IQ4_XS_R4 :
10942- case GGML_TYPE_Q4_0_R4 :
10941+ case GGML_TYPE_IQ4_XS_R8 :
10942+ case GGML_TYPE_Q4_0_R8 :
1094310943 case GGML_TYPE_Q5_0_R4:
1094410944 case GGML_TYPE_Q6_0_R4:
1094510945 case GGML_TYPE_I2_S:
10946- case GGML_TYPE_Q8_0_R4 :
10946+ case GGML_TYPE_Q8_0_R8 :
1094710947 case GGML_TYPE_IQ4_XS:
1094810948 case GGML_TYPE_IQ4_KS:
1094910949 case GGML_TYPE_IQ4_KS_R4:
@@ -11408,12 +11408,12 @@ static void ggml_compute_forward_add1(
1140811408 case GGML_TYPE_IQ2_BN_R4:
1140911409 case GGML_TYPE_IQ4_NL:
1141011410 case GGML_TYPE_IQ4_NL_R4:
11411- case GGML_TYPE_IQ4_XS_R4 :
11412- case GGML_TYPE_Q4_0_R4 :
11411+ case GGML_TYPE_IQ4_XS_R8 :
11412+ case GGML_TYPE_Q4_0_R8 :
1141311413 case GGML_TYPE_Q5_0_R4:
1141411414 case GGML_TYPE_Q6_0_R4:
1141511415 case GGML_TYPE_I2_S:
11416- case GGML_TYPE_Q8_0_R4 :
11416+ case GGML_TYPE_Q8_0_R8 :
1141711417 case GGML_TYPE_IQ4_XS:
1141811418 case GGML_TYPE_IQ4_KS:
1141911419 case GGML_TYPE_IQ4_KS_R4:
@@ -11575,12 +11575,12 @@ static void ggml_compute_forward_acc(
1157511575 case GGML_TYPE_IQ2_BN_R4:
1157611576 case GGML_TYPE_IQ4_NL:
1157711577 case GGML_TYPE_IQ4_NL_R4:
11578- case GGML_TYPE_IQ4_XS_R4 :
11579- case GGML_TYPE_Q4_0_R4 :
11578+ case GGML_TYPE_IQ4_XS_R8 :
11579+ case GGML_TYPE_Q4_0_R8 :
1158011580 case GGML_TYPE_Q5_0_R4:
1158111581 case GGML_TYPE_Q6_0_R4:
1158211582 case GGML_TYPE_I2_S:
11583- case GGML_TYPE_Q8_0_R4 :
11583+ case GGML_TYPE_Q8_0_R8 :
1158411584 case GGML_TYPE_IQ4_XS:
1158511585 case GGML_TYPE_IQ4_KS:
1158611586 case GGML_TYPE_IQ4_KS_R4:
@@ -14815,12 +14815,12 @@ static void ggml_compute_forward_out_prod(
1481514815 case GGML_TYPE_IQ2_BN_R4:
1481614816 case GGML_TYPE_IQ4_NL:
1481714817 case GGML_TYPE_IQ4_NL_R4:
14818- case GGML_TYPE_IQ4_XS_R4 :
14819- case GGML_TYPE_Q4_0_R4 :
14818+ case GGML_TYPE_IQ4_XS_R8 :
14819+ case GGML_TYPE_Q4_0_R8 :
1482014820 case GGML_TYPE_Q5_0_R4:
1482114821 case GGML_TYPE_Q6_0_R4:
1482214822 case GGML_TYPE_I2_S:
14823- case GGML_TYPE_Q8_0_R4 :
14823+ case GGML_TYPE_Q8_0_R8 :
1482414824 case GGML_TYPE_IQ4_XS:
1482514825 case GGML_TYPE_IQ4_KS:
1482614826 case GGML_TYPE_IQ4_KS_R4:
@@ -15222,12 +15222,12 @@ static void ggml_compute_forward_set(
1522215222 case GGML_TYPE_IQ2_BN_R4:
1522315223 case GGML_TYPE_IQ4_NL:
1522415224 case GGML_TYPE_IQ4_NL_R4:
15225- case GGML_TYPE_IQ4_XS_R4 :
15226- case GGML_TYPE_Q4_0_R4 :
15225+ case GGML_TYPE_IQ4_XS_R8 :
15226+ case GGML_TYPE_Q4_0_R8 :
1522715227 case GGML_TYPE_Q5_0_R4:
1522815228 case GGML_TYPE_Q6_0_R4:
1522915229 case GGML_TYPE_I2_S:
15230- case GGML_TYPE_Q8_0_R4 :
15230+ case GGML_TYPE_Q8_0_R8 :
1523115231 case GGML_TYPE_IQ4_XS:
1523215232 case GGML_TYPE_IQ4_KS:
1523315233 case GGML_TYPE_IQ4_KS_R4:
@@ -15523,12 +15523,12 @@ static void ggml_compute_forward_get_rows(
1552315523 case GGML_TYPE_IQ2_BN_R4:
1552415524 case GGML_TYPE_IQ4_NL:
1552515525 case GGML_TYPE_IQ4_NL_R4:
15526- case GGML_TYPE_IQ4_XS_R4 :
15527- case GGML_TYPE_Q4_0_R4 :
15526+ case GGML_TYPE_IQ4_XS_R8 :
15527+ case GGML_TYPE_Q4_0_R8 :
1552815528 case GGML_TYPE_Q5_0_R4:
1552915529 case GGML_TYPE_Q6_0_R4:
1553015530 case GGML_TYPE_I2_S:
15531- case GGML_TYPE_Q8_0_R4 :
15531+ case GGML_TYPE_Q8_0_R8 :
1553215532 case GGML_TYPE_IQ4_XS:
1553315533 case GGML_TYPE_IQ4_KS:
1553415534 case GGML_TYPE_IQ4_KS_R4:
@@ -16153,12 +16153,12 @@ static void ggml_compute_forward_clamp(
1615316153 case GGML_TYPE_IQ2_BN_R4:
1615416154 case GGML_TYPE_IQ4_NL:
1615516155 case GGML_TYPE_IQ4_NL_R4:
16156- case GGML_TYPE_IQ4_XS_R4 :
16157- case GGML_TYPE_Q4_0_R4 :
16156+ case GGML_TYPE_IQ4_XS_R8 :
16157+ case GGML_TYPE_Q4_0_R8 :
1615816158 case GGML_TYPE_Q5_0_R4:
1615916159 case GGML_TYPE_Q6_0_R4:
1616016160 case GGML_TYPE_I2_S:
16161- case GGML_TYPE_Q8_0_R4 :
16161+ case GGML_TYPE_Q8_0_R8 :
1616216162 case GGML_TYPE_IQ4_XS:
1616316163 case GGML_TYPE_IQ4_KS:
1616416164 case GGML_TYPE_IQ4_KS_R4:
@@ -23028,11 +23028,11 @@ size_t ggml_quantize_chunk(
2302823028 case GGML_TYPE_IQ2_BN_R4:result = quantize_iq2_bn_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2302923029 case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303023030 case GGML_TYPE_IQ4_NL_R4: result = quantize_iq4_nl_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23031- case GGML_TYPE_IQ4_XS_R4 : result = quantize_iq4_xs_r4 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23032- case GGML_TYPE_Q4_0_R4 : result = quantize_q4_0_r4 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23031+ case GGML_TYPE_IQ4_XS_R8 : result = quantize_iq4_xs_r8 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23032+ case GGML_TYPE_Q4_0_R8 : result = quantize_q4_0_r8 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303323033 case GGML_TYPE_Q5_0_R4: result = quantize_q5_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303423034 case GGML_TYPE_Q6_0_R4: result = quantize_q6_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23035- case GGML_TYPE_Q8_0_R4 : result = quantize_q8_0_r4 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23035+ case GGML_TYPE_Q8_0_R8 : result = quantize_q8_0_r8 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303623036 case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303723037 case GGML_TYPE_IQ4_KS: result = quantize_iq4_ks (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303823038 case GGML_TYPE_IQ4_KS_R4:result = quantize_iq4_ks_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
0 commit comments