@@ -1202,13 +1202,26 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
12021202 .type_size = sizeof(block_iq1_m),
12031203 .is_quantized = true,
12041204 .to_float = (ggml_to_float_t) dequantize_row_iq1_m,
1205- .from_float = NULL ,
1206- .from_float_ref = NULL ,
1205+ .from_float = quantize_row_iq1_m ,
1206+ .from_float_ref = (ggml_from_float_t)quantize_row_iq1_m_ref ,
12071207 .vec_dot = ggml_vec_dot_iq1_m_q8_K,
12081208 .vec_dot_type = GGML_TYPE_Q8_K,
12091209 .nrows = 1,
12101210 .row_meta_size = 0,
12111211 },
1212+ [GGML_TYPE_IQ1_M_R4] = { /* type-traits table entry for the "iq1_m_r4" quantized type */
1213+ .type_name = "iq1_m_r4",
1214+ .blck_size = 32,
1215+ .type_size = sizeof(block_iq1_m_r4)/4, /* one quarter of the struct size; presumably block_iq1_m_r4 packs 4 interleaved rows - TODO confirm */
1216+ .is_quantized = true,
1217+ .to_float = (ggml_to_float_t) dequantize_row_iq1_m_r4,
1218+ .from_float = quantize_row_iq1_m_r4,
1219+ .from_float_ref = (ggml_from_float_t)quantize_row_iq1_m_r4_ref, /* cast to the generic from-float signature, as for the other entries */
1220+ .vec_dot = vec_dot_iq1_m_r4_q8_k,
1221+ .vec_dot_type = GGML_TYPE_Q8_0_X4, /* NOTE(review): the vec_dot kernel name ends in q8_k while the activation type here is Q8_0_X4 - confirm the pairing is intended */
1222+ .nrows = 1,
1223+ .row_meta_size = 2, /* non-zero, unlike the iq1_m entry (0); what the 2 bytes hold is not visible here - TODO confirm */
1224+ },
12121225 [GGML_TYPE_IQ1_BN] = {
12131226 .type_name = "iq1_bn",
12141227 .blck_size = QK_IQ1BN,
@@ -4401,6 +4414,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
44014414 case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
44024415 case GGML_FTYPE_MOSTLY_IQ2_S_R4: wtype = GGML_TYPE_IQ2_S_R4; break;
44034416 case GGML_FTYPE_MOSTLY_IQ1_S_R4: wtype = GGML_TYPE_IQ1_S_R4; break;
4417+ case GGML_FTYPE_MOSTLY_IQ1_M_R4: wtype = GGML_TYPE_IQ1_M_R4; break;
44044418 case GGML_FTYPE_MOSTLY_Q4_0_4_4: wtype = GGML_TYPE_Q4_0_4_4; break;
44054419 case GGML_FTYPE_MOSTLY_Q4_0_4_8: wtype = GGML_TYPE_Q4_0_4_8; break;
44064420 case GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = GGML_TYPE_Q4_0_8_8; break;
@@ -10949,6 +10963,7 @@ static void ggml_compute_forward_add(
1094910963 case GGML_TYPE_IQ2_S:
1095010964 case GGML_TYPE_IQ2_S_R4:
1095110965 case GGML_TYPE_IQ1_S_R4:
10966+ case GGML_TYPE_IQ1_M_R4:
1095210967 case GGML_TYPE_Q4_0_4_4:
1095310968 case GGML_TYPE_Q4_0_4_8:
1095410969 case GGML_TYPE_Q4_0_8_8:
@@ -11418,6 +11433,7 @@ static void ggml_compute_forward_add1(
1141811433 case GGML_TYPE_IQ2_S:
1141911434 case GGML_TYPE_IQ2_S_R4:
1142011435 case GGML_TYPE_IQ1_S_R4:
11436+ case GGML_TYPE_IQ1_M_R4:
1142111437 case GGML_TYPE_Q4_0_4_4:
1142211438 case GGML_TYPE_Q4_0_4_8:
1142311439 case GGML_TYPE_Q4_0_8_8:
@@ -11584,6 +11600,7 @@ static void ggml_compute_forward_acc(
1158411600 case GGML_TYPE_IQ2_S:
1158511601 case GGML_TYPE_IQ2_S_R4:
1158611602 case GGML_TYPE_IQ1_S_R4:
11603+ case GGML_TYPE_IQ1_M_R4:
1158711604 case GGML_TYPE_Q4_0_4_4:
1158811605 case GGML_TYPE_Q4_0_4_8:
1158911606 case GGML_TYPE_Q4_0_8_8:
@@ -14823,6 +14840,7 @@ static void ggml_compute_forward_out_prod(
1482314840 case GGML_TYPE_IQ2_S:
1482414841 case GGML_TYPE_IQ2_S_R4:
1482514842 case GGML_TYPE_IQ1_S_R4:
14843+ case GGML_TYPE_IQ1_M_R4:
1482614844 case GGML_TYPE_Q4_0_4_4:
1482714845 case GGML_TYPE_Q4_0_4_8:
1482814846 case GGML_TYPE_Q4_0_8_8:
@@ -15229,6 +15247,7 @@ static void ggml_compute_forward_set(
1522915247 case GGML_TYPE_IQ2_S:
1523015248 case GGML_TYPE_IQ2_S_R4:
1523115249 case GGML_TYPE_IQ1_S_R4:
15250+ case GGML_TYPE_IQ1_M_R4:
1523215251 case GGML_TYPE_Q4_0_4_4:
1523315252 case GGML_TYPE_Q4_0_4_8:
1523415253 case GGML_TYPE_Q4_0_8_8:
@@ -15529,6 +15548,7 @@ static void ggml_compute_forward_get_rows(
1552915548 case GGML_TYPE_IQ2_S:
1553015549 case GGML_TYPE_IQ2_S_R4:
1553115550 case GGML_TYPE_IQ1_S_R4:
15551+ case GGML_TYPE_IQ1_M_R4:
1553215552 case GGML_TYPE_Q4_0_4_4:
1553315553 case GGML_TYPE_Q4_0_4_8:
1553415554 case GGML_TYPE_Q4_0_8_8:
@@ -16158,6 +16178,7 @@ static void ggml_compute_forward_clamp(
1615816178 case GGML_TYPE_IQ2_S:
1615916179 case GGML_TYPE_IQ2_S_R4:
1616016180 case GGML_TYPE_IQ1_S_R4:
16181+ case GGML_TYPE_IQ1_M_R4:
1616116182 case GGML_TYPE_Q8_K:
1616216183 case GGML_TYPE_Q8_K64:
1616316184 case GGML_TYPE_Q8_K16:
@@ -22914,6 +22935,7 @@ void ggml_quantize_init(enum ggml_type type) {
2291422935 case GGML_TYPE_IQ2_S:
2291522936 case GGML_TYPE_IQ1_S:
2291622937 case GGML_TYPE_IQ1_M: iq2xs_init_impl(type); break;
22938+ case GGML_TYPE_IQ1_M_R4:iq2xs_init_impl(GGML_TYPE_IQ1_M); break;
2291722939 case GGML_TYPE_IQ1_S_R4:iq2xs_init_impl(GGML_TYPE_IQ1_S); break;
2291822940 case GGML_TYPE_IQ3_XXS_R4:
2291922941 case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
@@ -22998,6 +23020,7 @@ size_t ggml_quantize_chunk(
2299823020 case GGML_TYPE_IQ2_S: result = quantize_iq2_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2299923021 case GGML_TYPE_IQ2_S_R4:result = quantize_iq2_s_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2300023022 case GGML_TYPE_IQ1_S_R4:result = quantize_iq1_s_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23023+ case GGML_TYPE_IQ1_M_R4:result = quantize_iq1_m_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2300123024 case GGML_TYPE_IQ1_S: result = quantize_iq1_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2300223025 case GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2300323026 case GGML_TYPE_IQ1_BN: result = quantize_iq1_bn (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
0 commit comments