@@ -837,6 +837,32 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
837837 .from_float_ref = (ggml_from_float_t )quantize_row_iq4_xs_ref ,
838838 .row_meta_size = 0 ,
839839 },
840+ [GGML_TYPE_IQ1_BN ] = {
841+ .type_name = "iq1_bn" ,
842+ .blck_size = QK_IQ1BN ,
843+ .type_size = sizeof (block_iq1_bn ),
844+ .is_quantized = true,
845+ .to_float = (ggml_to_float_t ) dequantize_row_iq1_bn ,
846+ // .from_float = quantize_row_iq1_bn,
847+ .from_float_ref = (ggml_from_float_t )quantize_row_iq1_bn_ref ,
848+ // .vec_dot = ggml_vec_dot_iq1_bn_q8_K64,
849+ // .vec_dot_type = GGML_TYPE_Q8_K64,
850+ // .nrows = 1,
851+ .row_meta_size = 2 ,
852+ },
853+ [GGML_TYPE_IQ2_BN ] = {
854+ .type_name = "iq2_bn" ,
855+ .blck_size = QK_IQ1BN ,
856+ .type_size = sizeof (block_iq2_bn ),
857+ .is_quantized = true,
858+ .to_float = (ggml_to_float_t ) dequantize_row_iq2_bn ,
859+ // .from_float = quantize_row_iq2_bn,
860+ .from_float_ref = (ggml_from_float_t )quantize_row_iq2_bn_ref ,
861+ // .vec_dot = vec_dot_iq2_bn_q8_K64,
862+ // .vec_dot_type = GGML_TYPE_Q8_K64,
863+ // .nrows = 1,
864+ .row_meta_size = 4 ,
865+ },
840866 [GGML_TYPE_IQ4_KS ] = {
841867 .type_name = "iq4_ks" ,
842868 .blck_size = QK_K ,
@@ -1522,6 +1548,8 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
15221548 case GGML_FTYPE_MOSTLY_IQ3_XXS : wtype = GGML_TYPE_IQ3_XXS ; break ;
15231549 case GGML_FTYPE_MOSTLY_IQ1_S : wtype = GGML_TYPE_IQ1_S ; break ;
15241550 case GGML_FTYPE_MOSTLY_IQ1_M : wtype = GGML_TYPE_IQ1_M ; break ;
1551+ case GGML_FTYPE_MOSTLY_IQ1_BN : wtype = GGML_TYPE_IQ1_BN ; break ;
1552+ case GGML_FTYPE_MOSTLY_IQ2_BN : wtype = GGML_TYPE_IQ2_BN ; break ;
15251553 case GGML_FTYPE_MOSTLY_IQ4_NL : wtype = GGML_TYPE_IQ4_NL ; break ;
15261554 case GGML_FTYPE_MOSTLY_IQ4_XS : wtype = GGML_TYPE_IQ4_XS ; break ;
15271555 case GGML_FTYPE_MOSTLY_IQ4_KS : wtype = GGML_TYPE_IQ4_KS ; break ;
@@ -6899,6 +6927,8 @@ size_t ggml_quantize_chunk(
68996927 case GGML_TYPE_IQ2_S : result = quantize_iq2_s (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
69006928 case GGML_TYPE_IQ1_S : result = quantize_iq1_s (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
69016929 case GGML_TYPE_IQ1_M : result = quantize_iq1_m (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
6930+ case GGML_TYPE_IQ1_BN : result = quantize_iq1_bn (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
6931+ case GGML_TYPE_IQ2_BN : result = quantize_iq2_bn (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
69026932 case GGML_TYPE_IQ4_NL : result = quantize_iq4_nl (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
69036933 case GGML_TYPE_IQ4_XS : result = quantize_iq4_xs (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
69046934 case GGML_TYPE_IQ4_KS : result = quantize_iq4_ks (src + start , (char * ) dst + start_row * row_size , nrows , n_per_row , imatrix ); break ;
0 commit comments