@@ -1862,7 +1862,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
18621862
18631863 std::vector<uint8_t > work (512 );
18641864 std::vector<float > conv_buf (512 );
1865- std::vector<int64_t > hist_all (1 << 4 , 0 );
18661865 size_t total_size_org = 0 ;
18671866 size_t total_size_new = 0 ;
18681867
@@ -1917,48 +1916,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
19171916 }
19181917 new_data = work.data ();
19191918
1920- std::vector<int64_t > hist_cur (1 << 4 , 0 );
1921-
1922- switch (new_type) {
1923- case GGML_TYPE_Q4_0: {
1924- new_size = ggml_quantize_q4_0 (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1925- } break ;
1926- case GGML_TYPE_Q4_1: {
1927- new_size = ggml_quantize_q4_1 (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1928- } break ;
1929- case GGML_TYPE_Q5_0: {
1930- new_size = ggml_quantize_q5_0 (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1931- } break ;
1932- case GGML_TYPE_Q5_1: {
1933- new_size = ggml_quantize_q5_1 (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1934- } break ;
1935- case GGML_TYPE_Q8_0: {
1936- new_size = ggml_quantize_q8_0 (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1937- } break ;
1938- case GGML_TYPE_Q2_K: {
1939- new_size = ggml_quantize_q2_K (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1940- } break ;
1941- case GGML_TYPE_Q3_K: {
1942- new_size = ggml_quantize_q3_K (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1943- } break ;
1944- case GGML_TYPE_Q4_K: {
1945- new_size = ggml_quantize_q4_K (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1946- } break ;
1947- case GGML_TYPE_Q5_K: {
1948- new_size = ggml_quantize_q5_K (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1949- } break ;
1950- case GGML_TYPE_Q6_K: {
1951- new_size = ggml_quantize_q6_K (f32_data, new_data, n_elms, cur->ne [0 ], hist_cur.data ());
1952- } break ;
1953- default : {
1954- fprintf (stderr, " %s: unsupported quantization type %d\n " , __func__, new_type);
1955- return false ;
1956- }
1957- }
1958-
1959- for (size_t j = 0 ; j < hist_cur.size (); ++j) {
1960- hist_all[j] += hist_cur[j];
1961- }
1919+ new_size = ggml_quantize_chunk (new_type, f32_data, new_data, 0 , n_elms/cur->ne [0 ], cur->ne [0 ], nullptr );
19621920 } else {
19631921 new_type = cur->type ;
19641922 new_data = cur->data ;
@@ -1993,17 +1951,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
19931951 {
19941952 printf (" %s: original size = %8.2f MB\n " , __func__, total_size_org / 1024.0 / 1024.0 );
19951953 printf (" %s: quantized size = %8.2f MB\n " , __func__, total_size_new / 1024.0 / 1024.0 );
1996-
1997- int64_t sum_all = 0 ;
1998- for (size_t i = 0 ; i < hist_all.size (); ++i) {
1999- sum_all += hist_all[i];
2000- }
2001-
2002- printf (" %s: hist: " , __func__);
2003- for (size_t i = 0 ; i < hist_all.size (); ++i) {
2004- printf (" %5.3f " , hist_all[i] / (float )sum_all);
2005- }
2006- printf (" \n " );
20071954 }
20081955
20091956 return true ;
0 commit comments