@@ -769,11 +769,9 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
769769 std::ofstream ofs (tmp, std::ios::binary | std::ios::trunc);
770770 if (!ofs) { return ; } // best-effort
771771 const float target_bpw = params->target_bpw ;
772- const uint8_t bias_mode = params->no_bias ? 1 : 0 ;
773772 ofs.write ((const char *)&file_magic, sizeof (file_magic));
774773 ofs.write ((const char *)&model_id, sizeof (model_id));
775774 ofs.write ((const char *)&target_bpw, sizeof (target_bpw));
776- ofs.write ((const char *)&bias_mode, sizeof (bias_mode));
777775 const uint64_t n = all_vec.size ();
778776 ofs.write ((const char *)&n, sizeof (n));
779777 for (const auto & ti : all_vec) {
@@ -814,11 +812,9 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
814812 uint32_t magic = 0 ;
815813 uint64_t id = 0 ;
816814 float bpw = 0.0f ;
817- uint8_t bias = 0 ;
818815 ifs.read ((char *)&magic, sizeof (magic));
819816 ifs.read ((char *)&id, sizeof (id));
820817 ifs.read ((char *)&bpw, sizeof (bpw));
821- ifs.read ((char *)&bias, sizeof (bias));
822818 if (magic != file_magic) {
823819 LLAMA_LOG_WARN (" %s: invalid resume file, ignoring: %s\n " , func, checkpoint_file.c_str ());
824820 return out;
@@ -828,9 +824,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
828824 } else if (bpw != params->target_bpw ) {
829825 LLAMA_LOG_WARN (" %s: target bpw of %f does not match %f, ignoring: %s\n " , func, params->target_bpw , bpw, checkpoint_file.c_str ());
830826 return out;
831- } else if (bias != (params->no_bias ? 1 : 0 )) {
832- LLAMA_LOG_WARN (" %s: bias mode does not match, ignoring: %s\n " , func, checkpoint_file.c_str ());
833- return out;
834827 } else {
835828 LLAMA_LOG_INFO (" %s: resuming tensor quantization\n " , func);
836829 }
@@ -1319,13 +1312,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
13191312 std::vector<float > lambdas;
13201313 const float * values = values_sample.empty () ? nullptr : values_sample.data ();
13211314 const float * activations = activations_sample.empty () ? nullptr : activations_sample.data ();
1322- if (!params->no_bias ) {
1323- double acc = 0.0 ;
1324- int ns = 0 ;
1325- lambdas = estimate_lambda (values, activations, n_per_row, ne2);
1326- for (float l : lambdas) { acc += l; ++ns; }
1327- tensor_lambda = ns ? (float)(acc / ns) : 0.0f ;
1328- }
1315+ double acc = 0.0 ;
1316+ int ns = 0 ;
1317+ lambdas = estimate_lambda (values, activations, n_per_row, ne2);
1318+ for (float l : lambdas) { acc += l; ++ns; }
1319+ tensor_lambda = ns ? (float)(acc / ns) : 0.0f ;
13291320
13301321 // Evaluate candidates
13311322 std::vector<candidate_types> eval_candidates (compatible_candidates.size ());
@@ -1925,11 +1916,10 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
19251916 if (params->target_bpw != -1.0f && !params->only_copy ) {
19261917 if (params->imatrix ) {
19271918 if (params->activations ) {
1928- LLAMA_LOG_INFO (" %s: imatrix with activations provided, target bpw quantization will be more accurate - " ,__func__);
1919+ LLAMA_LOG_INFO (" %s: imatrix with activations provided, target bpw quantization will be more accurate\n " ,__func__);
19291920 } else {
1930- LLAMA_LOG_WARN (" %s: imatrix without activations provided, target bpw quantization will be less accurate - " , __func__);
1921+ LLAMA_LOG_WARN (" %s: imatrix without activations provided, target bpw quantization will be less accurate\n " , __func__);
19311922 }
1932- LLAMA_LOG_INFO (" using %s error estimation\n " , params->no_bias ? " MSE only (no alignment bias)" : " alignment bias (default)" );
19331923 LLAMA_LOG_INFO (" %s: computing tensor quantization mix to achieve %.4f bpw\n " , __func__, params->target_bpw );
19341924 bpw_overrides = target_bpw_type (ml, read_data, model, tensors, mapped, values_data, activations_data, params, nthread);
19351925 } else {
0 commit comments