Skip to content

Commit 3a3d807

Browse files
committed
Remove bias mode computation
1 parent c11184a commit 3a3d807

File tree

1 file changed

+7
-17
lines changed

1 file changed

+7
-17
lines changed

src/llama-quant.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -769,11 +769,9 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
769769
std::ofstream ofs(tmp, std::ios::binary | std::ios::trunc);
770770
if (!ofs) { return; } // best-effort
771771
const float target_bpw = params->target_bpw;
772-
const uint8_t bias_mode = params->no_bias ? 1 : 0;
773772
ofs.write((const char *)&file_magic, sizeof(file_magic));
774773
ofs.write((const char *)&model_id, sizeof(model_id));
775774
ofs.write((const char *)&target_bpw, sizeof(target_bpw));
776-
ofs.write((const char *)&bias_mode, sizeof(bias_mode));
777775
const uint64_t n = all_vec.size();
778776
ofs.write((const char *)&n, sizeof(n));
779777
for (const auto & ti : all_vec) {
@@ -814,11 +812,9 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
814812
uint32_t magic = 0;
815813
uint64_t id = 0;
816814
float bpw = 0.0f;
817-
uint8_t bias = 0;
818815
ifs.read((char *)&magic, sizeof(magic));
819816
ifs.read((char *)&id, sizeof(id));
820817
ifs.read((char *)&bpw, sizeof(bpw));
821-
ifs.read((char *)&bias, sizeof(bias));
822818
if (magic != file_magic) {
823819
LLAMA_LOG_WARN("%s: invalid resume file, ignoring: %s\n", func, checkpoint_file.c_str());
824820
return out;
@@ -828,9 +824,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
828824
} else if (bpw != params->target_bpw) {
829825
LLAMA_LOG_WARN("%s: target bpw of %f does not match %f, ignoring: %s\n", func, params->target_bpw, bpw, checkpoint_file.c_str());
830826
return out;
831-
} else if (bias != (params->no_bias ? 1 : 0)) {
832-
LLAMA_LOG_WARN("%s: bias mode does not match, ignoring: %s\n", func, checkpoint_file.c_str());
833-
return out;
834827
} else {
835828
LLAMA_LOG_INFO("%s: resuming tensor quantization\n", func);
836829
}
@@ -1319,13 +1312,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
13191312
std::vector<float> lambdas;
13201313
const float * values = values_sample.empty() ? nullptr : values_sample.data();
13211314
const float * activations = activations_sample.empty() ? nullptr : activations_sample.data();
1322-
if (!params->no_bias) {
1323-
double acc = 0.0;
1324-
int ns = 0;
1325-
lambdas = estimate_lambda(values, activations, n_per_row, ne2);
1326-
for (float l : lambdas) { acc += l; ++ns; }
1327-
tensor_lambda = ns ? (float)(acc / ns) : 0.0f;
1328-
}
1315+
double acc = 0.0;
1316+
int ns = 0;
1317+
lambdas = estimate_lambda(values, activations, n_per_row, ne2);
1318+
for (float l : lambdas) { acc += l; ++ns; }
1319+
tensor_lambda = ns ? (float)(acc / ns) : 0.0f;
13291320

13301321
// Evaluate candidates
13311322
std::vector<candidate_types> eval_candidates(compatible_candidates.size());
@@ -1925,11 +1916,10 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
19251916
if (params->target_bpw != -1.0f && !params->only_copy) {
19261917
if (params->imatrix) {
19271918
if (params->activations) {
1928-
LLAMA_LOG_INFO("%s: imatrix with activations provided, target bpw quantization will be more accurate - ",__func__);
1919+
LLAMA_LOG_INFO("%s: imatrix with activations provided, target bpw quantization will be more accurate\n",__func__);
19291920
} else {
1930-
LLAMA_LOG_WARN("%s: imatrix without activations provided, target bpw quantization will be less accurate - ", __func__);
1921+
LLAMA_LOG_WARN("%s: imatrix without activations provided, target bpw quantization will be less accurate\n", __func__);
19311922
}
1932-
LLAMA_LOG_INFO("using %s error estimation\n", params->no_bias ? "MSE only (no alignment bias)" : "alignment bias (default)");
19331923
LLAMA_LOG_INFO("%s: computing tensor quantization mix to achieve %.4f bpw\n", __func__, params->target_bpw);
19341924
bpw_overrides = target_bpw_type(ml, read_data, model, tensors, mapped, values_data, activations_data, params, nthread);
19351925
} else {

Comments: 0