
Commit f8863b9

Minor refactoring

1 parent 5303212


src/llama-quant.cpp

Lines changed: 23 additions & 25 deletions
@@ -694,6 +694,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     constexpr double epsilon = 1e-12;
     constexpr double infinity = std::numeric_limits<double>::infinity();
     constexpr uint32_t file_magic = 0x42505731; // BPW1
+    constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
     const char * func = __func__;

     auto tensor_bytes = [](const ggml_tensor * t, const ggml_type typ) -> size_t {
@@ -731,7 +732,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(

     auto make_compatible = [&](const ggml_tensor * t, const ggml_type typ) -> ggml_type {
         if (is_compatible(t, typ)) { return typ; }
-        ggml_type fb = fallback_type(typ);
+        const ggml_type fb = fallback_type(typ);
         return is_compatible(t, fb) ? fb : GGML_TYPE_F16;
     };

@@ -754,7 +755,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         for (size_t i = 0; i < n; ++i) {
             h = (h << 5) + h + data[i];
         }
-        return h ? h : 0xeabada55cafed00d;
+        return h ? h : arbitrary_magic;
     };

     auto metadata_id = [&](const gguf_context * ctx) -> uint64_t {
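The loop in this lambda is the classic DJB2 string hash (h → h * 33 + byte); the refactor only swaps the inline 0xeabada55cafed00d literal for the named arbitrary_magic sentinel, which guards against a digest that happens to come out zero. A standalone sketch of the same pattern, assuming the conventional 5381 seed (the initializer is not shown in this hunk):

#include <cstddef>
#include <cstdint>

// DJB2-style hash: h = h * 33 + byte. A non-zero sentinel is returned
// when the digest is 0, so callers can treat 0 as "no hash".
static uint64_t djb2(const uint8_t * data, size_t n) {
    uint64_t h = 5381; // conventional DJB2 seed (assumed, not shown in the hunk)
    for (size_t i = 0; i < n; ++i) {
        h = (h << 5) + h + data[i]; // h * 33 + data[i]
    }
    constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
    return h ? h : arbitrary_magic;
}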
@@ -795,7 +796,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         ofs.write((const char *)&n, sizeof(n));
         for (const auto & ti : all_vec) {
             const std::string name = ggml_get_name(ti.w->tensor);
-            const uint32_t len = (uint32_t)name.size();
+            const auto len = (uint32_t)name.size();
             ofs.write((const char *)&len, sizeof(len));
             ofs.write(name.data(), len);

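The writer stores each tensor name as a u32 length followed by the raw bytes, and the reader elsewhere in the function has to mirror that layout exactly. A minimal roundtrip sketch of the format (the function names here are illustrative, not from the file):

#include <cstdint>
#include <fstream>
#include <string>

// Write a string as: u32 length, then `length` raw bytes (as in the hunk above).
static void write_str(std::ofstream & ofs, const std::string & name) {
    const auto len = (uint32_t)name.size();
    ofs.write((const char *)&len, sizeof(len));
    ofs.write(name.data(), len);
}

// Matching reader: fixed-width length first, then the payload.
static std::string read_str(std::ifstream & ifs) {
    uint32_t len = 0;
    ifs.read((char *)&len, sizeof(len));
    std::string name(len, '\0');
    ifs.read(name.data(), len);
    return name;
}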
@@ -835,13 +836,14 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         if (magic != file_magic) {
             LLAMA_LOG_WARN("%s: invalid resume file, ignoring: %s\n", func, checkpoint_file.c_str());
             return out;
-        } else if (id != model_id) {
+        }
+        if (id != model_id) {
             LLAMA_LOG_WARN("%s: model ID mismatch, ignoring: %s\n", func, checkpoint_file.c_str());
             return out;
-        } else {
-            LLAMA_LOG_INFO("%s: state file found, resuming tensor quantization\n", func);
         }

+        LLAMA_LOG_INFO("%s: state file found, resuming tensor quantization\n", func);
+
         uint64_t n = 0;
         ifs.read((char *)&n, sizeof(n));
         for (uint64_t i = 0; i < n; ++i) {
@@ -862,15 +864,15 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
             si.n_elements = (size_t)ne;

             si.candidate.resize(cn);
-            for (size_t j = 0; j < si.candidate.size(); ++j) {
+            for (auto & s : si.candidate) {
                 int32_t t = 0;
                 uint64_t b = 0;
                 ifs.read((char *)&t, sizeof(t));
-                si.candidate[j].type = (ggml_type)t;
-                ifs.read((char *)&si.candidate[j].bpw, sizeof(si.candidate[j].bpw));
+                s.type = (ggml_type)t;
+                ifs.read((char *)&s.bpw, sizeof(s.bpw));
                 ifs.read((char *)&b, sizeof(b));
-                si.candidate[j].bytes = (size_t)b;
-                ifs.read((char *)&si.candidate[j].error, sizeof(si.candidate[j].error));
+                s.bytes = (size_t)b;
+                ifs.read((char *)&s.error, sizeof(s.error));
             }

             out.emplace(std::move(name), std::move(si));
@@ -886,7 +888,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
             LLAMA_LOG_INFO("%s: deleting %s\n", func, checkpoint_file.c_str());
             std::remove(checkpoint_file.c_str());
         }
-
     };

     auto check_signal_handler = [&](const std::vector<tensor_info> & all_vec) {
@@ -1198,10 +1199,10 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     // Compute rows based on tensor shape and slice count
     auto sample_rows = [](const int64_t n, const int64_t rows, const int64_t n2, const bool has_acts) -> int64_t {
         const double tensor_budget = has_acts ? 1 * 1024 * 1024 : 0.5 * 1024 * 1024;
-        const double scale_rows = std::clamp(std::sqrt(std::max(1.0, (double)rows) / 4096.0), 0.5, 2.0); // favour more rows for large nrt
+        const double scale_rows = std::clamp(std::sqrt(std::max(1.0, (double)rows) / 4096.0), 0.5, 2.0); // favour more rows for large tensors
         const double slice_budget = tensor_budget * scale_rows / std::max<int64_t>(1, n2);
         const int64_t min_rows = has_acts ? 128 : 64;
-        const int64_t max_rows = 4096;
+        constexpr int64_t max_rows = 4096; // row limit to avoid excessive memory use
         int64_t total_rows = std::llround(slice_budget / std::max<int64_t>(1, n));
         total_rows = std::max<int64_t>(min_rows, std::min<int64_t>(total_rows, std::min<int64_t>(rows, max_rows)));
         if (rows <= min_rows * 2) { total_rows = rows; }
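A worked example of the budget arithmetic, with values plugged in rather than taken from the commit: for a tensor with activations, rows = 16384, n = 4096 and n2 = 1, tensor_budget = 1 MiB and scale_rows = clamp(sqrt(16384 / 4096), 0.5, 2.0) = 2.0, so slice_budget = 2 MiB and total_rows = llround(2097152 / 4096) = 512, which already lies inside [min_rows = 128, min(rows, max_rows) = 4096] and is kept as-is.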
@@ -1246,7 +1247,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         f32_sample.clear();
         std::vector<float> row_buffer(n_per_row);
         for (int64_t slice = 0; slice < ne2; ++slice) {
-            std::mt19937 rng(std::hash<std::string>{}(name) ^ 0xeabada55cafed00d ^ slice);
+            std::mt19937 rng(std::hash<std::string>{}(name) ^ arbitrary_magic ^ slice);
             const int64_t rows_sample_max = std::max<int64_t>(1, std::min<int64_t>(nrows_total, rows_sample_per_expert));
             const int64_t stride = std::max<int64_t>(1, nrows_total / rows_sample_max);
             int64_t offset = 0;
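Seeding the generator from hash(name) ^ arbitrary_magic ^ slice makes the sampling deterministic per tensor and slice: a resumed or repeated run draws the same rows. A sketch of the idea; only the seeding expression comes from the commit, the strided selection loop is illustrative:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <random>
#include <string>
#include <vector>

// Deterministic per-(tensor, slice) row sampling: the same name and slice
// always produce the same std::mt19937 stream, hence the same picks.
static std::vector<int64_t> pick_rows(const std::string & name, int64_t slice, int64_t nrows, int64_t want) {
    constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
    std::mt19937 rng(std::hash<std::string>{}(name) ^ arbitrary_magic ^ slice);
    const int64_t stride = std::max<int64_t>(1, nrows / std::max<int64_t>(1, want));
    std::uniform_int_distribution<int64_t> jitter(0, stride - 1);
    std::vector<int64_t> rows;
    for (int64_t base = 0; base < nrows && (int64_t)rows.size() < want; base += stride) {
        rows.push_back(std::min<int64_t>(nrows - 1, base + jitter(rng))); // strided pick with jitter
    }
    return rows;
}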
@@ -1411,8 +1412,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
             if (c.bytes == 0) { continue; }
             const double final_err = bias_needed ? c.error : c.mse;
             info.candidate.push_back(candidate_types{ c.type, c.bpw, c.bytes, final_err, c.mse, c.proj });
-            // LLAMA_LOG_INFO("\t%s: %35s \t%10s \t%1.4f bpw \t%10zu bytes \t mse: %1.8e \t err: %1.8e\n",
-            //     func, name.c_str(), ggml_type_name(c.type), c.bpw, c.bytes, c.mse, final_err);
         }

         if (info.candidate.empty()) {
@@ -1445,16 +1444,15 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         if (candidates.size() < 3) { return; } // need at least 3 points to do convex hull

         // Convex hull (lower envelope)
+        auto cross_product = [](const candidate_types & h0, const candidate_types & h1, const candidate_types & p) -> double {
+            const double dx1 = (double)h1.bytes - (double)h0.bytes;
+            const double dy1 = h1.error - h0.error;
+            const double dx2 = (double)p.bytes - (double)h0.bytes;
+            const double dy2 = p.error - h0.error;
+            return dx1 * dy2 - dx2 * dy1;
+        };
         std::vector<candidate_types> hull; hull.reserve(candidates.size());
         for (const auto & c : candidates) {
-            auto cross_product = [](const candidate_types & h0, const candidate_types & h1, const candidate_types & p) -> double {
-                const double dx1 = (double)h1.bytes - (double)h0.bytes;
-                const double dy1 = h1.error - h0.error;
-                const double dx2 = (double)p.bytes - (double)h0.bytes;
-                const double dy2 = p.error - h0.error;
-                return dx1 * dy2 - dx2 * dy1;
-            };
-
             while (hull.size() >= 2) {
                 if (cross_product(hull[hull.size() - 2], hull[hull.size() - 1], c) <= epsilon) {
                     hull.pop_back();
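Hoisting cross_product out of the loop avoids re-creating the lambda for every candidate; the loop itself is the standard monotone-chain lower envelope over (bytes, error) points sorted by ascending size. A self-contained sketch of that technique on plain (x, y) pairs (the pt type and a zero tolerance are illustrative; the commit uses candidate_types and epsilon):

#include <vector>

struct pt { double x; double y; }; // e.g. (bytes, error)

// Cross product of (h0->h1) x (h0->p); <= 0 means the middle point lies
// on or above the chord from h0 to p, so it is not on the lower envelope.
static double cross(const pt & h0, const pt & h1, const pt & p) {
    return (h1.x - h0.x) * (p.y - h0.y) - (p.x - h0.x) * (h1.y - h0.y);
}

// Lower envelope of points already sorted by ascending x
// (monotone-chain construction, as in the loop above).
static std::vector<pt> lower_envelope(const std::vector<pt> & pts) {
    std::vector<pt> hull;
    hull.reserve(pts.size());
    for (const pt & p : pts) {
        while (hull.size() >= 2 && cross(hull[hull.size() - 2], hull.back(), p) <= 0.0) {
            hull.pop_back(); // drop points that are not strictly below the chord
        }
        hull.push_back(p);
    }
    return hull;
}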
