@@ -694,6 +694,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     constexpr double epsilon = 1e-12;
     constexpr double infinity = std::numeric_limits<double>::infinity();
     constexpr uint32_t file_magic = 0x42505731; // BPW1
+    constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
     const char * func = __func__;

     auto tensor_bytes = [](const ggml_tensor * t, const ggml_type typ) -> size_t {
@@ -731,7 +732,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(

     auto make_compatible = [&](const ggml_tensor * t, const ggml_type typ) -> ggml_type {
         if (is_compatible(t, typ)) { return typ; }
-        ggml_type fb = fallback_type(typ);
+        const ggml_type fb = fallback_type(typ);
         return is_compatible(t, fb) ? fb : GGML_TYPE_F16;
     };

@@ -754,7 +755,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         for (size_t i = 0; i < n; ++i) {
             h = (h << 5) + h + data[i];
         }
-        return h ? h : 0xeabada55cafed00d;
+        return h ? h : arbitrary_magic;
     };

     auto metadata_id = [&](const gguf_context * ctx) -> uint64_t {
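
Note: the loop above is the classic DJB2 string hash (h = h * 33 + byte), used to fingerprint the model so a checkpoint is only resumed against the same file, and zero is reserved to mean "no hash", hence the remap to arbitrary_magic. A standalone sketch, assuming the usual DJB2 seed of 5381 (the initial value of h is outside this hunk):

#include <cstddef>
#include <cstdint>

static uint64_t djb2_hash(const uint8_t * data, size_t n) {
    uint64_t h = 5381; // assumed seed; not shown in the hunk
    for (size_t i = 0; i < n; ++i) {
        h = (h << 5) + h + data[i]; // h * 33 + byte
    }
    constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
    return h ? h : arbitrary_magic; // 0 is reserved for "no hash"
}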
@@ -795,7 +796,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         ofs.write((const char *)&n, sizeof(n));
         for (const auto & ti : all_vec) {
             const std::string name = ggml_get_name(ti.w->tensor);
-            const uint32_t len = (uint32_t)name.size();
+            const auto len = (uint32_t)name.size();
             ofs.write((const char *)&len, sizeof(len));
             ofs.write(name.data(), len);

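
Note: the writer above emits each tensor name as a length-prefixed record: a u32 byte count followed by the raw bytes, with no null terminator. A minimal sketch of that pattern as a hypothetical helper (not part of the patch):

#include <cstdint>
#include <fstream>
#include <string>

// Hypothetical helper mirroring the u32-length + bytes record above.
static void write_string(std::ofstream & ofs, const std::string & s) {
    const auto len = (uint32_t)s.size();
    ofs.write((const char *)&len, sizeof(len));
    ofs.write(s.data(), len);
}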
@@ -835,13 +836,14 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         if (magic != file_magic) {
             LLAMA_LOG_WARN("%s: invalid resume file, ignoring: %s\n", func, checkpoint_file.c_str());
             return out;
-        } else if (id != model_id) {
+        }
+        if (id != model_id) {
             LLAMA_LOG_WARN("%s: model ID mismatch, ignoring: %s\n", func, checkpoint_file.c_str());
             return out;
-        } else {
-            LLAMA_LOG_INFO("%s: state file found, resuming tensor quantization\n", func);
         }

+        LLAMA_LOG_INFO("%s: state file found, resuming tensor quantization\n", func);
+
         uint64_t n = 0;
         ifs.read((char *)&n, sizeof(n));
         for (uint64_t i = 0; i < n; ++i) {
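
Note: the restructuring above replaces an else-if/else chain with guard clauses: each validation failure returns early, so the success path is no longer nested inside an else branch. The pattern in isolation, with hypothetical names:

// Hypothetical standalone version of the validation above.
#include <cstdint>

static bool can_resume(uint32_t magic, uint32_t file_magic, uint64_t id, uint64_t model_id) {
    if (magic != file_magic) { return false; } // not a BPW1 checkpoint
    if (id != model_id)      { return false; } // checkpoint belongs to another model
    return true;                               // safe to resume
}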
@@ -862,15 +864,15 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
             si.n_elements = (size_t)ne;

             si.candidate.resize(cn);
-            for (size_t j = 0; j < si.candidate.size(); ++j) {
+            for (auto & s : si.candidate) {
                 int32_t t = 0;
                 uint64_t b = 0;
                 ifs.read((char *)&t, sizeof(t));
-                si.candidate[j].type = (ggml_type)t;
-                ifs.read((char *)&si.candidate[j].bpw, sizeof(si.candidate[j].bpw));
+                s.type = (ggml_type)t;
+                ifs.read((char *)&s.bpw, sizeof(s.bpw));
                 ifs.read((char *)&b, sizeof(b));
-                si.candidate[j].bytes = (size_t)b;
-                ifs.read((char *)&si.candidate[j].error, sizeof(si.candidate[j].error));
+                s.bytes = (size_t)b;
+                ifs.read((char *)&s.error, sizeof(s.error));
             }

             out.emplace(std::move(name), std::move(si));
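
Note: this loop is the reader-side counterpart of the checkpoint format: fixed-width fields are read back in the exact order they were written. Assuming the same u32-length string records as the writer, a hypothetical helper would look like:

#include <cstdint>
#include <fstream>
#include <string>

// Hypothetical reader for the u32-length + bytes records written earlier.
static std::string read_string(std::ifstream & ifs) {
    uint32_t len = 0;
    ifs.read((char *)&len, sizeof(len));
    std::string s(len, '\0');
    ifs.read(s.data(), len); // non-const data() requires C++17
    return s;
}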
@@ -886,7 +888,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
             LLAMA_LOG_INFO("%s: deleting %s\n", func, checkpoint_file.c_str());
             std::remove(checkpoint_file.c_str());
         }
-
     };

     auto check_signal_handler = [&](const std::vector<tensor_info> & all_vec) {
@@ -1198,10 +1199,10 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     // Compute rows based on tensor shape and slice count
     auto sample_rows = [](const int64_t n, const int64_t rows, const int64_t n2, const bool has_acts) -> int64_t {
         const double tensor_budget = has_acts ? 1 * 1024 * 1024 : 0.5 * 1024 * 1024;
-        const double scale_rows = std::clamp(std::sqrt(std::max(1.0, (double)rows) / 4096.0), 0.5, 2.0); // favour more rows for large nrt
+        const double scale_rows = std::clamp(std::sqrt(std::max(1.0, (double)rows) / 4096.0), 0.5, 2.0); // favour more rows for large tensors
         const double slice_budget = tensor_budget * scale_rows / std::max<int64_t>(1, n2);
         const int64_t min_rows = has_acts ? 128 : 64;
-        const int64_t max_rows = 4096;
+        constexpr int64_t max_rows = 4096; // row limit to avoid excessive memory use
         int64_t total_rows = std::llround(slice_budget / std::max<int64_t>(1, n));
         total_rows = std::max<int64_t>(min_rows, std::min<int64_t>(total_rows, std::min<int64_t>(rows, max_rows)));
         if (rows <= min_rows * 2) { total_rows = rows; }
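
Note: to make the budget arithmetic concrete: for a tensor with activations, n = 4096 columns, rows = 4096 and one slice (n2 = 1), the budget is 1 MiB, scale_rows = sqrt(4096 / 4096) = 1.0, so total_rows = round(1048576 / 4096) = 256, which already sits between min_rows = 128 and max_rows = 4096. A compile-checkable copy of the same formula for experimenting with the constants:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone copy of the sample_rows budget math; matches the hunk as shown.
static int64_t sample_rows(int64_t n, int64_t rows, int64_t n2, bool has_acts) {
    const double tensor_budget = has_acts ? 1.0 * 1024 * 1024 : 0.5 * 1024 * 1024;
    const double scale_rows = std::clamp(std::sqrt(std::max(1.0, (double)rows) / 4096.0), 0.5, 2.0);
    const double slice_budget = tensor_budget * scale_rows / std::max<int64_t>(1, n2);
    const int64_t min_rows = has_acts ? 128 : 64;
    constexpr int64_t max_rows = 4096;
    int64_t total_rows = std::llround(slice_budget / std::max<int64_t>(1, n));
    total_rows = std::max<int64_t>(min_rows, std::min<int64_t>(total_rows, std::min<int64_t>(rows, max_rows)));
    if (rows <= min_rows * 2) { total_rows = rows; }
    return total_rows;
}

int main() {
    // 4096x4096 tensor, one slice, activations available -> prints 256
    std::printf("%lld\n", (long long)sample_rows(4096, 4096, 1, true));
}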
@@ -1246,7 +1247,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         f32_sample.clear();
         std::vector<float> row_buffer(n_per_row);
         for (int64_t slice = 0; slice < ne2; ++slice) {
-            std::mt19937 rng(std::hash<std::string>{}(name) ^ 0xeabada55cafed00d ^ slice);
+            std::mt19937 rng(std::hash<std::string>{}(name) ^ arbitrary_magic ^ slice);
            const int64_t rows_sample_max = std::max<int64_t>(1, std::min<int64_t>(nrows_total, rows_sample_per_expert));
            const int64_t stride = std::max<int64_t>(1, nrows_total / rows_sample_max);
            int64_t offset = 0;
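
Note: seeding the generator from the tensor name, a fixed magic and the slice index makes row sampling deterministic: re-running or resuming the pass selects the same rows for the same tensor, so cached errors stay valid. (std::hash is implementation-defined, so the seed is stable within one build, not across standard libraries.) Minimal sketch of the seeding scheme:

#include <cstdint>
#include <functional>
#include <random>
#include <string>

// Deterministic per-(tensor, slice) generator, as in the loop above.
// std::mt19937's seed type is 32-bit, so the combined value is truncated.
static std::mt19937 make_rng(const std::string & name, int64_t slice) {
    constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
    const uint64_t seed = std::hash<std::string>{}(name) ^ arbitrary_magic ^ (uint64_t)slice;
    return std::mt19937((std::mt19937::result_type)seed);
}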
@@ -1411,8 +1412,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
            if (c.bytes == 0) { continue; }
            const double final_err = bias_needed ? c.error : c.mse;
            info.candidate.push_back(candidate_types{ c.type, c.bpw, c.bytes, final_err, c.mse, c.proj });
-            // LLAMA_LOG_INFO("\t%s: %35s \t%10s \t%1.4f bpw \t%10zu bytes \t mse: %1.8e \t err: %1.8e\n",
-            //     func, name.c_str(), ggml_type_name(c.type), c.bpw, c.bytes, c.mse, final_err);
        }

        if (info.candidate.empty()) {
@@ -1445,16 +1444,15 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        if (candidates.size() < 3) { return; } // need at least 3 points to do convex hull

        // Convex hull (lower envelope)
+        auto cross_product = [](const candidate_types & h0, const candidate_types & h1, const candidate_types & p) -> double {
+            const double dx1 = (double)h1.bytes - (double)h0.bytes;
+            const double dy1 = h1.error - h0.error;
+            const double dx2 = (double)p.bytes - (double)h0.bytes;
+            const double dy2 = p.error - h0.error;
+            return dx1 * dy2 - dx2 * dy1;
+        };
        std::vector<candidate_types> hull; hull.reserve(candidates.size());
        for (const auto & c : candidates) {
-            auto cross_product = [](const candidate_types & h0, const candidate_types & h1, const candidate_types & p) -> double {
-                const double dx1 = (double)h1.bytes - (double)h0.bytes;
-                const double dy1 = h1.error - h0.error;
-                const double dx2 = (double)p.bytes - (double)h0.bytes;
-                const double dy2 = p.error - h0.error;
-                return dx1 * dy2 - dx2 * dy1;
-            };
-
            while (hull.size() >= 2) {
                if (cross_product(hull[hull.size() - 2], hull[hull.size() - 1], c) <= epsilon) {
                    hull.pop_back();
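
Note: hoisting cross_product out of the loop reads better and makes explicit that this is the standard monotone-chain lower-hull construction over (bytes, error) points: with candidates sorted by bytes, a non-positive cross product means the middle point lies on or above the chord between its neighbours, so it is never the best error-for-bytes trade-off and gets popped. The same envelope on plain (x, y) points, as a self-contained sketch:

#include <vector>

// Lower envelope (monotone chain) over points sorted by x, mirroring the
// hull loop above.
struct point { double x, y; };

static double cross(const point & o, const point & a, const point & b) {
    return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
}

static std::vector<point> lower_hull(const std::vector<point> & pts) {
    std::vector<point> hull;
    hull.reserve(pts.size());
    for (const auto & p : pts) {
        while (hull.size() >= 2 && cross(hull[hull.size() - 2], hull.back(), p) <= 0.0) {
            hull.pop_back(); // middle point is not on the lower envelope
        }
        hull.push_back(p);
    }
    return hull;
}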