@@ -5,11 +5,9 @@
 #include "llama-model-loader.h"
 
 #include <algorithm>
-#include <cmath>
 #include <cstring>
 #include <cinttypes>
 #include <fstream>
-#include <mutex>
 #include <thread>
 #include <unordered_map>
 
@@ -48,7 +46,7 @@ struct quantize_state_impl {
 };
 
 static void llama_tensor_dequantize_impl(
-    struct ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
+    ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread
 ) {
     if (output.size() < nelements) {
@@ -536,7 +534,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     model.load_hparams(ml);
     model.load_stats (ml);
 
-    struct quantize_state_impl qs(model, params);
+    quantize_state_impl qs(model, params);
 
     if (params->only_copy) {
         ftype = ml.ftype;
@@ -661,7 +659,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     // populate the original tensors so we get an initial meta data
     for (const auto * it : tensors) {
         uint16_t i_split = params->keep_split ? it->idx : 0;
-        struct ggml_tensor * tensor = it->tensor;
+        ggml_tensor * tensor = it->tensor;
         if (!ctx_outs[i_split]) {
             ctx_outs[i_split].reset(gguf_init_empty());
         }
@@ -710,7 +708,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     new_ofstream(0);
     for (const auto * it : tensors) {
         const auto & weight = *it;
-        struct ggml_tensor * tensor = weight.tensor;
+        ggml_tensor * tensor = weight.tensor;
         if (weight.idx != cur_split && params->keep_split) {
             close_ofstream();
             new_ofstream(weight.idx);
@@ -776,7 +774,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;
 
-        enum ggml_type new_type;
+        ggml_type new_type;
         void * new_data;
         size_t new_size;
 
@@ -950,8 +948,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 // interface implementation
 //
 
-struct llama_model_quantize_params llama_model_quantize_default_params() {
-    struct llama_model_quantize_params result = {
+llama_model_quantize_params llama_model_quantize_default_params() {
+    llama_model_quantize_params result = {
         /*.nthread            =*/ 0,
         /*.ftype              =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
         /*.output_tensor_type =*/ GGML_TYPE_COUNT,
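A note on the change itself: in C++ (unlike C), a `struct` or `enum` tag is directly usable as a type name, so the `struct`/`enum` elaborated-type-specifiers removed here were redundant; the dropped `<cmath>` and `<mutex>` includes were presumably no longer used by this translation unit. Below is a minimal sketch of the language rule, using hypothetical stand-in types rather than the real ggml definitions:

```cpp
#include <cstdint>

// Hypothetical stand-ins for ggml_tensor / ggml_type; the real definitions
// live in ggml.h and are not reproduced here.
struct tensor_like {
    int64_t ne[4]; // elements per dimension
};

enum type_like {
    TYPE_F32,
    TYPE_Q5_1,
};

int main() {
    // C style (also valid C++): the elaborated-type-specifier names the tag.
    struct tensor_like a = {};
    enum type_like t1 = TYPE_F32;

    // C++ only: the tag is itself a type name, so the keyword is redundant --
    // the same simplification this diff applies.
    tensor_like b = {};
    type_like t2 = TYPE_Q5_1;

    (void)a; (void)b; (void)t1; (void)t2;
    return 0;
}
```

The keyword still has to remain in declarations that a plain C compiler sees (such as a public C API header), since C accepts a bare tag name only via `typedef`; dropping it is safe in this file because it is compiled as C++.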