Commit 35f45f1

Minor refactoring as per the contributors' guidelines
1 parent 9b3ccb5 commit 35f45f1
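
The change removes two now-unused includes (<cmath> and <mutex>) and drops redundant "struct"/"enum" elaborated-type-specifiers from C++ sources; a short illustration of the latter point follows the diffs below.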

File tree

examples/quantize/quantize.cpp
src/llama-quant.cpp

2 files changed: +8 −11 lines changed

examples/quantize/quantize.cpp

Lines changed: 1 addition & 2 deletions

@@ -7,7 +7,6 @@
 #include <string>
 #include <unordered_map>
 #include <fstream>
-#include <cmath>
 #include <cctype>
 
 struct quant_option {
@@ -16,7 +15,7 @@ struct quant_option {
     std::string desc;
 };
 
-static const std::vector<struct quant_option> QUANT_OPTIONS = {
+static const std::vector<quant_option> QUANT_OPTIONS = {
     { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
     { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B", },
     { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B", },

src/llama-quant.cpp

Lines changed: 7 additions & 9 deletions

@@ -5,11 +5,9 @@
 #include "llama-model-loader.h"
 
 #include <algorithm>
-#include <cmath>
 #include <cstring>
 #include <cinttypes>
 #include <fstream>
-#include <mutex>
 #include <thread>
 #include <unordered_map>
 
@@ -48,7 +46,7 @@ struct quantize_state_impl {
 };
 
 static void llama_tensor_dequantize_impl(
-    struct ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
+    ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread
 ) {
     if (output.size() < nelements) {
@@ -536,7 +534,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     model.load_hparams(ml);
     model.load_stats  (ml);
 
-    struct quantize_state_impl qs(model, params);
+    quantize_state_impl qs(model, params);
 
     if (params->only_copy) {
         ftype = ml.ftype;
@@ -661,7 +659,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     // populate the original tensors so we get an initial meta data
     for (const auto * it : tensors) {
         uint16_t i_split = params->keep_split ? it->idx : 0;
-        struct ggml_tensor * tensor = it->tensor;
+        ggml_tensor * tensor = it->tensor;
         if (!ctx_outs[i_split]) {
             ctx_outs[i_split].reset(gguf_init_empty());
         }
@@ -710,7 +708,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     new_ofstream(0);
     for (const auto * it : tensors) {
         const auto & weight = *it;
-        struct ggml_tensor * tensor = weight.tensor;
+        ggml_tensor * tensor = weight.tensor;
         if (weight.idx != cur_split && params->keep_split) {
             close_ofstream();
             new_ofstream(weight.idx);
@@ -776,7 +774,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;
 
-        enum ggml_type new_type;
+        ggml_type new_type;
         void * new_data;
         size_t new_size;
 
@@ -950,8 +948,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 // interface implementation
 //
 
-struct llama_model_quantize_params llama_model_quantize_default_params() {
-    struct llama_model_quantize_params result = {
+llama_model_quantize_params llama_model_quantize_default_params() {
+    llama_model_quantize_params result = {
         /*.nthread     =*/ 0,
         /*.ftype       =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
         /*.output_tensor_type =*/ GGML_TYPE_COUNT,
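
Why this refactor is a no-op in C++: unlike C, C++ treats a declared class, struct, or enum name as a complete type name on its own, so the "struct"/"enum" keyword in front of it is redundant in .cpp translation units. Below is a minimal standalone sketch of that rule; it is not code from this commit, and quant_option / demo_type here are hypothetical stand-ins for the types touched by the diff.

// sketch.cpp — build with: g++ -std=c++17 sketch.cpp
#include <vector>

struct quant_option { int ftype; };      // stand-in for the struct in the diff
enum demo_type { DEMO_F32, DEMO_Q4_0 };  // stand-in for ggml_type

int main() {
    std::vector<struct quant_option> a;  // valid C++, but "struct" adds nothing
    std::vector<quant_option>        b;  // the style the commit switches to
    enum demo_type t1 = DEMO_F32;        // "enum" is likewise redundant
    demo_type      t2 = DEMO_Q4_0;
    (void)a; (void)b; (void)t1; (void)t2;
    return 0;
}

In C the keyword is mandatory for untypedef'd tags, which is why it tends to linger in code shared with or ported from C headers such as llama.h; inside C++ sources it can simply be dropped, as this commit does.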
