@@ -87,13 +87,17 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
8787//
8888[[noreturn]]
8989static void usage (const char * executable) {
90- printf (" usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n " , executable);
90+ printf (" usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n " , executable);
9191 printf (" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n " );
9292 printf (" --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n " );
9393 printf (" --pure: Disable k-quant mixtures and quantize all tensors to the same type\n " );
9494 printf (" --imatrix file_name: use data in file_name as importance matrix for quant optimizations\n " );
9595 printf (" --include-weights tensor_name: use importance matrix for this/these tensor(s)\n " );
9696 printf (" --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n " );
97+ printf (" --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n " );
98+ printf (" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n " );
99+ printf (" --override-kv KEY=TYPE:VALUE\n " );
100+ printf (" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n " );
97101 printf (" Note: --include-weights and --exclude-weights cannot be used together\n " );
98102 printf (" \n Allowed quantization types:\n " );
99103 for (auto & it : QUANT_OPTIONS) {
@@ -107,14 +111,14 @@ static void usage(const char * executable) {
107111 exit (1 );
108112}
109113
110- static void load_imatrix (const std::string& imatrix_file, std::unordered_map<std::string, std::vector<float >>& imatrix_data) {
114+ static void load_imatrix (const std::string & imatrix_file, std::unordered_map<std::string, std::vector<float >> & imatrix_data) {
111115 std::ifstream in (imatrix_file.c_str (), std::ios::binary);
112116 if (!in) {
113- printf (" %s: failed to open %s\n " ,__func__,imatrix_file.c_str ());
117+ printf (" %s: failed to open %s\n " ,__func__, imatrix_file.c_str ());
114118 return ;
115119 }
116120 int n_entries;
117- in.read ((char *)&n_entries, sizeof (n_entries));
121+ in.read ((char *)&n_entries, sizeof (n_entries));
118122 if (in.fail () || n_entries < 1 ) {
119123 printf (" %s: no data in file %s\n " , __func__, imatrix_file.c_str ());
120124 return ;
@@ -124,39 +128,39 @@ static void load_imatrix(const std::string& imatrix_file, std::unordered_map<std
124128 std::vector<char > name_as_vec (len+1 );
125129 in.read ((char *)name_as_vec.data (), len);
126130 if (in.fail ()) {
127- printf (" %s: failed reading name for entry %d from %s\n " ,__func__,i+1 ,imatrix_file.c_str ());
131+ printf (" %s: failed reading name for entry %d from %s\n " , __func__, i+1 , imatrix_file.c_str ());
128132 return ;
129133 }
130134 name_as_vec[len] = 0 ;
131135 std::string name{name_as_vec.data ()};
132- auto & e = imatrix_data[std::move (name)];
136+ auto & e = imatrix_data[std::move (name)];
133137 int ncall;
134- in.read ((char *)&ncall, sizeof (ncall));
138+ in.read ((char *)&ncall, sizeof (ncall));
135139 int nval;
136140 in.read ((char *)&nval, sizeof (nval));
137141 if (in.fail () || nval < 1 ) {
138- printf (" %s: failed reading number of values for entry %d\n " ,__func__,i);
142+ printf (" %s: failed reading number of values for entry %d\n " , __func__, i);
139143 imatrix_data = {};
140144 return ;
141145 }
142146 e.resize (nval);
143- in.read ((char *)e.data (), nval*sizeof (float ));
147+ in.read ((char *)e.data (), nval*sizeof (float ));
144148 if (in.fail ()) {
145- printf (" %s: failed reading data for entry %d\n " ,__func__,i);
149+ printf (" %s: failed reading data for entry %d\n " , __func__, i);
146150 imatrix_data = {};
147151 return ;
148152 }
149153 if (ncall > 0 ) {
150154 for (auto & v : e) v /= ncall;
151155 }
152156 }
153- printf (" %s: loaded %d importance matrix entries from %s\n " ,__func__,int (imatrix_data.size ()),imatrix_file.c_str ());
157+ printf (" %s: loaded %d importance matrix entries from %s\n " , __func__, int (imatrix_data.size ()), imatrix_file.c_str ());
154158}
155159
156- static void prepare_imatrix (const std::string& imatrix_file,
157- const std::vector<std::string>& included_weights,
158- const std::vector<std::string>& excluded_weights,
159- std::unordered_map<std::string, std::vector<float >>& imatrix_data) {
160+ static void prepare_imatrix (const std::string & imatrix_file,
161+ const std::vector<std::string> & included_weights,
162+ const std::vector<std::string> & excluded_weights,
163+ std::unordered_map<std::string, std::vector<float >> & imatrix_data) {
160164 if (!imatrix_file.empty ()) {
161165 load_imatrix (imatrix_file, imatrix_data);
162166 }
@@ -201,6 +205,43 @@ static ggml_type parse_ggml_type(const char * arg) {
201205 return result;
202206}
203207
208+ static bool parse_kv_override (const char * data, std::vector<llama_model_kv_override> & overrides) {
209+ const char * sep = strchr (data, ' =' );
210+ if (sep == nullptr || sep - data >= 128 ) {
211+ fprintf (stderr, " %s: malformed KV override '%s'\n " , __func__, data);
212+ return false ;
213+ }
214+ llama_model_kv_override kvo;
215+ std::strncpy (kvo.key , data, sep - data);
216+ kvo.key [sep - data] = 0 ;
217+ sep++;
218+ if (strncmp (sep, " int:" , 4 ) == 0 ) {
219+ sep += 4 ;
220+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
221+ kvo.int_value = std::atol (sep);
222+ } else if (strncmp (sep, " float:" , 6 ) == 0 ) {
223+ sep += 6 ;
224+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
225+ kvo.float_value = std::atof (sep);
226+ } else if (strncmp (sep, " bool:" , 5 ) == 0 ) {
227+ sep += 5 ;
228+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
229+ if (std::strcmp (sep, " true" ) == 0 ) {
230+ kvo.bool_value = true ;
231+ } else if (std::strcmp (sep, " false" ) == 0 ) {
232+ kvo.bool_value = false ;
233+ } else {
234+ fprintf (stderr, " %s: invalid boolean value for KV override '%s'\n " , __func__, data);
235+ return false ;
236+ }
237+ } else {
238+ fprintf (stderr, " %s: invalid type for KV override '%s'\n " , __func__, data);
239+ return false ;
240+ }
241+ overrides.emplace_back (std::move (kvo));
242+ return true ;
243+ }
244+
204245int main (int argc, char ** argv) {
205246 if (argc < 3 ) {
206247 usage (argv[0 ]);
@@ -211,6 +252,7 @@ int main(int argc, char ** argv) {
211252 int arg_idx = 1 ;
212253 std::string imatrix_file;
213254 std::vector<std::string> included_weights, excluded_weights;
255+ std::vector<llama_model_kv_override> kv_overrides;
214256
215257 for (; arg_idx < argc && strncmp (argv[arg_idx], " --" , 2 ) == 0 ; arg_idx++) {
216258 if (strcmp (argv[arg_idx], " --leave-output-tensor" ) == 0 ) {
@@ -227,6 +269,10 @@ int main(int argc, char ** argv) {
227269 } else {
228270 usage (argv[0 ]);
229271 }
272+ } else if (strcmp (argv[arg_idx], " --override-kv" ) == 0 ) {
273+ if (arg_idx == argc-1 || !parse_kv_override (argv[++arg_idx], kv_overrides)) {
274+ usage (argv[0 ]);
275+ }
230276 } else if (strcmp (argv[arg_idx], " --allow-requantize" ) == 0 ) {
231277 params.allow_requantize = true ;
232278 } else if (strcmp (argv[arg_idx], " --pure" ) == 0 ) {
@@ -267,6 +313,11 @@ int main(int argc, char ** argv) {
267313 if (!imatrix_data.empty ()) {
268314 params.imatrix = &imatrix_data;
269315 }
316+ if (!kv_overrides.empty ()) {
317+ kv_overrides.emplace_back ();
318+ kv_overrides.back ().key [0 ] = 0 ;
319+ params.kv_overrides = &kv_overrides;
320+ }
270321
271322 llama_backend_init ();
272323
@@ -288,8 +339,7 @@ int main(int argc, char ** argv) {
288339 if (ftype_str == " COPY" ) {
289340 params.only_copy = true ;
290341 }
291- }
292- else {
342+ } else {
293343 fname_out = argv[arg_idx];
294344 arg_idx++;
295345
0 commit comments