 
 static bool g_verbose = false;
 
+struct tensor_transformation {
+    struct ggml_tensor * in;
+    struct ggml_tensor * out;
+    bool is_copy;
+};
+
 static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
     int id = gguf_find_key(ctx_gguf, key.c_str());
     return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
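
For context, a minimal standalone sketch (not part of this commit; ggml_tensor is replaced by a hypothetical fake_tensor stub) of how one tensor_transformation record carries the copy-vs-merge decision explicitly, instead of encoding "copy only" as a nullptr second pair element:

// Hedged sketch, not from the commit: only the copy-vs-merge dispatch is shown,
// with ggml types stubbed out so the example compiles on its own.
#include <cstdio>
#include <vector>

struct fake_tensor { const char * name; };   // stand-in for struct ggml_tensor

struct tensor_transformation {
    fake_tensor * in;
    fake_tensor * out;
    bool is_copy;
};

int main() {
    fake_tensor norm = { "blk.0.attn_norm.weight" };   // no LoRA weights -> copy
    fake_tensor attq = { "blk.0.attn_q.weight"    };   // has LoRA weights -> merge

    std::vector<tensor_transformation> trans;
    trans.push_back({ &norm, &norm, true  });
    trans.push_back({ &attq, &attq, false });

    for (auto & t : trans) {
        std::printf("%-24s -> %s\n", t.in->name, t.is_copy ? "copy" : "merge");
    }
}
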
@@ -198,8 +204,7 @@ struct lora_merge_ctx {
         }
 
         // mapping base tensor to out tensor (same shape with base, but different type)
-        // if out_tensor == nullptr, we only copy it
-        std::vector<std::pair<struct ggml_tensor *, struct ggml_tensor *>> base_to_out_tensors;
+        std::vector<tensor_transformation> trans;
         for (auto & it : base_model.tensors) {
             bool t_a = true;
             bool t_b = true;
@@ -212,14 +217,22 @@ struct lora_merge_ctx {
                 // only copy
                 struct ggml_tensor * cpy_tensor = ggml_dup_tensor(ctx_out_ggml, base_tensor);
                 ggml_set_name(cpy_tensor, base_tensor->name);
-                base_to_out_tensors.push_back(std::make_pair(cpy_tensor, nullptr));
+                trans.push_back({
+                    cpy_tensor,
+                    cpy_tensor,
+                    true,
+                });
                 gguf_add_tensor(ctx_out, cpy_tensor);
             } else if (t_a && t_b) {
                 // need merging
                 struct ggml_tensor * out_tensor = ggml_new_tensor(
                     ctx_out_ggml, get_out_tensor_type(base_tensor), GGML_MAX_DIMS, base_tensor->ne);
                 ggml_set_name(out_tensor, base_tensor->name);
-                base_to_out_tensors.push_back(std::make_pair(base_tensor, out_tensor));
+                trans.push_back({
+                    base_tensor,
+                    out_tensor,
+                    false,
+                });
                 gguf_add_tensor(ctx_out, out_tensor);
             } else {
                 throw std::runtime_error("tensor " + it.first + " missing either lora_a or lora_b");
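
As a side note, the rule driving this branch can be sketched with plain strings (hypothetical tensor names; the ".lora_a"/".lora_b" suffix convention is assumed from the surrounding code): a base tensor is copied when the adapter has neither suffix, merged when it has both, and anything in between is an error.

// Hedged sketch, not from the commit: the copy/merge/error classification
// above, reduced to name lookups in a std::set.
#include <cstdio>
#include <set>
#include <stdexcept>
#include <string>

int main() {
    // Hypothetical adapter contents: only attn_q has LoRA weights.
    std::set<std::string> adapter = {
        "blk.0.attn_q.weight.lora_a",
        "blk.0.attn_q.weight.lora_b",
    };
    for (std::string name : { "blk.0.attn_q.weight", "blk.0.attn_norm.weight" }) {
        bool t_a = adapter.count(name + ".lora_a") > 0;
        bool t_b = adapter.count(name + ".lora_b") > 0;
        if (!t_a && !t_b) {
            std::printf("%s: copy\n", name.c_str());
        } else if (t_a && t_b) {
            std::printf("%s: merge\n", name.c_str());
        } else {
            throw std::runtime_error("tensor " + name + " missing either lora_a or lora_b");
        }
    }
}
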
@@ -234,12 +247,12 @@ struct lora_merge_ctx {
 
         // process base model tensors
         size_t n_merged = 0;
-        for (auto & it : base_to_out_tensors) {
-            if (it.second != nullptr) {
-                merge_tensor(it.first, it.second);
+        for (auto & it : trans) {
+            if (!it.is_copy) {
+                merge_tensor(it.in, it.out);
                 n_merged++;
             } else {
-                copy_tensor(it.first);
+                copy_tensor(it.in);
             }
         }
 
@@ -252,7 +265,7 @@ struct lora_merge_ctx {
         }
 
         printf("%s : merged %ld tensors with lora adapters\n", __func__, n_merged);
-        printf("%s : wrote %ld tensors to output file\n", __func__, base_to_out_tensors.size());
+        printf("%s : wrote %ld tensors to output file\n", __func__, trans.size());
     }
 
     void copy_tensor(struct ggml_tensor * base) {
@@ -285,6 +298,10 @@ struct lora_merge_ctx {
         for (size_t i = 0; i < adapters.size(); ++i) {
             auto t_a = adapters[i]->get_tensor(name_lora_a);
             auto t_b = adapters[i]->get_tensor(name_lora_b);
+            // TODO: add support for quantized lora
+            if (ggml_is_quantized(t_a->type) || ggml_is_quantized(t_b->type)) {
+                throw std::runtime_error("quantized LoRA adapters are not supported, please retry with f16 or f32");
+            }
             inp_a[i] = ggml_dup_tensor(ctx, t_a);
             inp_b[i] = ggml_dup_tensor(ctx, t_b);
         }
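
A small illustration (assuming the ggml headers are available; not part of the commit) of what the new guard accepts and rejects: ggml_is_quantized() is false for the float types and true for the block-quantized ones, so f16/f32 adapters pass and quantized ones throw.

// Hedged sketch: prints which ggml tensor types the new check would reject.
// Compile and link against ggml; type names come from ggml_type_name().
#include <cstdio>
#include "ggml.h"

int main() {
    const enum ggml_type types[] = { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0 };
    for (enum ggml_type t : types) {
        std::printf("%-5s -> %s\n", ggml_type_name(t),
                    ggml_is_quantized(t) ? "rejected (quantized)" : "accepted");
    }
}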