@@ -1720,6 +1720,16 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     ggml_backend_buffer_type_t first_moved_from_buft = nullptr;
     ggml_backend_buffer_type_t first_moved_to_buft = nullptr;
 
+    auto add_lora_tensors = [&](const std::string & lora_name, const std::string & tensor_name) -> void {
+        std::string base_name = tensor_name.substr(0, tensor_name.size() - 6);
+
+        ggml_tensor * lora_a = ml.get_tensor_meta((base_name + "<" + lora_name + ">lora_a").c_str());
+        ggml_tensor * lora_b = ml.get_tensor_meta((base_name + "<" + lora_name + ">lora_b").c_str());
+        loras[lora_name]->ab_map[tensor_name] = llama_adapter_lora_weight(lora_a, lora_b);
+
+        ml.n_created += 2;
+    };
+
     auto create_tensor = [&](const LLM_TN_IMPL & tn, const std::initializer_list<int64_t> & ne, int flags) -> ggml_tensor * {
         ggml_tensor * t_meta = ml.get_tensor_meta(tn.str().c_str());
 
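For reference, a minimal standalone sketch (not part of the change) of the naming convention the add_lora_tensors helper above assumes: it strips the 6-character "weight" suffix from the base tensor name, which keeps the trailing dot, and splices the adapter name in angle brackets in front of the lora_a / lora_b suffix. The helper name lora_tensor_name and the adapter name "retrieval.query" are made up for illustration; real adapter names come from the LLM_KV_ADAPTER_LORA_NAMES metadata read further down.

#include <cstdio>
#include <string>

// Mirrors the name construction inside add_lora_tensors: drop the 6-character
// "weight" suffix (the trailing '.' stays) and splice in "<lora_name>".
static std::string lora_tensor_name(const std::string & tensor_name,
                                    const std::string & lora_name,
                                    const char * suffix /* "lora_a" or "lora_b" */) {
    const std::string base_name = tensor_name.substr(0, tensor_name.size() - 6);
    return base_name + "<" + lora_name + ">" + suffix;
}

int main() {
    // "retrieval.query" is a made-up adapter name used purely for illustration.
    printf("%s\n", lora_tensor_name("token_embd.weight", "retrieval.query", "lora_a").c_str());
    // prints: token_embd.<retrieval.query>lora_a
}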
@@ -2246,6 +2256,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_NOMIC_BERT_MOE:
         case LLM_ARCH_JINA_BERT_V3:
             {
+                std::vector<std::string> lora_names;
+
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
                 type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, TENSOR_NOT_REQUIRED);
 
@@ -2262,6 +2274,31 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
                 tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0);
 
+                if (arch == LLM_ARCH_JINA_BERT_V3) {
+                    float lora_alpha = 1.0f;
+                    std::vector<std::string> lora_prompt_prefixes;
+
+                    ml.get_key(LLM_KV_ADAPTER_LORA_ALPHA, lora_alpha, false);
+                    ml.get_arr(LLM_KV_ADAPTER_LORA_NAMES, lora_names, false);
+                    ml.get_arr(LLM_KV_ADAPTER_LORA_PROMPT_PREFIXES, lora_prompt_prefixes, false);
+                    GGML_ASSERT(lora_names.size() == lora_prompt_prefixes.size());
+
+                    for (size_t i = 0; i < lora_names.size(); ++i) {
+                        llama_adapter_lora * adapter = new llama_adapter_lora();
+                        std::string lora_name = lora_names[i];
+
+                        adapter->alpha = lora_alpha;
+                        adapter->prompt_prefix = lora_prompt_prefixes[i];
+                        loras[lora_name] = adapter;
+
+                        add_lora_tensors(lora_name, tok_embd->name);
+
+                        if (type_embd) {
+                            add_lora_tensors(lora_name, type_embd->name);
+                        }
+                    }
+                }
+
                 for (int i = 0; i < n_layer; ++i) {
                     auto & layer = layers[i];
 
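The A/B pairs gathered here follow the standard LoRA formulation: the adapted projection computes y = W x + (alpha / r) * B (A x), where r is the adapter rank (the inner dimension shared by A and B) and alpha is the value read from LLM_KV_ADAPTER_LORA_ALPHA above. Below is a rough, self-contained sketch of that arithmetic on plain float matrices; it illustrates the math only, not the ggml graph code or the exact scaling llama.cpp applies at run time (a per-adapter scale can also factor in), and the function name lora_forward is made up.

#include <cstddef>
#include <vector>

// y = W x + (alpha / r) * B (A x); W is n_out x n_in, A is r x n_in, B is n_out x r.
static std::vector<float> lora_forward(
        const std::vector<std::vector<float>> & W,
        const std::vector<std::vector<float>> & A,
        const std::vector<std::vector<float>> & B,
        const std::vector<float> & x,
        float alpha) {
    const size_t n_out = W.size(), n_in = x.size(), r = A.size();
    const float scale = alpha / (float) r;

    std::vector<float> ax(r, 0.0f);                 // ax = A x  (rank-sized bottleneck)
    for (size_t i = 0; i < r; ++i) {
        for (size_t j = 0; j < n_in; ++j) {
            ax[i] += A[i][j] * x[j];
        }
    }

    std::vector<float> y(n_out, 0.0f);              // y = W x + scale * B ax
    for (size_t i = 0; i < n_out; ++i) {
        for (size_t j = 0; j < n_in; ++j) {
            y[i] += W[i][j] * x[j];
        }
        for (size_t k = 0; k < r; ++k) {
            y[i] += scale * B[i][k] * ax[k];
        }
    }
    return y;
}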
@@ -2300,6 +2337,17 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         }
                     }
 
+                    if (arch == LLM_ARCH_JINA_BERT_V3) {
+                        GGML_ASSERT(layer.wqkv != nullptr);
+
+                        for (const auto & lora_name : lora_names) {
+                            add_lora_tensors(lora_name, layer.wqkv->name);
+                            add_lora_tensors(lora_name, layer.wo->name);
+                            add_lora_tensors(lora_name, layer.ffn_up->name);
+                            add_lora_tensors(lora_name, layer.ffn_down->name);
+                        }
+                    }
+
                     layer.layer_out_norm = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd}, 0);
                     layer.layer_out_norm_b = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd}, 0);
                 }
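As a usage illustration only, not something introduced by this diff: once the loader has populated the loras map, a caller could select a task adapter by name and prepend its prompt_prefix to the text being embedded. The stub struct and helper below are hypothetical stand-ins; the real llama_adapter_lora type is defined elsewhere in llama.cpp, and only the alpha and prompt_prefix fields touched above are mirrored here.

#include <map>
#include <string>

// Hypothetical stand-in for the fields of llama_adapter_lora used by this change.
struct lora_adapter_stub {
    float       alpha = 1.0f;
    std::string prompt_prefix;
};

// Hypothetical helper: prepend the adapter's task prefix when the task is known,
// otherwise embed the raw text unchanged.
static std::string apply_task_prefix(
        const std::map<std::string, lora_adapter_stub *> & loras,
        const std::string & lora_name,
        const std::string & text) {
    auto it = loras.find(lora_name);
    return it == loras.end() ? text : it->second->prompt_prefix + text;
}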