@@ -2210,9 +2210,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
 
                         // optional bias tensors
-                        layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q,   "bias", i), {n_embd}, 0);
-                        layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K,   "bias", i), {n_embd_gqa}, 0);
-                        layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V,   "bias", i), {n_embd_gqa}, 0);
+                        layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q,   "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
+                        layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K,   "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
+                        layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V,   "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
 
                         layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
 
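For context on the hunk above: the change relies on the loader convention that a tensor requested with the TENSOR_NOT_REQUIRED flag may legitimately be absent from the GGUF file, in which case create_tensor returns nullptr instead of aborting the load. A minimal sketch of that convention follows; create_tensor_sketch, fake_tensor, and gguf_tensors are illustrative stand-ins for this explanation, not the actual llama_model_loader implementation.

#include <map>
#include <stdexcept>
#include <string>

// Illustrative stand-ins only -- not the real llama.cpp loader types.
struct fake_tensor {};

enum : int { TENSOR_REQUIRED = 0, TENSOR_NOT_REQUIRED = 1 };

// Pretend GGUF contents: name -> tensor (absent entries simulate a
// model file exported without the optional bias tensors).
static std::map<std::string, fake_tensor *> gguf_tensors;

fake_tensor * create_tensor_sketch(const std::string & name, int flags) {
    auto it = gguf_tensors.find(name);
    if (it == gguf_tensors.end()) {
        if (flags & TENSOR_NOT_REQUIRED) {
            return nullptr;   // optional tensor missing: caller must null-check
        }
        throw std::runtime_error("missing required tensor: " + name);
    }
    return it->second;
}

Under this convention, layer.bq/bk/bv become nullptr for checkpoints without attention biases, which is why the graph-building code in the next hunk must guard each use.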
@@ -6193,16 +6193,25 @@ struct llm_build_qwen2moe : public llm_graph_context {
             {
                 // compute Q and K and RoPE them
                 ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
-                Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
+                if (model.layers[il].bq) {
+                    Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
+                    cb(Qcur, "Qcur", il);
+                }
 
                 ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
-                Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
                 cb(Kcur, "Kcur", il);
+                if (model.layers[il].bk) {
+                    Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
+                    cb(Kcur, "Kcur", il);
+                }
 
                 ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
-                Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
                 cb(Vcur, "Vcur", il);
+                if (model.layers[il].bv) {
+                    Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
+                    cb(Vcur, "Vcur", il);
+                }
 
                 Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
                 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
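This hunk is the matching half of the loader change: because the bias tensors can now be nullptr, each ggml_add is wrapped in a null check, and cb re-labels the node only when the add actually produces a new tensor. A self-contained sketch of the same guard pattern against the public ggml API; add_bias_if_present is a hypothetical helper written for illustration, not a function in the codebase.

#include "ggml.h"

// Guarded bias add: mirrors the pattern introduced above, where an
// optional bias may be nullptr and must be skipped, not dereferenced.
static struct ggml_tensor * add_bias_if_present(struct ggml_context * ctx,
                                                struct ggml_tensor  * cur,
                                                struct ggml_tensor  * bias) {
    if (bias != NULL) {
        cur = ggml_add(ctx, cur, bias);   // bias present: apply it
    }
    return cur;                           // bias absent: pass through unchanged
}

The guard is cheap because it runs at graph-build time, once per layer per graph, rather than inside any per-token compute kernel.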