@@ -6176,9 +6176,9 @@ static bool llm_load_tensors(
 layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});

 // optional MLP bias
-layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
-layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-layer.ffn_up_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
+layer.ffn_gate_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
+layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
 } else {
 layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});

@@ -6502,7 +6502,7 @@ static bool llm_load_tensors(
 layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa});

 layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); //output_dens
-layer.bo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens
+layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens

 layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
 layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});
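
Note on the change (a hedged summary, not text from the commit): ctx_split is the context whose buffer holds the large 2D weight matrices and may be split by rows across devices, while ctx_layer keeps a tensor whole on the layer's device. Bias tensors are 1D vectors, so allocating them from the split context is incorrect; moving them to ctx_layer matches how the surrounding 1D tensors (norm weights, the attention biases) are created. A minimal sketch of that selection rule follows, assuming the ctx_layer/ctx_split locals of llm_load_tensors; the ctx_for helper is illustrative only and not part of llama.cpp.

#include "ggml.h"

// Sketch only, not code from the repository: picks the allocation context
// the way the loader does by hand in each create_tensor call above.
static ggml_context * ctx_for(ggml_context * ctx_layer,
                              ggml_context * ctx_split,
                              int n_dims) {
    // 2D weight matrices may be row-split across devices, so they come from
    // the split context; 1D tensors such as biases and norm weights must
    // stay whole on the layer's device and come from the layer context.
    return n_dims >= 2 ? ctx_split : ctx_layer;
}

Under that rule, a {n_embd, n_ff} weight resolves to ctx_split and a {n_ff} bias resolves to ctx_layer, which is exactly what this diff restores for the MLP and attention-output biases.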