@@ -1249,8 +1249,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
12491249 ml.get_key (LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps );
12501250
12511251 switch (hparams.n_layer ) {
1252- case 32 : model. type = e_model::MODEL_7B ; break ;
1253- default : model. type = e_model::MODEL_UNKNOWN ;
1252+ case 32 : type = LLM_TYPE_7B ; break ;
1253+ default : type = LLM_TYPE_UNKNOWN ;
12541254 }
12551255 }break ;
12561256 case LLM_ARCH_WAVTOKENIZER_DEC:
@@ -3384,42 +3384,40 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
33843384 } break ;
33853385 case LLM_ARCH_COGVLM:
33863386 {
3387- model. tok_embd = ml. create_tensor (ctx_input, tn (LLM_TENSOR_TOKEN_EMBD, " weight" ), {n_embd, n_vocab});
3387+ tok_embd = create_tensor (tn (LLM_TENSOR_TOKEN_EMBD, " weight" ), {n_embd, n_vocab}, 0 );
33883388
3389- model. output_norm = ml. create_tensor (ctx_input, tn (LLM_TENSOR_OUTPUT_NORM, " weight" ), {n_embd});
3389+ output_norm = create_tensor (tn (LLM_TENSOR_OUTPUT_NORM, " weight" ), {n_embd}, 0 );
33903390
3391- model. output = ml. create_tensor (ctx_output, tn (LLM_TENSOR_OUTPUT, " weight" ), {n_embd, n_vocab});
3391+ output = create_tensor (tn (LLM_TENSOR_OUTPUT, " weight" ), {n_embd, n_vocab}, 0 );
33923392
33933393 // Not supporting ctx_split
33943394 for (int i=0 ; i < n_layer; i++) {
3395- ggml_context * ctx_layer = ctx_for_layer (i);
3396-
3397- auto & layer = model.layers [i];
3395+ auto & layer = layers[i];
33983396
3399- layer.attn_norm = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_NORM, " weight" , i), {n_embd});
3397+ layer.attn_norm = create_tensor (tn (LLM_TENSOR_ATTN_NORM, " weight" , i), {n_embd}, 0 );
34003398
3401- layer.wqkv_txt = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_TXT_QKV, " weight" , i), {n_embd, n_embd * 3 });
3402- layer.wqkv_img = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_IMG_QKV, " weight" , i), {n_embd, n_embd * 3 });
3403- layer.wdense_txt = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_TXT_DENSE, " weight" , i), {n_embd, n_embd});
3404- layer.wdense_img = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_IMG_DENSE, " weight" , i), {n_embd, n_embd});
3399+ layer.wqkv_txt = create_tensor (tn (LLM_TENSOR_ATTN_TXT_QKV, " weight" , i), {n_embd, n_embd * 3 }, 0 );
3400+ layer.wqkv_img = create_tensor (tn (LLM_TENSOR_ATTN_IMG_QKV, " weight" , i), {n_embd, n_embd * 3 }, 0 );
3401+ layer.wdense_txt = create_tensor (tn (LLM_TENSOR_ATTN_TXT_DENSE, " weight" , i), {n_embd, n_embd}, 0 );
3402+ layer.wdense_img = create_tensor (tn (LLM_TENSOR_ATTN_IMG_DENSE, " weight" , i), {n_embd, n_embd}, 0 );
34053403
3406- layer.attn_norm_2 = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_NORM_2, " weight" , i), {n_embd});
3404+ layer.attn_norm_2 = create_tensor (tn (LLM_TENSOR_ATTN_NORM_2, " weight" , i), {n_embd}, 0 );
34073405
3408- layer.wq_cross = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_CROSS_ATTN_Q, " weight" , i), {n_embd, n_embd_cross});
3406+ layer.wq_cross = create_tensor (tn (LLM_TENSOR_CROSS_ATTN_Q, " weight" , i), {n_embd, n_embd_cross}, 0 );
34093407 // The input dimension is the number of dimensions from the cross vision encoder
34103408 // it might not be guaranteed that this is the same as the number of dimensions
34113409 // in the cogvlm attention calculation
3412- layer.wkv_cross = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_CROSS_ATTN_KV, " weight" , i), {n_embd_cross, n_embd_cross * 2 });
3413- layer.wdense_cross = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_CROSS_ATTN_DENSE, " weight" , i), {n_embd_cross, n_embd});
3410+ layer.wkv_cross = create_tensor (tn (LLM_TENSOR_CROSS_ATTN_KV, " weight" , i), {n_embd_cross, n_embd_cross * 2 }, 0 );
3411+ layer.wdense_cross = create_tensor (tn (LLM_TENSOR_CROSS_ATTN_DENSE, " weight" , i), {n_embd_cross, n_embd}, 0 );
34143412
3415- layer.ffn_norm = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_NORM, " weight" , i), {n_embd});
3413+ layer.ffn_norm = create_tensor (tn (LLM_TENSOR_FFN_NORM, " weight" , i), {n_embd}, 0 );
34163414
3417- layer.ffn_gate_txt = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_TXT_GATE, " weight" , i), {n_embd, n_ff});
3418- layer.ffn_down_txt = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_TXT_DOWN, " weight" , i), {n_ff, n_embd});
3419- layer.ffn_up_txt = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_TXT_UP, " weight" , i), {n_embd, n_ff});
3420- layer.ffn_gate_img = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_IMG_GATE, " weight" , i), {n_embd, n_ff});
3421- layer.ffn_down_img = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_IMG_DOWN, " weight" , i), {n_ff, n_embd});
3422- layer.ffn_up_img = ml. create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_IMG_UP, " weight" , i), {n_embd, n_ff});
3415+ layer.ffn_gate_txt = create_tensor (tn (LLM_TENSOR_FFN_TXT_GATE, " weight" , i), {n_embd, n_ff}, 0 );
3416+ layer.ffn_down_txt = create_tensor (tn (LLM_TENSOR_FFN_TXT_DOWN, " weight" , i), {n_ff, n_embd}, 0 );
3417+ layer.ffn_up_txt = create_tensor (tn (LLM_TENSOR_FFN_TXT_UP, " weight" , i), {n_embd, n_ff}, 0 );
3418+ layer.ffn_gate_img = create_tensor (tn (LLM_TENSOR_FFN_IMG_GATE, " weight" , i), {n_embd, n_ff}, 0 );
3419+ layer.ffn_down_img = create_tensor (tn (LLM_TENSOR_FFN_IMG_DOWN, " weight" , i), {n_ff, n_embd}, 0 );
3420+ layer.ffn_up_img = create_tensor (tn (LLM_TENSOR_FFN_IMG_UP, " weight" , i), {n_embd, n_ff}, 0 );
34233421 }
34243422 } break ;
34253423 case LLM_ARCH_WAVTOKENIZER_DEC:
@@ -4170,6 +4168,7 @@ enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
41704168 case LLM_ARCH_GRANITE:
41714169 case LLM_ARCH_GRANITE_MOE:
41724170 case LLM_ARCH_CHAMELEON:
4171+ case LLM_ARCH_COGVLM:
41734172 return LLAMA_ROPE_TYPE_NORM;
41744173
41754174 // the pairs of head values are offset by n_rot/2
@@ -4309,3 +4308,10 @@ bool llama_model_is_recurrent(const struct llama_model * model) {
43094308 default : return false ;
43104309 }
43114310}
4311+
4312+ bool llama_model_has_cross_kv (const struct llama_model * model) { // Returns true when `model`'s architecture is listed below as having cross KV, false otherwise. NOTE(review): "cross KV" presumably means cross-attention key/value state - confirm against callers.
4313+ switch (model->arch ) { // dispatches on the architecture tag; `model` is dereferenced without a null check
4314+ case LLM_ARCH_COGVLM: return true ; // the only architecture listed; this same change loads wq_cross / wkv_cross / wdense_cross tensors for it
4315+ default : return false ; // every other architecture
4316+ }
4317+ }
0 commit comments