@@ -1244,6 +1244,15 @@ void llama_model::load_hparams(llama_model_loader & ml) {
12441244 default : type = LLM_TYPE_UNKNOWN;
12451245 }
12461246 } break ;
1247+ case LLM_ARCH_COGVLM:
1248+ {
1249+ ml.get_key (LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps );
1250+
1251+ switch (hparams.n_layer ) {
1252+ case 32 : model.type = e_model::MODEL_7B; break ;
1253+ default : model.type = e_model::MODEL_UNKNOWN;
1254+ }
1255+ }break ;
12471256 case LLM_ARCH_WAVTOKENIZER_DEC:
12481257 {
12491258 ml.get_key (LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps );
@@ -1443,6 +1452,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
// Local copies of hyper-parameters used as tensor dimensions further down.
14431452 const int64_t n_expert = hparams.n_expert ;
14441453 const int64_t n_expert_used = hparams.n_expert_used ;
14451454 const int64_t n_ctx_train = hparams.n_ctx_train ;
// Width used for the cross-attention tensor shapes. Within the visible hunks it
// is referenced only by the LLM_ARCH_COGVLM case (wq_cross / wkv_cross / wdense_cross).
// NOTE(review): presumably left at its default for other architectures -- confirm
// that hparams.n_embd_cross is always initialised before this read.
1455+ const int64_t n_embd_cross = hparams.n_embd_cross ;
14461456
14471457 if (n_expert > 0 && hparams.n_expert_used == 0 ) {
14481458 throw std::runtime_error (" model has expert layers but no expert layers are used" );
@@ -3372,6 +3382,46 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
33723382 layer.ffn_up = create_tensor (tn (LLM_TENSOR_FFN_UP, " weight" , i), {n_embd, n_ff}, 0 );
33733383 }
33743384 } break ;
3385+ case LLM_ARCH_COGVLM:
3386+ {
3387+ model.tok_embd = ml.create_tensor (ctx_input, tn (LLM_TENSOR_TOKEN_EMBD, " weight" ), {n_embd, n_vocab});
3388+
3389+ model.output_norm = ml.create_tensor (ctx_input, tn (LLM_TENSOR_OUTPUT_NORM, " weight" ), {n_embd});
3390+
3391+ model.output = ml.create_tensor (ctx_output, tn (LLM_TENSOR_OUTPUT, " weight" ), {n_embd, n_vocab});
3392+
3393+ // Not supporting ctx_split
3394+ for (int i=0 ; i < n_layer; i++) {
3395+ ggml_context * ctx_layer = ctx_for_layer (i);
3396+
3397+ auto & layer = model.layers [i];
3398+
3399+ layer.attn_norm = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_NORM, " weight" , i), {n_embd});
3400+
3401+ layer.wqkv_txt = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_TXT_QKV, " weight" , i), {n_embd, n_embd * 3 });
3402+ layer.wqkv_img = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_IMG_QKV, " weight" , i), {n_embd, n_embd * 3 });
3403+ layer.wdense_txt = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_TXT_DENSE, " weight" , i), {n_embd, n_embd});
3404+ layer.wdense_img = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_IMG_DENSE, " weight" , i), {n_embd, n_embd});
3405+
3406+ layer.attn_norm_2 = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_NORM_2, " weight" , i), {n_embd});
3407+
3408+ layer.wq_cross = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_CROSS_ATTN_Q, " weight" , i), {n_embd, n_embd_cross});
3409+ // The input dimension is the number of dimensions from the cross vision encoder
3410+ // it might not be guaranteed that this is the same as the number of dimensions
3411+ // in the cogvlm attention calculation
3412+ layer.wkv_cross = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_CROSS_ATTN_KV, " weight" , i), {n_embd_cross, n_embd_cross * 2 });
3413+ layer.wdense_cross = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_CROSS_ATTN_DENSE, " weight" , i), {n_embd_cross, n_embd});
3414+
3415+ layer.ffn_norm = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_NORM, " weight" , i), {n_embd});
3416+
3417+ layer.ffn_gate_txt = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_TXT_GATE, " weight" , i), {n_embd, n_ff});
3418+ layer.ffn_down_txt = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_TXT_DOWN, " weight" , i), {n_ff, n_embd});
3419+ layer.ffn_up_txt = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_TXT_UP, " weight" , i), {n_embd, n_ff});
3420+ layer.ffn_gate_img = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_IMG_GATE, " weight" , i), {n_embd, n_ff});
3421+ layer.ffn_down_img = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_IMG_DOWN, " weight" , i), {n_ff, n_embd});
3422+ layer.ffn_up_img = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_FFN_IMG_UP, " weight" , i), {n_embd, n_ff});
3423+ }
3424+ } break ;
33753425 case LLM_ARCH_WAVTOKENIZER_DEC:
33763426 {
33773427 tok_embd = create_tensor (tn (LLM_TENSOR_TOKEN_EMBD, " weight" ), {hparams.n_embd_features , n_vocab}, 0 );
0 commit comments