@@ -276,8 +276,8 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w
             } break;
         case GGML_OP_IM2COL:
             {
-                const int n_embd = hparams.n_embd_full;
-                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd, w->ne[1], 1, 1);
+                const int n_embd_inp = hparams.n_embd_inp();
+                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd_inp, w->ne[1], 1, 1);
                 op_tensor = ggml_im2col(ctx, w, b, 1, 0, 0, 0, 1, 0, false, GGML_TYPE_F16);
             } break;
         case GGML_OP_SCALE:
@@ -505,7 +505,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);
     ml.get_key(LLM_KV_EXPERT_GROUP_COUNT, hparams.n_expert_groups, false);
     ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used, false);
-    hparams.n_embd_full = hparams.n_embd;
 
     if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
         ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features);
@@ -1040,9 +1039,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 64: type = LLM_TYPE_32B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd_full *= hparams.n_deepstack_layers + 1;
             } break;
         case LLM_ARCH_QWEN3MOE:
             {
@@ -1066,9 +1062,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 94: type = LLM_TYPE_235B_A22B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd_full *= hparams.n_deepstack_layers + 1;
             } break;
         case LLM_ARCH_PHI2:
             {
@@ -6475,6 +6468,7 @@ void llama_model::print_info() const {
     if (!hparams.vocab_only) {
         LLAMA_LOG_INFO("%s: n_ctx_train = %u\n", __func__, hparams.n_ctx_train);
         LLAMA_LOG_INFO("%s: n_embd      = %u\n", __func__, hparams.n_embd);
+        LLAMA_LOG_INFO("%s: n_embd_inp  = %u\n", __func__, hparams.n_embd_inp());
         LLAMA_LOG_INFO("%s: n_layer     = %u\n", __func__, hparams.n_layer);
         LLAMA_LOG_INFO("%s: n_head      = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_head(il); }, hparams.n_layer).c_str());
         LLAMA_LOG_INFO("%s: n_head_kv   = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_head_kv(il); }, hparams.n_layer).c_str());
@@ -6674,8 +6668,9 @@ ggml_backend_buffer_type_t llama_model::select_buft(int il) const {
     return ::select_buft(
             *pimpl->dev_layer.at(il).buft_list,
             [&](ggml_context * ctx) {
-                ggml_tensor * cur = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hparams.n_embd_full);
-                ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hparams.n_embd_full);
+                const int n_embd_inp = hparams.n_embd_inp();
+                ggml_tensor * cur = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd_inp);
+                ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd_inp);
                 return ggml_add(ctx, cur, layer_dir);
             });
 }
@@ -7322,8 +7317,8 @@ int32_t llama_model_n_embd(const llama_model * model) {
     return model->hparams.n_embd;
 }
 
-int32_t llama_model_n_embd_full(const llama_model * model) {
-    return model->hparams.n_embd_full;
+int32_t llama_model_n_embd_inp(const llama_model * model) {
+    return model->hparams.n_embd_inp();
 }
 
 int32_t llama_model_n_layer(const llama_model * model) {
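For context, a minimal sketch of what the new hparams.n_embd_inp() accessor presumably computes, inferred from the deepstack handling removed in load_hparams above (the base n_embd widened by the stacked deepstack features). This is an assumption for illustration, not the exact code added by this change:

// Assumption: reconstructs the old "n_embd_full" semantics as an accessor
// on llama_hparams instead of a stored field.
uint32_t llama_hparams::n_embd_inp() const {
    uint32_t n_embd_inp = n_embd;

    // vision models (e.g. Qwen3-VL) stack deepstack features along the
    // feature dimension, so the text-model input embedding is wider
    if (n_deepstack_layers > 0) {
        n_embd_inp *= n_deepstack_layers + 1;
    }

    return n_embd_inp;
}

Computing this on demand avoids carrying a second, mostly redundant "full" embedding size in the hparams struct; callers that care about the input width (im2col buffer sizing, buft selection, the public llama_model_n_embd_inp API) query the accessor, while everything else keeps using n_embd.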