
Commit b85cddb

Remove n_embd hack from llama-model.cpp
1 parent: 2f68ce7

File tree: 1 file changed (+2, −14 lines)

src/llama-model.cpp

Lines changed: 2 additions & 14 deletions
@@ -1039,9 +1039,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 64: type = LLM_TYPE_32B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd *= hparams.n_deepstack_layers + 1;
             } break;
         case LLM_ARCH_QWEN3MOE:
             {
@@ -1065,9 +1062,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 94: type = LLM_TYPE_235B_A22B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd *= hparams.n_deepstack_layers + 1;
             } break;
         case LLM_ARCH_PHI2:
             {
@@ -3332,10 +3326,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_QWEN3:
         case LLM_ARCH_QWEN3VL:
             {
-                // for model loading, the weights only have the main embd
-                // so we need to divide by the number of deepstack layers + 1
-                // n_embd is const int so we declare a new variable
-                int64_t n_embd = hparams.n_embd / (hparams.n_deepstack_layers + 1);
+                int64_t n_embd = hparams.n_embd;
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

                 // output
@@ -3371,10 +3362,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_QWEN3MOE:
         case LLM_ARCH_QWEN3VLMOE:
             {
-                // for model loading, the weights only have the main embd
-                // so we need to divide by the number of deepstack layers + 1
-                // n_embd is const int so we declare a new variable
-                int64_t n_embd = hparams.n_embd / (hparams.n_deepstack_layers + 1);
+                int64_t n_embd = hparams.n_embd;
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

                 // output
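
For context, the removed comments describe the Qwen3-VL "deepstack" scheme: the vision tower stacks features from several deepstack layers along the feature dimension, so the old code inflated the text model's n_embd to main embd × (n_deepstack_layers + 1) in load_hparams and then divided it back out when creating the token-embedding tensor. A minimal standalone sketch of that before/after arithmetic (hypothetical values, not part of the commit or the llama.cpp API):

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical example values; the real ones come from the model's hparams.
    const int64_t n_embd_main        = 2048; // plain text-model embedding width
    const int64_t n_deepstack_layers = 3;    // number of stacked deepstack features

    // Old behaviour (removed by this commit): n_embd was inflated to cover the
    // main embedding plus the stacked deepstack embeddings ...
    const int64_t n_embd_hacked = n_embd_main * (n_deepstack_layers + 1);
    // ... and divided back out when the token-embedding tensor was created.
    const int64_t n_embd_tensor_old = n_embd_hacked / (n_deepstack_layers + 1);

    // New behaviour: n_embd stays the plain text-model width throughout.
    const int64_t n_embd_tensor_new = n_embd_main;

    std::printf("old tensor width: %lld, new tensor width: %lld\n",
                (long long) n_embd_tensor_old, (long long) n_embd_tensor_new);
    return 0;
}

Both paths produce the same tensor width; the commit simply drops the inflate-then-divide round trip so hparams.n_embd no longer carries a fake value.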
