Commit 03341fb

rename to n_embd_inp
1 parent 8e28665 commit 03341fb

7 files changed: +29 −22 lines changed


include/llama.h

Lines changed: 1 addition & 1 deletion
@@ -482,7 +482,7 @@ extern "C" {
 
     LLAMA_API int32_t llama_model_n_ctx_train(const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_embd     (const struct llama_model * model);
-    LLAMA_API int32_t llama_model_n_embd_full(const struct llama_model * model);
+    LLAMA_API int32_t llama_model_n_embd_inp (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_layer    (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_head    (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
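For downstream callers the practical effect is just the new getter name. Below is a minimal usage sketch, not part of this commit; the helper print_embd_dims and the loaded model pointer are hypothetical. It shows the renamed API next to the plain embedding getter; for models without deepstack layers the two values should be equal.

#include "llama.h"
#include <cstdio>

// Hypothetical helper: print both embedding dimensions of a loaded model.
// llama_model_n_embd_inp() is the getter introduced by this commit.
static void print_embd_dims(const struct llama_model * model) {
    const int32_t n_embd     = llama_model_n_embd(model);     // main embedding width
    const int32_t n_embd_inp = llama_model_n_embd_inp(model); // main + auxiliary (deepstack) input width
    printf("n_embd = %d, n_embd_inp = %d\n", n_embd, n_embd_inp);
}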

src/llama-context.cpp

Lines changed: 4 additions & 4 deletions
@@ -620,7 +620,7 @@ float * llama_context::get_embeddings_ith(int32_t i) {
             throw std::runtime_error(format("corrupt output buffer (j=%" PRId64 ", n_outputs=%d)", j, n_outputs));
         }
 
-        return embd + j*model.hparams.n_embd_full;
+        return embd + j*model.hparams.n_embd_inp();
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: invalid embeddings id %d, reason: %s\n", __func__, i, err.what());
 #ifndef NDEBUG
@@ -808,7 +808,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
 
     const auto & hparams = model.hparams;
 
-    const int64_t n_embd = hparams.n_embd_full;
+    const int64_t n_embd = hparams.n_embd_inp();
     const int64_t n_vocab = model.vocab.n_tokens();
 
     // note: during encode, we always pass the full sequence starting from pos = 0
@@ -977,7 +977,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
     const auto & hparams = model.hparams;
 
     const int64_t n_vocab = vocab.n_tokens();
-    const int64_t n_embd = hparams.n_embd_full;
+    const int64_t n_embd = hparams.n_embd_inp();
 
     // when computing embeddings, all tokens are output
     const bool output_all = cparams.embeddings;
@@ -2135,7 +2135,7 @@ void llama_context::opt_epoch_iter(
             batch.logits [pos_batch] = true;
         }
 
-        if (!balloc->init(batch, model.vocab, nullptr, model.hparams.n_embd_full, cparams.kv_unified ? LLAMA_MAX_SEQ : cparams.n_seq_max, true)) {
+        if (!balloc->init(batch, model.vocab, nullptr, model.hparams.n_embd_inp(), cparams.kv_unified ? LLAMA_MAX_SEQ : cparams.n_seq_max, true)) {
            LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
            return;
        }
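The context changes all treat n_embd_inp() as the per-row width of the input/output embedding buffers, e.g. embd + j*model.hparams.n_embd_inp() in get_embeddings_ith. A small illustration of that addressing, assuming a flat row-major buffer (hypothetical helper, not code from the commit):

#include <cstdint>
#include <vector>

// Row j of a flat buffer whose rows are n_embd_inp floats wide starts at
// offset j * n_embd_inp, mirroring the pointer arithmetic in get_embeddings_ith.
static float * embd_row(std::vector<float> & embd, int64_t j, int64_t n_embd_inp) {
    return embd.data() + j * n_embd_inp;
}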

src/llama-graph.cpp

Lines changed: 2 additions & 2 deletions
@@ -1142,7 +1142,7 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
 
 // input embeddings with optional lora
 ggml_tensor * llm_graph_context::build_inp_embd(ggml_tensor * tok_embd) const {
-    const int64_t n_embd = hparams.n_embd_full;
+    const int64_t n_embd = hparams.n_embd_inp();
 
     auto inp = std::make_unique<llm_graph_input_embd>();
 
@@ -1279,7 +1279,7 @@ ggml_tensor * llm_graph_context::build_inp_cross_embd() const {
    //    return cur;
    //}
 
-    const auto n_embd = !cross->v_embd.empty() ? cross->n_embd : hparams.n_embd_full;
+    const auto n_embd = !cross->v_embd.empty() ? cross->n_embd : hparams.n_embd_inp();
    const auto n_enc = !cross->v_embd.empty() ? cross->n_enc : hparams.n_ctx_train;
 
    cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_enc);

src/llama-hparams.cpp

Lines changed: 10 additions & 0 deletions
@@ -60,6 +60,16 @@ uint32_t llama_hparams::n_gqa(uint32_t il) const {
     return n_head/n_head_kv;
 }
 
+uint32_t llama_hparams::n_embd_inp() const {
+    uint32_t n_embd_inp = n_embd;
+
+    if (n_deepstack_layers > 0) {
+        n_embd_inp += n_embd * n_deepstack_layers;
+    }
+
+    return n_embd_inp;
+}
+
 uint32_t llama_hparams::n_embd_k_gqa(uint32_t il) const {
     const uint32_t n_head_kv = this->n_head_kv(il);
 
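The new helper derives the input embedding width on the fly instead of storing a separate field: the main n_embd plus one extra n_embd per deepstack layer. A standalone sketch of the arithmetic with made-up values (the 2048-wide embedding and 3 deepstack layers are assumptions for illustration only):

#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t n_embd             = 2048; // hypothetical main embedding width
    const uint32_t n_deepstack_layers = 3;    // hypothetical auxiliary layer count

    // same rule as llama_hparams::n_embd_inp()
    uint32_t n_embd_inp = n_embd;
    if (n_deepstack_layers > 0) {
        n_embd_inp += n_embd * n_deepstack_layers;
    }

    printf("n_embd_inp = %u\n", n_embd_inp); // 2048 + 2048*3 = 8192
    return 0;
}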

src/llama-hparams.h

Lines changed: 3 additions & 1 deletion
@@ -40,7 +40,6 @@ struct llama_hparams {
 
     uint32_t n_ctx_train; // context size the model was trained on
     uint32_t n_embd;
-    uint32_t n_embd_full; // main + auxiliary embeds
     uint32_t n_embd_features = 0;
     uint32_t n_layer;
     int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
@@ -228,6 +227,9 @@ struct llama_hparams {
 
     uint32_t n_gqa(uint32_t il = 0) const;
 
+    // dimension of main + auxiliary input embeddings
+    uint32_t n_embd_inp() const;
+
     // dimension of key embeddings across all k-v heads
     uint32_t n_embd_k_gqa(uint32_t il = 0) const;
 

src/llama-model.cpp

Lines changed: 8 additions & 13 deletions
@@ -276,8 +276,8 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w
             } break;
         case GGML_OP_IM2COL:
             {
-                const int n_embd = hparams.n_embd_full;
-                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd, w->ne[1], 1, 1);
+                const int n_embd_inp = hparams.n_embd_inp();
+                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd_inp, w->ne[1], 1, 1);
                 op_tensor = ggml_im2col(ctx, w, b, 1, 0, 0, 0, 1, 0, false, GGML_TYPE_F16);
             } break;
         case GGML_OP_SCALE:
@@ -505,7 +505,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);
     ml.get_key(LLM_KV_EXPERT_GROUP_COUNT, hparams.n_expert_groups, false);
     ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used, false);
-    hparams.n_embd_full = hparams.n_embd;
 
     if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
         ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features);
@@ -1040,9 +1039,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 64: type = LLM_TYPE_32B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd_full *= hparams.n_deepstack_layers + 1;
             } break;
         case LLM_ARCH_QWEN3MOE:
             {
@@ -1066,9 +1062,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 94: type = LLM_TYPE_235B_A22B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd_full *= hparams.n_deepstack_layers + 1;
             } break;
         case LLM_ARCH_PHI2:
             {
@@ -6475,6 +6468,7 @@ void llama_model::print_info() const {
     if (!hparams.vocab_only) {
         LLAMA_LOG_INFO("%s: n_ctx_train = %u\n", __func__, hparams.n_ctx_train);
         LLAMA_LOG_INFO("%s: n_embd = %u\n", __func__, hparams.n_embd);
+        LLAMA_LOG_INFO("%s: n_embd_inp = %u\n", __func__, hparams.n_embd_inp());
         LLAMA_LOG_INFO("%s: n_layer = %u\n", __func__, hparams.n_layer);
         LLAMA_LOG_INFO("%s: n_head = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_head(il); }, hparams.n_layer).c_str());
         LLAMA_LOG_INFO("%s: n_head_kv = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_head_kv(il); }, hparams.n_layer).c_str());
@@ -6674,8 +6668,9 @@ ggml_backend_buffer_type_t llama_model::select_buft(int il) const {
     return ::select_buft(
         *pimpl->dev_layer.at(il).buft_list,
         [&](ggml_context * ctx) {
-            ggml_tensor * cur = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hparams.n_embd_full);
-            ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hparams.n_embd_full);
+            const int n_embd_inp = hparams.n_embd_inp();
+            ggml_tensor * cur = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd_inp);
+            ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd_inp);
             return ggml_add(ctx, cur, layer_dir);
         });
 }
@@ -7322,8 +7317,8 @@ int32_t llama_model_n_embd(const llama_model * model) {
     return model->hparams.n_embd;
 }
 
-int32_t llama_model_n_embd_full(const llama_model * model) {
-    return model->hparams.n_embd_full;
+int32_t llama_model_n_embd_inp(const llama_model * model) {
+    return model->hparams.n_embd_inp();
 }
 
 int32_t llama_model_n_layer(const llama_model * model) {

tools/mtmd/mtmd.cpp

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ struct mtmd_context {
         print_timings(ctx_params.print_timings),
         n_threads (ctx_params.n_threads),
         media_marker (ctx_params.media_marker),
-        n_embd_text (llama_model_n_embd_full(text_model))
+        n_embd_text (llama_model_n_embd_inp(text_model))
     {
         if (std::string(ctx_params.image_marker) != MTMD_DEFAULT_IMAGE_MARKER) {
             throw std::runtime_error("custom image_marker is not supported anymore, use media_marker instead");
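mtmd keeps its text-side embedding width (n_embd_text) equal to the model's input width, so multimodal embeddings that stack deepstack features along the feature dimension still fit the text model's input. A rough sizing sketch under that assumption (hypothetical helper and token count, not from the commit):

#include <cstddef>
#include <cstdint>

// Number of floats needed for a batch of n_tokens media embeddings when each
// row must be n_embd_text (= llama_model_n_embd_inp of the text model) wide.
static size_t media_embd_floats(int64_t n_tokens, int64_t n_embd_text) {
    return (size_t) n_tokens * (size_t) n_embd_text;
}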

0 commit comments
