Skip to content

Commit af5f50f

Browse files
authored
Remove embd hack from qwen3vl-moe.cpp
1 parent 981d578 commit af5f50f

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/models/qwen3vl-moe.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
#include "models.h"
22

33
llm_build_qwen3vlmoe::llm_build_qwen3vlmoe(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
4-
const int64_t n_embd_full = hparams.n_embd; // main embd + deepstack embds
54
const size_t n_deepstack_layers = hparams.n_deepstack_layers;
6-
const int64_t n_embd = n_embd_full / (n_deepstack_layers + 1);
5+
const int64_t n_embd_full = hparams.n_embd * (n_deepstack_layers + 1); // main embd + deepstack embds
6+
const int64_t n_embd = hparams.n_embd;
77
const int64_t n_embd_head = hparams.n_embd_head_v;
88

99
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
@@ -21,9 +21,9 @@ llm_build_qwen3vlmoe::llm_build_qwen3vlmoe(const llama_model & model, const llm_
2121

2222
if (ubatch.embd) {
2323
// Image input: split main embd and deepstack embds
24-
ggml_tensor * inpL_main = ggml_view_2d(ctx0, inpL, n_embd, n_tokens, inpL->nb[1], 0);
24+
ggml_tensor * inpL_main = ggml_view_2d(ctx0, inpL, n_embd_full, n_tokens, inpL->nb[1], 0);
2525
for (size_t i = 0; i < n_deepstack_layers; i++) {
26-
deepstack_features[i] = ggml_view_2d(ctx0, inpL, n_embd, n_tokens, inpL->nb[1], (i + 1) * n_embd * sizeof(float));
26+
deepstack_features[i] = ggml_view_2d(ctx0, inpL, n_embd_full, n_tokens, inpL->nb[1], (i + 1) * n_embd * sizeof(float));
2727
}
2828
inpL = inpL_main;
2929
}

0 commit comments

Comments
 (0)