Skip to content

Commit 981d578

Browse files
authored
Remove embd hack from qwen3vl.cpp
1 parent b85cddb commit 981d578

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/models/qwen3vl.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
llm_build_qwen3vl::llm_build_qwen3vl(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
44

5-
const int64_t n_embd_full = hparams.n_embd; // main embd + deepstack embds
65
const size_t n_deepstack_layers = hparams.n_deepstack_layers;
7-
const int64_t n_embd = n_embd_full / (n_deepstack_layers + 1);
6+
const int64_t n_embd_full = hparams.n_embd * (n_deepstack_layers + 1); // main embd + deepstack embds
7+
const int64_t n_embd = hparams.n_embd;
88
const int64_t n_embd_head = hparams.n_embd_head_v;
99

1010

@@ -23,9 +23,9 @@ llm_build_qwen3vl::llm_build_qwen3vl(const llama_model & model, const llm_graph_
2323

2424
if (ubatch.embd) {
2525
// Image input: split main embd and deepstack embds
26-
ggml_tensor * inpL_main = ggml_view_2d(ctx0, inpL, n_embd, n_tokens, inpL->nb[1], 0);
26+
ggml_tensor * inpL_main = ggml_view_2d(ctx0, inpL, n_embd_full, n_tokens, inpL->nb[1], 0);
2727
for (size_t i = 0; i < n_deepstack_layers; i++) {
28-
deepstack_features[i] = ggml_view_2d(ctx0, inpL, n_embd, n_tokens, inpL->nb[1], (i + 1) * n_embd * sizeof(float));
28+
deepstack_features[i] = ggml_view_2d(ctx0, inpL, n_embd_full, n_tokens, inpL->nb[1], (i + 1) * n_embd * sizeof(float));
2929
}
3030
inpL = inpL_main;
3131
}

0 commit comments

Comments (0)