We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 20424d8 · commit 54bb6f1 (Copy full SHA for 54bb6f1)
examples/model-conversion/qwen3stories.sh
@@ -0,0 +1,3 @@
1
+export MODEL_PATH=/devel/tools/llama.cpp/reference/theo77186_Qwen3-Next-70M-TinyStories
2
+export CONVERTED_MODEL=/devel/tools/llama.cpp/reference/theo77186_Qwen3-Next-70M-TinyStories/theo77186_Qwen3-Next-70M-TinyStories.gguf
3
+make causal-verify-logits
src/models/llm_build_qwen3next.cpp
@@ -279,14 +279,13 @@ struct ggml_tensor * llm_build_qwen3next::delta_net(
279
cb(q, "q_postscale", il);
280
cb(beta, "beta_sigmoid", il);
281
282
- // Pad first along the token dimension
283
- q = ggml_pad(ctx, q, 0, 0, pad_size, 0);
284
- k = ggml_pad(ctx, k, 0, 0, pad_size, 0);
285
- v = ggml_pad(ctx, v, 0, 0, pad_size, 0);
286
-
287
q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));
288
k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3));
289
v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3));
+
+ q = ggml_pad(ctx, q, 0, pad_size, 0, 0);
+ k = ggml_pad(ctx, k, 0, pad_size, 0, 0);
+ v = ggml_pad(ctx, v, 0, pad_size, 0, 0);
290
291
beta = ggml_cont(ctx, ggml_permute(ctx, beta, 1, 2, 0, 3));
292
cb(beta, "beta_reshape", il);
0 commit comments