Skip to content

Commit 638057a

Browse files
committed
Transpose input for convolution
1 parent 835d389 commit 638057a

File tree

1 file changed

+1
-2
lines changed

1 file changed

+1
-2
lines changed

src/models/llm_build_qwen3next.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,15 +407,14 @@ ggml_tensor * llm_build_qwen3next::build_qwen3next_linear_attn_layer(llm_graph_i
407407
int64_t qkv_dim = head_k_dim * num_k_heads * 2 + head_v_dim * num_v_heads;
408408

409409
// Transpose and make contiguous as [n_tokens, qkv_dim, n_seqs] for proper convolution input format
410-
qkv_mixed = ggml_reshape_3d(ctx0, qkv_mixed, n_tokens, qkv_dim, n_seqs);
410+
qkv_mixed = ggml_cont_3d(ctx0, ggml_transpose(ctx0, qkv_mixed), n_tokens, qkv_dim, n_seqs);
411411
cb(qkv_mixed, "qkv_mixed_for_conv", il);
412412

413413
// Calculate convolution kernel size
414414
const int64_t conv_kernel_size = model.layers[il].ssm_conv1d->ne[0];
415415
conv_states = ggml_reshape_3d(ctx0, conv_states, conv_kernel_size - 1, d_inner + 2 * hparams.ssm_n_group * hparams.ssm_d_state, n_seqs);
416416
cb(conv_states, "conv_states_reshaped", il);
417417

418-
// Now concatenate along the sequence dimension (dim 0 in Llama.cpp)
419418
ggml_tensor * conv_input = ggml_concat(ctx0, conv_states, qkv_mixed, 0);
420419
cb(conv_input, "conv_input", il);
421420

0 commit comments

Comments
 (0)