@@ -317,14 +317,13 @@ ggml_tensor * llm_build_qwen3next::build_qwen3next_linear_attn_layer(llm_graph_i
         num_v_heads / num_k_heads // alpha size
     };
 
-    ggml_tensor * b =
-        ggml_view_4d(ctx0, mixed_ba_reshaped, split_sizes_ba[0], num_k_heads, n_tokens, n_seqs,
-                     split_sizes_ba[0] * sizeof(float), mixed_ba_reshaped->nb[1], mixed_ba_reshaped->nb[2], 0);
+    ggml_tensor * b = ggml_view_4d(ctx0, mixed_ba_reshaped, split_sizes_ba[0], num_k_heads, n_tokens, n_seqs,
+                                   mixed_ba_reshaped->nb[1], mixed_ba_reshaped->nb[2], mixed_ba_reshaped->nb[3], 0);
     cb(b, "b", il);
 
     ggml_tensor * a = ggml_view_4d(ctx0, mixed_ba_reshaped, split_sizes_ba[1], num_k_heads, n_tokens, n_seqs,
-                                   split_sizes_ba[1] * sizeof(float), mixed_ba_reshaped->nb[1],
-                                   mixed_ba_reshaped->nb[2], split_sizes_ba[0] * sizeof(float));
+                                   mixed_ba_reshaped->nb[1], mixed_ba_reshaped->nb[2], mixed_ba_reshaped->nb[3],
+                                   split_sizes_ba[0] * ggml_element_size(mixed_ba_reshaped));
     cb(a, "a", il);
 
     // Reshape b and a to merge head dimensions: [batch, seq_len, num_k_heads, num_v_heads/num_k_heads] -> [batch, seq_len, num_v_heads]
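For context on why the new stride arguments are correct: ggml views take byte strides, and a view that splits a tensor along dim 0 still walks memory with the source tensor's strides (here `mixed_ba_reshaped->nb[1..3]`), not strides recomputed from the view's narrower row width, which is what the old `split_sizes_ba[*] * sizeof(float)` values effectively did. Below is a minimal 2-D sketch of the same idiom, assuming only ggml's public API; the tensor names and the 6x4 shape are illustrative, not taken from the patch:

```cpp
#include "ggml.h"

int main() {
    ggml_init_params params = {
        /*.mem_size   =*/ 16u * 1024 * 1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    ggml_context * ctx = ggml_init(params);

    // source: 6 values per row, 4 rows (ggml's ne[0] is the contiguous dim)
    ggml_tensor * src = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 6, 4);

    // view of the first 2 values of each row: the view's row stride must be
    // the source's nb[1], because consecutive rows of the view are still
    // 6 (not 2) values apart in memory
    ggml_tensor * lo = ggml_view_2d(ctx, src, 2, 4, src->nb[1], 0);

    // view of the remaining 4 values: the offset is in bytes, so convert the
    // 2-element skip with ggml_element_size instead of hardcoding sizeof(float)
    ggml_tensor * hi = ggml_view_2d(ctx, src, 4, 4, src->nb[1],
                                    2 * ggml_element_size(src));

    // both views inherit the source row stride
    GGML_ASSERT(lo->nb[1] == src->nb[1] && hi->nb[1] == src->nb[1]);

    ggml_free(ctx);
    return 0;
}
```

The patch applies the same reasoning in 4-D: `b` and `a` reuse `mixed_ba_reshaped->nb[1..3]`, and the offset of `a` is `split_sizes_ba[0]` elements converted to bytes via `ggml_element_size`, which also stays correct if the tensor type is ever not F32.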