@@ -650,8 +650,9 @@ ggml_tensor * llm_graph_context::build_ffn(
650650 {
651651 // Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
652652 int64_t split_point = cur->ne [0 ] / 2 ;
653- ggml_tensor * x0 = ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], 0 );
654- ggml_tensor * x1 = ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], split_point * ggml_element_size (cur));
653+ // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217
654+ ggml_tensor * x0 = ggml_cont (ctx0, ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], 0 ));
655+ ggml_tensor * x1 = ggml_cont (ctx0, ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], split_point * ggml_element_size (cur)));
655656
656657 x0 = ggml_silu (ctx0, x0);
657658 cb (cur, " ffn_silu" , il);
@@ -663,8 +664,9 @@ ggml_tensor * llm_graph_context::build_ffn(
663664 {
664665 // Split into two equal parts
665666 int64_t split_point = cur->ne [0 ] / 2 ;
666- ggml_tensor * x0 = ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], 0 );
667- ggml_tensor * x1 = ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], split_point * ggml_element_size (cur));
667+ // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217
668+ ggml_tensor * x0 = ggml_cont (ctx0, ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], 0 ));
669+ ggml_tensor * x1 = ggml_cont (ctx0, ggml_view_2d (ctx0, cur, split_point, cur->ne [1 ], cur->nb [1 ], split_point * ggml_element_size (cur)));
668670
669671 x0 = ggml_gelu (ctx0, x0);
670672 cb (x0, " ffn_gelu" , il);
0 commit comments