@@ -663,22 +663,14 @@ ggml_tensor * llm_graph_context::build_ffn(
             {
                 // Split into two equal parts
                 int64_t split_point = cur->ne[0] / 2;
-                ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
-                                                ctx0, cur, split_point,
-                                                cur->ne[1], cur->nb[1], 0
-                                            ));
-                ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
-                                                ctx0, cur, split_point,
-                                                cur->ne[1], cur->nb[1],
-                                                split_point * ggml_element_size(cur)
-                                            ));
-
-                // Apply GELU activation function to the first part
-                output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
-                cb(output_ffn_up, "ffn_gelu", il);
-
-                // Element-wise multiplication between the activated part and the gate part
-                cur = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
+                // TODO: these conts should not be needed
+                ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0));
+                ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur)));
+
+                x0 = ggml_gelu(ctx0, x0);
+                cb(x0, "ffn_gelu", il);
+
+                cur = ggml_mul(ctx0, x0, x1);
                 cb(cur, "ffn_geglu", il);
             } break;
     }
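
Note on what this hunk computes: the fused up+gate projection output `cur` is split in half along `ne[0]` (the feature dimension), the first half is passed through GELU, and the result gates the second half element-wise, i.e. GEGLU(x) = GELU(x0) * x1. The `ggml_cont` calls copy the two strided views into contiguous tensors before the activation; the added TODO notes that these copies should eventually be avoidable.

Below is a minimal standalone sketch of the same arithmetic on one row, not llama.cpp code. The `gelu()` helper assumes ggml's tanh-based GELU approximation (the 0.044715 / sqrt(2/pi) formula); the split at `split_point` mirrors the two `ggml_view_2d` offsets in the diff (0 and `split_point * ggml_element_size(cur)`).

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Tanh approximation of GELU (assumed to match the formula ggml_gelu uses).
static float gelu(float x) {
    const float a = 0.044715f;
    const float s = 0.79788456080286535588f; // sqrt(2/pi)
    return 0.5f * x * (1.0f + std::tanh(s * x * (1.0f + a * x * x)));
}

// GEGLU over one fused up+gate row [x0 | x1]: the first half is activated,
// the second half gates it element-wise, as in the diff above.
static std::vector<float> geglu_row(const std::vector<float> & fused) {
    const size_t split_point = fused.size() / 2;
    std::vector<float> out(split_point);
    for (size_t i = 0; i < split_point; ++i) {
        out[i] = gelu(fused[i]) * fused[split_point + i];
    }
    return out;
}

int main() {
    // One row of the fused projection output: first 4 values are x0, last 4 are x1.
    const std::vector<float> fused = { -1.0f, 0.0f, 1.0f, 2.0f,
                                        0.5f, 1.5f, -0.5f, 1.0f };
    for (float v : geglu_row(fused)) {
        std::printf("%f\n", v);
    }
    return 0;
}
```

The output has half the width of the input, which is why GEGLU FFN weights fuse the up and gate projections into a single matrix of twice the intermediate size.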