File tree: 1 file changed (+2 -2 lines changed)
lines changed Original file line number Diff line number Diff line change @@ -3189,6 +3189,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
31893189 // flag used to determine whether it is an integrated_gpu
31903190 const bool integrated = ggml_cuda_info ().devices [cuda_ctx->device ].integrated ;
31913191
3192+ ggml_cuda_stream_context & stream_ctx = cuda_ctx->stream_context ();
31923193 bool is_concurrent_event_active = false ;
31933194 ggml_cuda_concurrent_event * concurrent_event = nullptr ;
31943195
@@ -3215,9 +3216,8 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
32153216 if (!use_cuda_graph || cuda_graph_update_required) {
32163217 [[maybe_unused]] int prev_i = 0 ;
32173218
3218- ggml_cuda_stream_context & stream_ctx = cuda_ctx->stream_context ();
3219-
32203219 if (stream_ctx.concurrent_events .size () > 0 ) {
3220+ // Restore the original graph to enable fusion within the streams
32213221 cgraph->nodes = const_cast <ggml_tensor **>(stream_ctx.original_graph .data ());
32223222 }
32233223
You can’t perform that action at this time.
0 commit comments