ggml-cuda: add some more comments

am17an · am17an · commit cfa1a02c54e4 · 2025-11-14T23:38:01.000+08:00
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3189,6 +3189,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
     // flag used to determine whether it is an integrated_gpu
     const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated;
 
+    ggml_cuda_stream_context & stream_ctx = cuda_ctx->stream_context();
     bool                         is_concurrent_event_active = false;
     ggml_cuda_concurrent_event * concurrent_event           = nullptr;
 
@@ -3215,9 +3216,8 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
         if (!use_cuda_graph || cuda_graph_update_required) {
             [[maybe_unused]] int prev_i = 0;
 
-            ggml_cuda_stream_context & stream_ctx = cuda_ctx->stream_context();
-
             if (stream_ctx.concurrent_events.size() > 0) {
+                // Restore the original graph to enable fusion within the streams
                 cgraph->nodes = const_cast<ggml_tensor **>(stream_ctx.original_graph.data());
             }