We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bca194d, commit e0c9d3d (Copy full SHA for e0c9d3d)
cpp/src/routing/cuda_graph.cuh
@@ -20,7 +20,9 @@ namespace detail {
20
struct cuda_graph_t {
21
void start_capture(rmm::cuda_stream_view stream)
22
{
23
- cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal);
+ // Use ThreadLocal mode to allow multi-threaded batch execution
24
+ // Global mode prohibits potentially unsafe CUDA API calls (e.g. cudaMalloc) in every thread while a capture is in progress
25
+ cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
26
capture_started = true;
27
}
28
0 commit comments