Skip to content

Commit 4ebd0c1

Browse files
authored
cuda : fix GGML_CUDA_GRAPHS=OFF (#15300)
* fix USE_CUDA_GRAPH=OFF ggml-ci
* check capture status
* completely disable capturing check instead
1 parent 5cdb27e commit 4ebd0c1

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

ggml/src/ggml-cuda/mean.cu

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,12 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
2525

2626
// Special case for reducing vectors
2727
#ifdef GGML_CUDA_USE_CUB
28+
#ifdef USE_CUDA_GRAPH
2829
cudaStreamCaptureStatus iscapturing;
2930
CUDA_CHECK(cudaStreamIsCapturing(stream, &iscapturing));
31+
#endif // USE_CUDA_GRAPH
3032
if ((nrows == 1) &&
33+
#ifdef USE_CUDA_GRAPH
3134
// CUDA_GRAPHS_DISABLED
3235
((ncols > 65536) &&
3336
((ctx.cuda_graph->instance == nullptr) && (iscapturing == cudaStreamCaptureStatusNone) ||
@@ -38,6 +41,9 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
3841
!((ctx.cuda_graph->instance == nullptr) && (iscapturing == cudaStreamCaptureStatusNone) ||
3942
ctx.cuda_graph->disable_due_to_gpu_arch || ctx.cuda_graph->disable_due_to_too_many_updates ||
4043
ctx.cuda_graph->disable_due_to_failed_graph_capture))) {
44+
#else
45+
(ncols > 65536)) {
46+
#endif // USE_CUDA_GRAPH
4147
// Single row - use device-wide reduction
4248
size_t tmp_size = 0;
4349
ggml_cuda_pool & pool = ctx.pool();
@@ -51,7 +57,7 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
5157
divide_by_count<float><<<1, 1, 0, stream>>>(dst_d, ncols);
5258
return;
5359
}
54-
#endif
60+
#endif // GGML_CUDA_USE_CUB
5561

5662
const dim3 block_nums(nrows, 1, 1);
5763

0 commit comments

Comments
 (0)