Skip to content

Commit 67630cf

Browse files
authored
Fix mean.cu for builds with USE_CUDA_GRAPH=OFF
ggml-ci
1 parent 29c8fbe commit 67630cf

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

ggml/src/ggml-cuda/mean.cu

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
2828
cudaStreamCaptureStatus iscapturing;
2929
CUDA_CHECK(cudaStreamIsCapturing(stream, &iscapturing));
3030
if ((nrows == 1) &&
31+
#ifdef USE_CUDA_GRAPH
3132
// CUDA_GRAPHS_DISABLED
3233
((ncols > 65536) &&
3334
((ctx.cuda_graph->instance == nullptr) && (iscapturing == cudaStreamCaptureStatusNone) ||
@@ -38,6 +39,9 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
3839
!((ctx.cuda_graph->instance == nullptr) && (iscapturing == cudaStreamCaptureStatusNone) ||
3940
ctx.cuda_graph->disable_due_to_gpu_arch || ctx.cuda_graph->disable_due_to_too_many_updates ||
4041
ctx.cuda_graph->disable_due_to_failed_graph_capture))) {
42+
#else
43+
(ncols > 65536)) {
44+
#endif // USE_CUDA_GRAPH
4145
// Single row - use device-wide reduction
4246
size_t tmp_size = 0;
4347
ggml_cuda_pool & pool = ctx.pool();
@@ -51,7 +55,7 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
5155
divide_by_count<float><<<1, 1, 0, stream>>>(dst_d, ncols);
5256
return;
5357
}
54-
#endif
58+
#endif // GGML_CUDA_USE_CUB
5559

5660
const dim3 block_nums(nrows, 1, 1);
5761

0 commit comments

Comments
 (0)