Skip to content

Commit 0e9a5d8

Browse files
committed
Rename USE_CUB to GGML_CUDA_USE_CUB
Suggested by @JohannesGaessler
1 parent e8373bf commit 0e9a5d8

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@
8888
#define GGML_CUDA_CC_IS_NG(cc) (cc >= GGML_CUDA_CC_NG)
8989

9090
#if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
91-
# define USE_CUB
91+
# define GGML_CUDA_USE_CUB
9292
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
9393

9494
#ifdef __CUDA_ARCH_LIST__

ggml/src/ggml-cuda/mean.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
#include "mean.cuh"
22
#include "reduce_rows.cuh"
33

4-
#ifdef USE_CUB
4+
#ifdef GGML_CUDA_USE_CUB
55
# include <cub/cub.cuh>
66
using namespace cub;
7-
#endif // USE_CUB
7+
#endif // GGML_CUDA_USE_CUB
88

99
template <typename T> __global__ void divide_by_count(T * result, size_t count) {
1010
*result /= static_cast<T>(count);
@@ -24,7 +24,7 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
2424
const int64_t nrows = ggml_nrows(src0);
2525

2626
// Special case for reducing vectors
27-
#ifdef USE_CUB
27+
#ifdef GGML_CUDA_USE_CUB
2828
cudaStreamCaptureStatus iscapturing;
2929
CUDA_CHECK(cudaStreamIsCapturing(stream, &iscapturing));
3030
if ((nrows == 1) &&

ggml/src/ggml-cuda/sum.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
#include "sum.cuh"
22
#include "sumrows.cuh"
33

4-
#ifdef USE_CUB
4+
#ifdef GGML_CUDA_USE_CUB
55
# include <cub/cub.cuh>
66
using namespace cub;
7-
#endif // USE_CUB
7+
#endif // GGML_CUDA_USE_CUB
88

99
#include <cstdint>
1010

1111
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) {
12-
#ifdef USE_CUB
12+
#ifdef GGML_CUDA_USE_CUB
1313
size_t tmp_size = 0;
1414
DeviceReduce::Sum(nullptr, tmp_size, x, dst, ne, stream);
1515
ggml_cuda_pool_alloc<uint8_t> tmp_alloc(pool, tmp_size);
@@ -19,7 +19,7 @@ void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int
1919
// For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14.
2020
sum_rows_f32_cuda(x, dst, ne, 1, stream);
2121
GGML_UNUSED(pool);
22-
#endif // USE_CUB
22+
#endif // GGML_CUDA_USE_CUB
2323
}
2424

2525
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

0 commit comments

Comments (0)