Skip to content

Commit 0e9a5d8

Browse files
committed
Rename USE_CUB to GGML_CUDA_USE_CUB
Suggested by @JohannesGaessler
1 parent e8373bf commit 0e9a5d8

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@
8888
#define GGML_CUDA_CC_IS_NG(cc) (cc >= GGML_CUDA_CC_NG)
8989

9090
#if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
91-
# define USE_CUB
91+
# define GGML_CUDA_USE_CUB
9292
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
9393

9494
#ifdef __CUDA_ARCH_LIST__

ggml/src/ggml-cuda/mean.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
#include "mean.cuh"
22
#include "reduce_rows.cuh"
33

4-
#ifdef USE_CUB
4+
#ifdef GGML_CUDA_USE_CUB
55
# include <cub/cub.cuh>
66
using namespace cub;
7-
#endif // USE_CUB
7+
#endif // GGML_CUDA_USE_CUB
88

99
template <typename T> __global__ void divide_by_count(T * result, size_t count) {
1010
*result /= static_cast<T>(count);
@@ -24,7 +24,7 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
2424
const int64_t nrows = ggml_nrows(src0);
2525

2626
// Special case for reducing vectors
27-
#ifdef USE_CUB
27+
#ifdef GGML_CUDA_USE_CUB
2828
cudaStreamCaptureStatus iscapturing;
2929
CUDA_CHECK(cudaStreamIsCapturing(stream, &iscapturing));
3030
if ((nrows == 1) &&

ggml/src/ggml-cuda/sum.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
#include "sum.cuh"
22
#include "sumrows.cuh"
33

4-
#ifdef USE_CUB
4+
#ifdef GGML_CUDA_USE_CUB
55
# include <cub/cub.cuh>
66
using namespace cub;
7-
#endif // USE_CUB
7+
#endif // GGML_CUDA_USE_CUB
88

99
#include <cstdint>
1010

1111
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) {
12-
#ifdef USE_CUB
12+
#ifdef GGML_CUDA_USE_CUB
1313
size_t tmp_size = 0;
1414
DeviceReduce::Sum(nullptr, tmp_size, x, dst, ne, stream);
1515
ggml_cuda_pool_alloc<uint8_t> tmp_alloc(pool, tmp_size);
@@ -19,7 +19,7 @@ void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int
1919
// For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14.
2020
sum_rows_f32_cuda(x, dst, ne, 1, stream);
2121
GGML_UNUSED(pool);
22-
#endif // USE_CUB
22+
#endif // GGML_CUDA_USE_CUB
2323
}
2424

2525
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

0 commit comments

Comments (0)