Skip to content

Commit 30cb3f3

Browse files
committed
add back sum_rows_f32_cuda
1 parent 256b026 commit 30cb3f3

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

ggml/src/ggml-cuda/sumrows.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#include "sumrows.cuh"
22

3+
// Host-side launcher for reduce_rows_f32 with its `norm` template flag turned off.
//
//   x      - input pointer, forwarded to the kernel unchanged
//   dst    - output pointer, forwarded to the kernel unchanged
//   ncols  - forwarded to the kernel as its third argument
//   nrows  - number of blocks in the 1-D launch grid
//   stream - CUDA stream the kernel is enqueued on
//
// The launch is asynchronous with respect to the host; nothing here synchronizes
// or checks for launch errors.
// NOTE(review): presumably each block reduces one row of `ncols` floats into one
// element of dst -- confirm against the reduce_rows_f32 definition.
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
    // 1-D launch geometry: nrows blocks, each made of WARP_SIZE threads.
    const dim3 grid_shape(nrows, 1, 1);
    const dim3 block_shape(WARP_SIZE, 1, 1);

    // No dynamic shared memory is requested (third launch argument is 0).
    reduce_rows_f32</*norm*/ false><<<grid_shape, block_shape, 0, stream>>>(x, dst, ncols);
}
8+
39
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
410
const ggml_tensor * src0 = dst->src[0];
511
const float * src0_d = (const float *)src0->data;

ggml/src/ggml-cuda/sumrows.cuh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
#include "common.cuh"
22

3+
// Enqueues the reduce_rows_f32 kernel (norm disabled) on `stream`, using a grid of
// `nrows` blocks; `x`, `dst` and `ncols` are passed through to the kernel.
// NOTE(review): x/dst are presumably device pointers to float data -- confirm with callers.
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
34
// Sum-rows op entry point taking the CUDA backend context and the destination tensor.
// The input tensor is taken from dst->src[0] and its data is read as float.
// NOTE(review): the remaining behavior is defined in sumrows.cu -- see the definition there.
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

0 commit comments

Comments
 (0)