
Commit 402858f

add back sum_rows_f32_cuda
1 parent 256b026 commit 402858f

4 files changed: +15 −8 lines


ggml/src/ggml-cuda/mean.cu

Lines changed: 7 additions & 7 deletions
@@ -1,19 +1,19 @@
 #include "mean.cuh"
 
 void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
+    const ggml_tensor * src0 = dst->src[0];
+    const float * src0_d = (const float *) src0->data;
+    float * dst_d = (float *) dst->data;
+    cudaStream_t stream = ctx.stream();
 
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
     GGML_ASSERT(ggml_is_contiguous(src0));
 
     const int64_t ncols = src0->ne[0];
     const int64_t nrows = ggml_nrows(src0);
 
     const dim3 block_dims(WARP_SIZE, 1, 1);
     const dim3 block_nums(nrows, 1, 1);
-    reduce_rows_f32</*norm*/true><<<block_nums, block_dims, 0, stream>>>(src0_d, dst_d, ncols);
-}
+    reduce_rows_f32</*norm*/ true><<<block_nums, block_dims, 0, stream>>>(src0_d, dst_d, ncols);
+}
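
Both ggml_cuda_op_mean above and the sum_rows_f32_cuda helper restored below launch the same reduce_rows_f32<norm> kernel from common.cuh, which is not part of this diff. The standalone sketch below only illustrates the presumed pattern (one block per row, one warp per block, a shuffle reduction, and the norm flag dividing by ncols to turn a row sum into a row mean); it is not ggml's actual implementation, and the kernel name, SKETCH_WARP_SIZE stand-in, and test values are made up for illustration.

#include <cstdio>
#include <cuda_runtime.h>

#define SKETCH_WARP_SIZE 32  // stand-in for ggml's WARP_SIZE constant

// One block per row, one warp of threads per block; norm=true divides by ncols (mean),
// norm=false leaves the raw row sum.
template <bool norm>
__global__ void reduce_rows_f32_sketch(const float * x, float * dst, const int ncols) {
    const int row = blockIdx.x;
    const int col = threadIdx.x;

    float sum = 0.0f;
    for (int i = col; i < ncols; i += blockDim.x) {
        sum += x[row * ncols + i];
    }

    // warp-level tree reduction via shuffles
    for (int offset = SKETCH_WARP_SIZE / 2; offset > 0; offset >>= 1) {
        sum += __shfl_xor_sync(0xffffffff, sum, offset, SKETCH_WARP_SIZE);
    }

    if (col == 0) {
        dst[row] = norm ? sum / ncols : sum;
    }
}

int main() {
    const int ncols = 8;
    const int nrows = 2;
    float h_x[nrows * ncols];
    for (int i = 0; i < nrows * ncols; ++i) {
        h_x[i] = 1.0f;  // every row sums to 8.0, means to 1.0
    }

    float * d_x;
    float * d_sum;
    float * d_mean;
    cudaMalloc(&d_x,    sizeof(h_x));
    cudaMalloc(&d_sum,  nrows * sizeof(float));
    cudaMalloc(&d_mean, nrows * sizeof(float));
    cudaMemcpy(d_x, h_x, sizeof(h_x), cudaMemcpyHostToDevice);

    const dim3 block_dims(SKETCH_WARP_SIZE, 1, 1);
    const dim3 block_nums(nrows, 1, 1);
    reduce_rows_f32_sketch<false><<<block_nums, block_dims>>>(d_x, d_sum,  ncols);  // sum_rows path
    reduce_rows_f32_sketch<true><<<block_nums, block_dims>>>(d_x, d_mean, ncols);   // mean path

    float h_sum[nrows];
    float h_mean[nrows];
    cudaMemcpy(h_sum,  d_sum,  sizeof(h_sum),  cudaMemcpyDeviceToHost);
    cudaMemcpy(h_mean, d_mean, sizeof(h_mean), cudaMemcpyDeviceToHost);
    printf("row 0: sum = %.1f, mean = %.1f\n", h_sum[0], h_mean[0]);

    cudaFree(d_x);
    cudaFree(d_sum);
    cudaFree(d_mean);
    return 0;
}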

ggml/src/ggml-cuda/mean.cuh

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 #include "common.cuh"
 
-void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

ggml/src/ggml-cuda/sumrows.cu

Lines changed: 6 additions & 0 deletions
@@ -1,5 +1,11 @@
 #include "sumrows.cuh"
 
+void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
+    const dim3 block_dims(WARP_SIZE, 1, 1);
+    const dim3 block_nums(nrows, 1, 1);
+    reduce_rows_f32</*norm*/false><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+}
+
 void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
     const float * src0_d = (const float *)src0->data;
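
A minimal, hypothetical call site for the restored helper (not part of this commit): it assumes the snippet is compiled inside ggml's CUDA backend, where sumrows.cuh and the headers it pulls in are available; the wrapper name and buffer names are made up for illustration.

#include "sumrows.cuh"

// Sum each row of a contiguous nrows x ncols F32 matrix already resident on the GPU.
// x_d and dst_d are hypothetical device pointers supplied by the caller.
static void sum_rows_example(const float * x_d, float * dst_d,
                             const int ncols, const int nrows, cudaStream_t stream) {
    // dst_d receives one float per row: dst_d[i] = sum over j of x_d[i*ncols + j]
    sum_rows_f32_cuda(x_d, dst_d, ncols, nrows, stream);
}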

ggml/src/ggml-cuda/sumrows.cuh

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 #include "common.cuh"
 
+void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
 void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
