Skip to content

Commit 92a8738

Browse files
committed
add CUDA
1 parent e427af7 commit 92a8738

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

ggml/src/ggml-cuda/scale.cu

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,18 @@
11
#include "scale.cuh"
22

// Elementwise affine kernel: dst[i] = scale * x[i] + bias for i in [0, k).
// Expects a 1D launch with at least k total threads; each thread handles one
// element and surplus threads exit via the bounds check.
// NOTE(review): x and dst may alias (in-place scale) — do not add __restrict__
// unless non-aliasing is confirmed at every call site.
static __global__ void scale_f32(const float * x, float * dst, const float scale, const float bias, const int k) {
    const int idx = threadIdx.x + blockIdx.x*blockDim.x;

    if (idx < k) {
        dst[idx] = scale*x[idx] + bias;
    }
}
1212

// Host-side launcher: computes dst = scale*x + bias over k floats on `stream`.
// Uses CUDA_SCALE_BLOCK_SIZE threads per block and a ceil-div grid so the
// whole range [0, k) is covered; tail threads are masked off in the kernel.
static void scale_f32_cuda(const float * x, float * dst, const float scale, const float bias, const int k, cudaStream_t stream) {
    const int block_size = CUDA_SCALE_BLOCK_SIZE;
    const int grid_size  = (k + block_size - 1) / block_size;  // ceil(k / block_size)
    scale_f32<<<grid_size, block_size, 0, stream>>>(x, dst, scale, bias, k);
}
1717

1818
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
@@ -24,8 +24,8 @@ void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
2424
GGML_ASSERT(src0->type == GGML_TYPE_F32);
2525
GGML_ASSERT( dst->type == GGML_TYPE_F32);
2626

27-
float scale;
28-
memcpy(&scale, dst->op_params, sizeof(float));
27+
float scale = ((const float *)(dst->op_params))[0];
28+
float bias = ((const float *)(dst->op_params))[1];
2929

30-
scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
30+
scale_f32_cuda(src0_d, dst_d, scale, bias, ggml_nelements(src0), stream);
3131
}

0 commit comments

Comments
 (0)