47 changes: 46 additions & 1 deletion include/ggml.h
@@ -488,6 +488,7 @@ extern "C" {
GGML_OP_ROPE,
GGML_OP_ROPE_BACK,
GGML_OP_CLAMP,
GGML_OP_CLAMP_BACK,
GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL,
GGML_OP_CONV_TRANSPOSE_2D,
@@ -535,10 +536,13 @@ extern "C" {
GGML_UNARY_OP_STEP,
GGML_UNARY_OP_TANH,
GGML_UNARY_OP_ELU,
GGML_UNARY_OP_ELU_BACK,
GGML_UNARY_OP_RELU,
GGML_UNARY_OP_SIGMOID,
GGML_UNARY_OP_GELU,
GGML_UNARY_OP_GELU_BACK,
GGML_UNARY_OP_GELU_QUICK,
GGML_UNARY_OP_GELU_QUICK_BACK,
GGML_UNARY_OP_SILU,
GGML_UNARY_OP_HARDSWISH,
GGML_UNARY_OP_HARDSIGMOID,
@@ -1074,6 +1078,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_elu_back(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_elu_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_relu(
struct ggml_context * ctx,
struct ggml_tensor * a);
@@ -1102,6 +1114,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_back(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_quick(
struct ggml_context * ctx,
struct ggml_tensor * a);
@@ -1110,6 +1130,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_quick_back(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_quick_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_silu(
struct ggml_context * ctx,
struct ggml_tensor * a);
@@ -1575,13 +1603,30 @@ extern "C" {
float beta_slow);

// clamp
GGML_API struct ggml_tensor * ggml_clamp(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_clamp_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_clamp_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_clamp_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_im2col(
struct ggml_context * ctx,
struct ggml_tensor * a,
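A minimal usage sketch (not part of this change) of the ggml_clamp_back declaration added above, assuming a backend that implements the new GGML_OP_CLAMP_BACK op (only the CUDA path is shown in this diff) and the usual ggml context setup:

#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    float * xd = (float *) x->data;
    xd[0] = -2.0f; xd[1] = -0.5f; xd[2] = 0.5f; xd[3] = 2.0f;

    // backward helper for clamp(x, -1, 1): a 0/1 mask of the elements
    // that fall inside the clamp range
    struct ggml_tensor * m = ggml_clamp_back(ctx, x, -1.0f, 1.0f);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, m);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

    // per the CUDA kernel added in this PR: m = {0, 1, 1, 0}

    ggml_free(ctx);
    return 0;
}
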
4 changes: 4 additions & 0 deletions src/ggml-cuda.cu
@@ -2276,6 +2276,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_CLAMP:
ggml_cuda_op_clamp(ctx, dst);
break;
case GGML_OP_CLAMP_BACK:
ggml_cuda_op_clamp_back(ctx, dst);
break;
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
@@ -2868,6 +2871,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
case GGML_OP_SIN:
case GGML_OP_COS:
case GGML_OP_CLAMP:
case GGML_OP_CLAMP_BACK:
case GGML_OP_CONT:
case GGML_OP_DIAG_MASK_INF:
case GGML_OP_SOFT_MAX:
32 changes: 32 additions & 0 deletions src/ggml-cuda/clamp.cu
@@ -15,6 +15,21 @@ static void clamp_f32_cuda(const float * x, float * dst, const float min, const
clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
}

static __global__ void clamp_back_f32(const float * x, float * dst, const float min, const float max, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;

if (i >= k) {
return;
}

dst[i] = x[i] < min || x[i] > max ? 0.0f : 1.0f;
}

static void clamp_back_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
const int num_blocks = (k + CUDA_CLAMP_BACK_BLOCK_SIZE - 1) / CUDA_CLAMP_BACK_BLOCK_SIZE;
clamp_back_f32<<<num_blocks, CUDA_CLAMP_BACK_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
}


void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
@@ -32,3 +47,20 @@ void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
}

void ggml_cuda_op_clamp_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();

GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);

float min;
float max;
memcpy(&min, dst->op_params, sizeof(float));
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));

clamp_back_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
}
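
For reference, the backward of clamp reduces to a 0/1 mask: the gradient passes through where min <= x <= max and is zero where the value was clipped. A host-side reference (hypothetical helper, not part of this PR) that could be used to spot-check the kernel above:

// CPU reference mirroring clamp_back_f32: derivative of clamp(x, min, max) w.r.t. x
static void clamp_back_f32_ref(const float * x, float * dst, const float min, const float max, const int k) {
    for (int i = 0; i < k; ++i) {
        dst[i] = (x[i] < min || x[i] > max) ? 0.0f : 1.0f;
    }
}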
2 changes: 2 additions & 0 deletions src/ggml-cuda/clamp.cuh
@@ -1,5 +1,7 @@
#include "common.cuh"

#define CUDA_CLAMP_BLOCK_SIZE 256
#define CUDA_CLAMP_BACK_BLOCK_SIZE 256

void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_clamp_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);