@@ -307,6 +307,7 @@ struct ggml_backend_opencl_context {
     cl_program program_rope;
     cl_program program_scale;
     cl_program program_silu;
+    cl_program program_sigmoid;
     cl_program program_softmax_f32;
     cl_program program_softmax_f16;
     cl_program program_softmax_4_f32;
@@ -323,6 +324,7 @@ struct ggml_backend_opencl_context {
     cl_kernel kernel_gelu, kernel_gelu_4;
     cl_kernel kernel_gelu_quick, kernel_gelu_quick_4;
     cl_kernel kernel_relu;
+    cl_kernel kernel_sigmoid_f32, kernel_sigmoid_f16;
     cl_kernel kernel_clamp;
     cl_kernel kernel_norm;
     cl_kernel kernel_rms_norm;
@@ -1060,6 +1062,23 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
         GGML_LOG_CONT(".");
     }

+    // sigmoid
+    {
+#ifdef GGML_OPENCL_EMBED_KERNELS
+        const std::string kernel_src {
+            #include "sigmoid.cl.h"
+        };
+#else
+        const std::string kernel_src = read_file("sigmoid.cl");
+#endif
+        backend_ctx->program_sigmoid =
+            build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts);
+
+        CL_CHECK((backend_ctx->kernel_sigmoid_f32 = clCreateKernel(backend_ctx->program_sigmoid, "kernel_sigmoid_f32", &err), err));
+        CL_CHECK((backend_ctx->kernel_sigmoid_f16 = clCreateKernel(backend_ctx->program_sigmoid, "kernel_sigmoid_f16", &err), err));
+        GGML_LOG_CONT(".");
+    }
+
     // Adreno kernels
 #ifdef GGML_OPENCL_USE_ADRENO_KERNELS
     // transpose
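
The `sigmoid.cl` source that this block compiles is not included in the hunks shown here. Purely as an illustration, a minimal sketch of what the two entry points could look like, assuming the same (buffer, byte-offset) argument convention that the host-side `clSetKernelArg` calls below use and one work-item per element:

```c
// Hypothetical sketch of sigmoid.cl -- not the file from this commit.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

kernel void kernel_sigmoid_f32(global float * src0, ulong offset0,
                               global float * dst,  ulong offsetd) {
    // Apply the byte offsets passed by the host code.
    src0 = (global float *)((global char *)src0 + offset0);
    dst  = (global float *)((global char *)dst  + offsetd);

    const size_t i = get_global_id(0);
    dst[i] = 1.0f / (1.0f + exp(-src0[i]));
}

kernel void kernel_sigmoid_f16(global half * src0, ulong offset0,
                               global half * dst,  ulong offsetd) {
    src0 = (global half *)((global char *)src0 + offset0);
    dst  = (global half *)((global char *)dst  + offsetd);

    // Do the math in f32 and let the store convert back to half.
    const size_t i = get_global_id(0);
    dst[i] = 1.0f / (1.0f + exp(-convert_float(src0[i])));
}
```
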
@@ -1939,7 +1958,9 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
         case GGML_UNARY_OP_SILU:
         case GGML_UNARY_OP_RELU:
         case GGML_UNARY_OP_GELU_QUICK:
-            return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32;
+            return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32;
+        case GGML_UNARY_OP_SIGMOID:
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
@@ -3759,6 +3780,58 @@ static void ggml_cl_relu(ggml_backend_t backend, const ggml_tensor * src0, const
 #endif
 }

+static void ggml_cl_sigmoid(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_ASSERT(src0);
+    GGML_ASSERT(src0->extra);
+    GGML_ASSERT(dst);
+    GGML_ASSERT(dst->extra);
+
+    UNUSED(src1);
+
+    ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
+    cl_command_queue queue = backend_ctx->queue;
+
+    ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra;
+    ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra;
+
+    cl_ulong offset0 = extra0->offset + src0->view_offs;
+    cl_ulong offsetd = extrad->offset + dst->view_offs;
+
+    cl_kernel kernel;
+    if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+        kernel = backend_ctx->kernel_sigmoid_f32;
+    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
+        kernel = backend_ctx->kernel_sigmoid_f16;
+    } else {
+        GGML_ASSERT(false && "Unsupported data types for sigmoid (input and output must be both f32 or f16)");
+    }
+
+    CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem),   &extra0->data_device));
+    CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &offset0));
+    CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem),   &extrad->data_device));
+    CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offsetd));
+
+    const int64_t n = ggml_nelements(dst);
+
+    size_t global_work_size[] = {(size_t)n, 1, 1};
+    size_t local_work_size[] = {64, 1, 1};
+
+    size_t * local_work_size_ptr = local_work_size;
+    if (n % 64 != 0 && !backend_ctx->non_uniform_workgroups) {
+        local_work_size_ptr = nullptr; // Let driver choose the work-group sizes.
+    }
+
+#ifdef GGML_OPENCL_PROFILING
+    cl_event evt;
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt));
+
+    g_profiling_info.emplace_back();
+    populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst);
+#else
+    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL));
+#endif
+}
+
 static void ggml_cl_clamp(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_ASSERT(src0);
     GGML_ASSERT(src0->extra);
@@ -5509,6 +5582,12 @@ bool ggml_cl_compute_forward(ggml_backend_t backend, struct ggml_tensor * tensor
             }
             func = ggml_cl_relu;
             break;
+        case GGML_UNARY_OP_SIGMOID:
+            if (!any_on_device) {
+                return false;
+            }
+            func = ggml_cl_sigmoid;
+            break;
         default:
             return false;
     } break;
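
For context, this dispatch path is reached whenever a computation graph contains a sigmoid unary op on a contiguous tensor. A minimal graph-building sketch (assumptions: `backend` is an already-initialized OpenCL backend handle, and tensor allocation and error handling are omitted):

```c
// Hypothetical usage sketch -- not part of this commit.
#include "ggml.h"

// Builds a graph whose single op lowers to GGML_UNARY_OP_SIGMOID.
static struct ggml_cgraph * build_sigmoid_graph(struct ggml_context * ctx, int64_t n) {
    struct ggml_tensor * x   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n);
    struct ggml_tensor * out = ggml_sigmoid(ctx, x);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    return gf;
}
// Running the graph with ggml_backend_graph_compute(backend, gf) ends up in
// ggml_cl_compute_forward, which now selects ggml_cl_sigmoid for this op.
```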