@@ -85,6 +85,22 @@ static __device__ __forceinline__ float op_elu(float x) {
8585 return (x > 0 .f ) ? x : expm1f (x);
8686}
8787
// Element-wise floor: largest integral value not greater than x.
// Uses the single-precision overload to stay on the float math path.
static __device__ __forceinline__ float op_floor(float x) {
    const float y = floorf(x);
    return y;
}
91+
// Element-wise ceiling: smallest integral value not less than x.
// Uses the single-precision overload to stay on the float math path.
static __device__ __forceinline__ float op_ceil(float x) {
    const float y = ceilf(x);
    return y;
}
95+
// Element-wise round-half-away-from-zero (round(2.5) == 3, round(-2.5) == -3).
// Use roundf, the float overload: plain round() takes/returns double, which
// forces a float->double->float round-trip and runs on the (much slower)
// double-precision units on most consumer GPUs.
static __device__ __forceinline__ float op_round(float x) {
    return roundf(x);
}
99+
// Element-wise truncation toward zero (trunc(2.9) == 2, trunc(-2.9) == -2).
// Use truncf, the float overload: plain trunc() takes/returns double, which
// forces a float->double->float round-trip and runs on the (much slower)
// double-precision units on most consumer GPUs.
static __device__ __forceinline__ float op_trunc(float x) {
    return truncf(x);
}
103+
88104template <float (*op)(float ), typename T>
89105static __global__ void unary_op_kernel (const T * x, T * dst, const int k) {
90106 const int i = blockDim .x *blockIdx .x + threadIdx .x ;
@@ -201,6 +217,22 @@ void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
// CUDA backend entry point for GGML_UNARY_OP_ELU: applies op_elu element-wise
// to dst's source tensor via the shared unary-op launcher.
void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    ggml_cuda_op_unary<op_elu>(ctx, dst);
}
220+
// CUDA backend entry point for GGML_UNARY_OP_FLOOR: applies op_floor
// element-wise to dst's source tensor via the shared unary-op launcher.
void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    ggml_cuda_op_unary<op_floor>(ctx, dst);
}
224+
// CUDA backend entry point for GGML_UNARY_OP_CEIL: applies op_ceil
// element-wise to dst's source tensor via the shared unary-op launcher.
void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    ggml_cuda_op_unary<op_ceil>(ctx, dst);
}
228+
// CUDA backend entry point for GGML_UNARY_OP_ROUND: applies op_round
// element-wise to dst's source tensor via the shared unary-op launcher.
void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    ggml_cuda_op_unary<op_round>(ctx, dst);
}
232+
// CUDA backend entry point for GGML_UNARY_OP_TRUNC: applies op_trunc
// element-wise to dst's source tensor via the shared unary-op launcher.
void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    ggml_cuda_op_unary<op_trunc>(ctx, dst);
}
204236/* gated ops */
205237
206238template <float (*op)(float ), typename T>
0 commit comments