Skip to content

Commit 466c191

Browse files
authored
cpu : add FLOOR, CEIL, ROUND and TRUNC unary operators (ggml-org#16083)
* CPU: Add support for FLOOR, CEIL, ROUND and TRUNC unary operators - Added the operators to the unary op enum - Implemented API functions - Implemented forward and unary-op logic in the CPU backend - Updated ggml_get_n_tasks - Updated the operator names array and static_assert - Updated docs and enabled automatic tests * docs: add documentation for ggml_trunc and ggml_trunc_inplace in ggml.h * chore: remove trailing whitespace from ggml.h * Remove unresolved merge markers * Apply review suggestions: clean up formatting, enum order and leftover artifacts * Regenerate ops.md using create_ops_docs.py
1 parent 0cb7a06 commit 466c191

File tree

8 files changed

+181
-1
lines changed

8 files changed

+181
-1
lines changed

docs/ops.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Legend:
2222
| ARANGE ||||||||||
2323
| ARGMAX ||||||||||
2424
| ARGSORT ||||||||||
25+
| CEIL ||||||||||
2526
| CLAMP ||||| 🟡 | 🟡 || 🟡 ||
2627
| CONCAT |||| 🟡 || 🟡 | 🟡 |||
2728
| CONT || 🟡 |||| 🟡 | 🟡 | 🟡 ||
@@ -41,6 +42,7 @@ Legend:
4142
| ELU |||| 🟡 | 🟡 || 🟡 |||
4243
| EXP |||| 🟡 | 🟡 || 🟡 |||
4344
| FLASH_ATTN_EXT || 🟡 || 🟡 | 🟡 ||| 🟡 ||
45+
| FLOOR ||||||||||
4446
| GATED_LINEAR_ATTN ||||||||||
4547
| GEGLU ||||| 🟡 ||| 🟡 ||
4648
| GEGLU_ERF ||||| 🟡 ||| 🟡 ||
@@ -82,6 +84,7 @@ Legend:
8284
| ROLL ||||||||||
8385
| ROPE || 🟡 ||||||||
8486
| ROPE_BACK ||||||||||
87+
| ROUND ||||||||||
8588
| RWKV_WKV6 ||||||||||
8689
| RWKV_WKV7 ||||||||||
8790
| SCALE || 🟡 ||||||||
@@ -108,5 +111,6 @@ Legend:
108111
| TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
109112
| TIMESTEP_EMBEDDING ||||||||||
110113
| TOPK_MOE ||||||||||
114+
| TRUNC ||||||||||
111115
| UPSCALE || 🟡 ||| 🟡 || 🟡 |||
112116
| XIELU ||||||||||

docs/ops/CPU.csv

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@
5959
"CPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
6060
"CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
6161
"CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
62+
"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
63+
"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
64+
"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
65+
"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
66+
"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
67+
"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
68+
"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
69+
"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6270
"CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
6371
"CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6472
"CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -119,6 +127,14 @@
119127
"CPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
120128
"CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
121129
"CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
130+
"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
131+
"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
132+
"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
133+
"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
134+
"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
135+
"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
136+
"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
137+
"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
122138
"CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CPU"
123139
"CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CPU"
124140
"CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CPU"

ggml/include/ggml.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,10 @@ extern "C" {
577577
GGML_UNARY_OP_EXP,
578578
GGML_UNARY_OP_GELU_ERF,
579579
GGML_UNARY_OP_XIELU,
580+
GGML_UNARY_OP_FLOOR,
581+
GGML_UNARY_OP_CEIL,
582+
GGML_UNARY_OP_ROUND,
583+
GGML_UNARY_OP_TRUNC,
580584

581585
GGML_UNARY_OP_COUNT,
582586
};
@@ -1151,6 +1155,46 @@ extern "C" {
11511155
struct ggml_context * ctx,
11521156
struct ggml_tensor * a);
11531157

1158+
// Element-wise floor of a (GGML_UNARY_OP_FLOOR); the CPU backend evaluates it with floorf.
GGML_API struct ggml_tensor * ggml_floor(
1159+
struct ggml_context * ctx,
1160+
struct ggml_tensor * a);
1161+
1162+
// Same operation as ggml_floor, built through ggml_unary_inplace.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
GGML_API struct ggml_tensor * ggml_floor_inplace(
1163+
struct ggml_context * ctx,
1164+
struct ggml_tensor * a);
1165+
1166+
// Element-wise ceiling of a (GGML_UNARY_OP_CEIL); the CPU backend evaluates it with ceilf.
GGML_API struct ggml_tensor * ggml_ceil(
1167+
struct ggml_context * ctx,
1168+
struct ggml_tensor * a);
1169+
1170+
// Same operation as ggml_ceil, built through ggml_unary_inplace.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
GGML_API struct ggml_tensor * ggml_ceil_inplace(
1171+
struct ggml_context * ctx,
1172+
struct ggml_tensor * a);
1173+
1174+
// Element-wise rounding of a to the nearest integer (GGML_UNARY_OP_ROUND).
// The CPU backend evaluates it with roundf, so halfway cases round away from zero.
GGML_API struct ggml_tensor * ggml_round(
1175+
struct ggml_context * ctx,
1176+
struct ggml_tensor * a);
1177+
1178+
// Same operation as ggml_round, built through ggml_unary_inplace.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
GGML_API struct ggml_tensor * ggml_round_inplace(
1179+
struct ggml_context * ctx,
1180+
struct ggml_tensor * a);
1181+
1182+
/**
1183+
* Truncates the fractional part of each element in the tensor (towards zero).
1184+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
1185+
* Similar to truncf in C (std::trunc in C++).
1186+
*/
1187+
1188+
GGML_API struct ggml_tensor * ggml_trunc(
1189+
struct ggml_context * ctx,
1190+
struct ggml_tensor * a);
1191+
1192+
// Same operation as ggml_trunc, built through ggml_unary_inplace.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
GGML_API struct ggml_tensor * ggml_trunc_inplace(
1193+
struct ggml_context * ctx,
1194+
struct ggml_tensor * a);
1195+
1196+
1197+
11541198
// xIELU activation function
11551199
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
11561200
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,6 +2184,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
21842184
case GGML_UNARY_OP_HARDSWISH:
21852185
case GGML_UNARY_OP_HARDSIGMOID:
21862186
case GGML_UNARY_OP_EXP:
2187+
case GGML_UNARY_OP_FLOOR:
2188+
case GGML_UNARY_OP_CEIL:
2189+
case GGML_UNARY_OP_ROUND:
2190+
case GGML_UNARY_OP_TRUNC:
21872191
{
21882192
n_tasks = 1;
21892193
} break;

ggml/src/ggml-cpu/ops.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8993,6 +8993,22 @@ void ggml_compute_forward_unary(
89938993
{
89948994
ggml_compute_forward_exp(params, dst);
89958995
} break;
8996+
case GGML_UNARY_OP_FLOOR:
8997+
{
8998+
ggml_compute_forward_floor(params, dst);
8999+
} break;
9000+
case GGML_UNARY_OP_CEIL:
9001+
{
9002+
ggml_compute_forward_ceil(params, dst);
9003+
} break;
9004+
case GGML_UNARY_OP_ROUND:
9005+
{
9006+
ggml_compute_forward_round(params, dst);
9007+
} break;
9008+
case GGML_UNARY_OP_TRUNC:
9009+
{
9010+
ggml_compute_forward_trunc(params, dst);
9011+
} break;
89969012
case GGML_UNARY_OP_XIELU:
89979013
{
89989014
ggml_compute_forward_xielu(params, dst);

ggml/src/ggml-cpu/unary-ops.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,22 @@ static inline float op_log(float x) {
7373
return logf(x);
7474
}
7575

76+
// Scalar kernel for the FLOOR unary op: largest integral value not greater than x (floorf).
static inline float op_floor(float x) {
77+
return floorf(x);
78+
}
79+
80+
// Scalar kernel for the CEIL unary op: smallest integral value not less than x (ceilf).
static inline float op_ceil(float x) {
81+
return ceilf(x);
82+
}
83+
84+
// Scalar kernel for the ROUND unary op: nearest integral value via roundf.
// roundf rounds halfway cases away from zero (e.g. 2.5 -> 3, -2.5 -> -3), not to even.
static inline float op_round(float x) {
85+
return roundf(x);
86+
}
87+
88+
// Scalar kernel for the TRUNC unary op: drops the fractional part, rounding toward zero (truncf).
static inline float op_trunc(float x) {
89+
return truncf(x);
90+
}
91+
7692
template <float (*op)(float), typename src0_t, typename dst_t>
7793
static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
7894
constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
@@ -274,6 +290,22 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor *
274290
unary_op<op_log>(params, dst);
275291
}
276292

293+
// CPU forward pass for GGML_UNARY_OP_FLOOR: hands params and dst to unary_op instantiated with op_floor.
// How source elements are traversed and converted is decided inside unary_op (not visible in this hunk).
void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
294+
unary_op<op_floor>(params, dst);
295+
}
296+
297+
// CPU forward pass for GGML_UNARY_OP_CEIL: hands params and dst to unary_op instantiated with op_ceil.
// How source elements are traversed and converted is decided inside unary_op (not visible in this hunk).
void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) {
298+
unary_op<op_ceil>(params, dst);
299+
}
300+
301+
// CPU forward pass for GGML_UNARY_OP_ROUND: hands params and dst to unary_op instantiated with op_round.
// How source elements are traversed and converted is decided inside unary_op (not visible in this hunk).
void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) {
302+
unary_op<op_round>(params, dst);
303+
}
304+
305+
// CPU forward pass for GGML_UNARY_OP_TRUNC: hands params and dst to unary_op instantiated with op_trunc.
// How source elements are traversed and converted is decided inside unary_op (not visible in this hunk).
void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
306+
unary_op<op_trunc>(params, dst);
307+
}
308+
277309
void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) {
278310
const float alpha_n = ggml_get_op_params_f32(dst, 1);
279311
const float alpha_p = ggml_get_op_params_f32(dst, 2);

ggml/src/ggml-cpu/unary-ops.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct
2222
void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
2323
void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
2424
void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
25+
void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
26+
void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
27+
void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
28+
void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
2529
void ggml_compute_forward_xielu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
2630

2731
#ifdef __cplusplus

ggml/src/ggml.c

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1144,9 +1144,13 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
11441144
"EXP",
11451145
"GELU_ERF",
11461146
"XIELU",
1147+
"FLOOR",
1148+
"CEIL",
1149+
"ROUND",
1150+
"TRUNC",
11471151
};
11481152

1149-
static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");
1153+
static_assert(GGML_UNARY_OP_COUNT == 20, "GGML_UNARY_OP_COUNT != 20");
11501154

11511155
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
11521156
"REGLU",
@@ -2749,6 +2753,62 @@ static struct ggml_tensor * ggml_glu_impl(
27492753
return result;
27502754
}
27512755

2756+
// ggml_floor
2757+
2758+
// Thin wrapper: requests the FLOOR unary op on a through ggml_unary and returns that result unchanged.
struct ggml_tensor * ggml_floor(
2759+
struct ggml_context * ctx,
2760+
struct ggml_tensor * a) {
2761+
return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2762+
}
2763+
2764+
// Thin wrapper: requests the FLOOR unary op on a through ggml_unary_inplace and returns that result.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_floor_inplace(
2765+
struct ggml_context * ctx,
2766+
struct ggml_tensor * a) {
2767+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2768+
}
2769+
2770+
// ggml_ceil
2771+
2772+
// Thin wrapper: requests the CEIL unary op on a through ggml_unary and returns that result unchanged.
struct ggml_tensor * ggml_ceil(
2773+
struct ggml_context * ctx,
2774+
struct ggml_tensor * a) {
2775+
return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2776+
}
2777+
2778+
// Thin wrapper: requests the CEIL unary op on a through ggml_unary_inplace and returns that result.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_ceil_inplace(
2779+
struct ggml_context * ctx,
2780+
struct ggml_tensor * a) {
2781+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2782+
}
2783+
2784+
// ggml_round
2785+
2786+
// Thin wrapper: requests the ROUND unary op on a through ggml_unary and returns that result unchanged.
struct ggml_tensor * ggml_round(
2787+
struct ggml_context * ctx,
2788+
struct ggml_tensor * a) {
2789+
return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2790+
}
2791+
2792+
// Thin wrapper: requests the ROUND unary op on a through ggml_unary_inplace and returns that result.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_round_inplace(
2793+
struct ggml_context * ctx,
2794+
struct ggml_tensor * a) {
2795+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2796+
}
2797+
2798+
// ggml_trunc
2799+
2800+
// Thin wrapper: requests the TRUNC unary op on a through ggml_unary and returns that result unchanged.
struct ggml_tensor * ggml_trunc(
2801+
struct ggml_context * ctx,
2802+
struct ggml_tensor * a) {
2803+
return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2804+
}
2805+
2806+
// Thin wrapper: requests the TRUNC unary op on a through ggml_unary_inplace and returns that result.
// NOTE(review): presumably the result reuses a's storage - confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_trunc_inplace(
2807+
struct ggml_context * ctx,
2808+
struct ggml_tensor * a) {
2809+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2810+
}
2811+
27522812
struct ggml_tensor * ggml_glu(
27532813
struct ggml_context * ctx,
27542814
struct ggml_tensor * a,

0 commit comments

Comments
 (0)