Skip to content

Commit 51040ed

Browse files
committed
CPU: Add support for FLOOR, CEIL, ROUND and TRUNC unary operators
- Added the operators to the unary op enum
- Implemented API functions
- Implemented forward and unary-op logic in the CPU backend
- Updated ggml_get_n_tasks
- Updated the operator names array and static_assert
- Updated docs and enabled automatic tests
1 parent e58174c commit 51040ed

File tree

8 files changed

+179
-1
lines changed

8 files changed

+179
-1
lines changed

docs/ops.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Legend:
2222
| ARANGE ||||||||||
2323
| ARGMAX ||||||||||
2424
| ARGSORT ||||||||||
25+
| CEIL ||||||||||
2526
| CLAMP ||||| 🟡 | 🟡 || 🟡 ||
2627
| CONCAT |||| 🟡 || 🟡 | 🟡 |||
2728
| CONT || 🟡 |||| 🟡 | 🟡 | 🟡 ||
@@ -41,6 +42,7 @@ Legend:
4142
| ELU |||| 🟡 | 🟡 || 🟡 |||
4243
| EXP |||| 🟡 | 🟡 || 🟡 |||
4344
| FLASH_ATTN_EXT || 🟡 || 🟡 | 🟡 ||| 🟡 ||
45+
| FLOOR ||||||||||
4446
| GATED_LINEAR_ATTN ||||||||||
4547
| GEGLU ||||| 🟡 ||| 🟡 ||
4648
| GEGLU_ERF ||||| 🟡 ||| 🟡 ||
@@ -82,6 +84,7 @@ Legend:
8284
| ROLL ||||||||||
8385
| ROPE || 🟡 ||||||||
8486
| ROPE_BACK ||||||||||
87+
| ROUND ||||||||||
8588
| RWKV_WKV6 ||||||||||
8689
| RWKV_WKV7 ||||||||||
8790
| SCALE || 🟡 ||||||||
@@ -107,4 +110,5 @@ Legend:
107110
| SWIGLU_OAI ||||||||||
108111
| TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
109112
| TIMESTEP_EMBEDDING ||||||||||
113+
| TRUNC ||||||||||
110114
| UPSCALE || 🟡 ||| 🟡 || 🟡 |||

docs/ops/CPU.csv

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@
5959
"CPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
6060
"CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
6161
"CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
62+
"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
63+
"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
64+
"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
65+
"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
66+
"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
67+
"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
68+
"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
69+
"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6270
"CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
6371
"CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6472
"CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -119,6 +127,14 @@
119127
"CPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
120128
"CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
121129
"CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
130+
"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
131+
"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
132+
"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
133+
"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
134+
"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
135+
"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
136+
"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
137+
"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
122138
"CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CPU"
123139
"CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CPU"
124140
"CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CPU"

ggml/include/ggml.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,11 @@ extern "C" {
574574
GGML_UNARY_OP_HARDSIGMOID,
575575
GGML_UNARY_OP_EXP,
576576
GGML_UNARY_OP_GELU_ERF,
577+
GGML_UNARY_OP_FLOOR,
578+
GGML_UNARY_OP_CEIL,
579+
GGML_UNARY_OP_ROUND,
580+
GGML_UNARY_OP_TRUNC,
581+
577582

578583
GGML_UNARY_OP_COUNT,
579584
};
@@ -1148,6 +1153,40 @@ extern "C" {
11481153
struct ggml_context * ctx,
11491154
struct ggml_tensor * a);
11501155

1156+
// Element-wise rounding unary ops (FLOOR, CEIL, ROUND, TRUNC).
// Each op has a regular and an `_inplace` entry point taking the context and the input tensor `a`.
// NOTE(review): presumably the `_inplace` variants reuse a's storage for the result — confirm against ggml_unary_inplace.

// FLOOR: round each element toward negative infinity (CPU backend uses floorf).
GGML_API struct ggml_tensor * ggml_floor(
1157+
struct ggml_context * ctx,
1158+
struct ggml_tensor * a);
1159+
1160+
GGML_API struct ggml_tensor * ggml_floor_inplace(
1161+
struct ggml_context * ctx,
1162+
struct ggml_tensor * a);
1163+
1164+
// CEIL: round each element toward positive infinity (CPU backend uses ceilf).
GGML_API struct ggml_tensor * ggml_ceil(
1165+
struct ggml_context * ctx,
1166+
struct ggml_tensor * a);
1167+
1168+
GGML_API struct ggml_tensor * ggml_ceil_inplace(
1169+
struct ggml_context * ctx,
1170+
struct ggml_tensor * a);
1171+
1172+
// ROUND: round each element to the nearest integer, halfway cases away from zero (CPU backend uses roundf).
GGML_API struct ggml_tensor * ggml_round(
1173+
struct ggml_context * ctx,
1174+
struct ggml_tensor * a);
1175+
1176+
GGML_API struct ggml_tensor * ggml_round_inplace(
1177+
struct ggml_context * ctx,
1178+
struct ggml_tensor * a);
1179+
1180+
// TRUNC: round each element toward zero (CPU backend uses truncf).
GGML_API struct ggml_tensor * ggml_trunc(
1181+
struct ggml_context * ctx,
1182+
struct ggml_tensor * a);
1183+
1184+
GGML_API struct ggml_tensor * ggml_trunc_inplace(
1185+
struct ggml_context * ctx,
1186+
struct ggml_tensor * a);
1187+
1188+
1189+
11511190
// gated linear unit ops
11521191
// A: n columns, r rows,
11531192
// result is n / 2 columns, r rows,

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,6 +2179,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
21792179
case GGML_UNARY_OP_HARDSWISH:
21802180
case GGML_UNARY_OP_HARDSIGMOID:
21812181
case GGML_UNARY_OP_EXP:
2182+
case GGML_UNARY_OP_FLOOR:
2183+
case GGML_UNARY_OP_CEIL:
2184+
case GGML_UNARY_OP_ROUND:
2185+
case GGML_UNARY_OP_TRUNC:
21822186
{
21832187
n_tasks = 1;
21842188
} break;

ggml/src/ggml-cpu/ops.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9766,6 +9766,22 @@ void ggml_compute_forward_unary(
97669766
{
97679767
ggml_compute_forward_exp(params, dst);
97689768
} break;
9769+
case GGML_UNARY_OP_FLOOR:
9770+
{
9771+
ggml_compute_forward_floor(params, dst);
9772+
} break;
9773+
case GGML_UNARY_OP_CEIL:
9774+
{
9775+
ggml_compute_forward_ceil(params, dst);
9776+
} break;
9777+
case GGML_UNARY_OP_ROUND:
9778+
{
9779+
ggml_compute_forward_round(params, dst);
9780+
} break;
9781+
case GGML_UNARY_OP_TRUNC:
9782+
{
9783+
ggml_compute_forward_trunc(params, dst);
9784+
} break;
97699785
default:
97709786
{
97719787
GGML_ABORT("fatal error");

ggml/src/ggml-cpu/unary-ops.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,22 @@ static inline float op_log(float x) {
6464
return logf(x);
6565
}
6666

67+
// Scalar FLOOR kernel: returns floorf(x), the largest integral value not greater than x.
static inline float op_floor(float x) {
68+
return floorf(x);
69+
}
70+
71+
// Scalar CEIL kernel: returns ceilf(x), the smallest integral value not less than x.
static inline float op_ceil(float x) {
72+
return ceilf(x);
73+
}
74+
75+
// Scalar ROUND kernel: returns roundf(x), the nearest integral value.
// Halfway cases are rounded away from zero (e.g. 0.5f -> 1.0f, -0.5f -> -1.0f),
// independent of the current floating-point rounding mode; this is NOT round-half-to-even.
static inline float op_round(float x) {
76+
return roundf(x);
77+
}
78+
79+
// Scalar TRUNC kernel: returns truncf(x), i.e. x rounded toward zero (fractional part discarded).
static inline float op_trunc(float x) {
80+
return truncf(x);
81+
}
82+
6783
template <float (*op)(float), typename src0_t, typename dst_t>
6884
static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
6985
constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
@@ -184,3 +200,19 @@ void ggml_compute_forward_cos(const ggml_compute_params * params, ggml_tensor *
184200
// CPU forward kernel for LOG: runs the shared unary_op template instantiated with op_log on dst.
// NOTE(review): unary_op's body is not visible here; presumably it applies the scalar op element-wise to dst's source tensor — confirm.
void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * dst) {
185201
unary_op<op_log>(params, dst);
186202
}
203+
204+
// CPU forward kernel for GGML_UNARY_OP_FLOOR: runs the shared unary_op template instantiated with op_floor on dst.
// NOTE(review): unary_op's body is not visible here; presumably it applies the scalar op element-wise to dst's source tensor — confirm.
void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
205+
unary_op<op_floor>(params, dst);
206+
}
207+
208+
// CPU forward kernel for GGML_UNARY_OP_CEIL: runs the shared unary_op template instantiated with op_ceil on dst.
// NOTE(review): unary_op's body is not visible here; presumably it applies the scalar op element-wise to dst's source tensor — confirm.
void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) {
209+
unary_op<op_ceil>(params, dst);
210+
}
211+
212+
// CPU forward kernel for GGML_UNARY_OP_ROUND: runs the shared unary_op template instantiated with op_round on dst.
// NOTE(review): unary_op's body is not visible here; presumably it applies the scalar op element-wise to dst's source tensor — confirm.
void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) {
213+
unary_op<op_round>(params, dst);
214+
}
215+
216+
// CPU forward kernel for GGML_UNARY_OP_TRUNC: runs the shared unary_op template instantiated with op_trunc on dst.
// NOTE(review): unary_op's body is not visible here; presumably it applies the scalar op element-wise to dst's source tensor — confirm.
void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
217+
unary_op<op_trunc>(params, dst);
218+
}

ggml/src/ggml-cpu/unary-ops.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct
2222
void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
2323
void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
2424
void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
25+
// CPU forward kernels for the rounding unary ops (FLOOR, CEIL, ROUND, TRUNC); defined in unary-ops.cpp.
// Each takes the compute params and the destination tensor `dst`.
void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
26+
void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
27+
void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
28+
void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
29+
30+
31+
2532

2633
#ifdef __cplusplus
2734
}

ggml/src/ggml.c

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1143,9 +1143,13 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
11431143
"HARDSIGMOID",
11441144
"EXP",
11451145
"GELU_ERF",
1146+
"FLOOR",
1147+
"CEIL",
1148+
"ROUND",
1149+
"TRUNC",
11461150
};
11471151

1148-
static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
1152+
static_assert(GGML_UNARY_OP_COUNT == 19, "GGML_UNARY_OP_COUNT != 19");
11491153

11501154

11511155
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
@@ -2726,6 +2730,62 @@ static struct ggml_tensor * ggml_glu_impl(
27262730
return result;
27272731
}
27282732

2733+
// ggml_floor
2734+
2735+
// Creates a FLOOR unary-op tensor from `a` by forwarding to ggml_unary with GGML_UNARY_OP_FLOOR.
// Returns whatever ggml_unary returns for (ctx, a, op).
struct ggml_tensor * ggml_floor(
2736+
struct ggml_context * ctx,
2737+
struct ggml_tensor * a) {
2738+
return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2739+
}
2740+
2741+
// In-place variant of ggml_floor: forwards to ggml_unary_inplace with GGML_UNARY_OP_FLOOR.
// NOTE(review): presumably the result reuses a's storage — confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_floor_inplace(
2742+
struct ggml_context * ctx,
2743+
struct ggml_tensor * a) {
2744+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2745+
}
2746+
2747+
// ggml_ceil
2748+
2749+
// Creates a CEIL unary-op tensor from `a` by forwarding to ggml_unary with GGML_UNARY_OP_CEIL.
// Returns whatever ggml_unary returns for (ctx, a, op).
struct ggml_tensor * ggml_ceil(
2750+
struct ggml_context * ctx,
2751+
struct ggml_tensor * a) {
2752+
return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2753+
}
2754+
2755+
// In-place variant of ggml_ceil: forwards to ggml_unary_inplace with GGML_UNARY_OP_CEIL.
// NOTE(review): presumably the result reuses a's storage — confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_ceil_inplace(
2756+
struct ggml_context * ctx,
2757+
struct ggml_tensor * a) {
2758+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2759+
}
2760+
2761+
// ggml_round
2762+
2763+
// Creates a ROUND unary-op tensor from `a` by forwarding to ggml_unary with GGML_UNARY_OP_ROUND.
// Returns whatever ggml_unary returns for (ctx, a, op).
// The CPU backend evaluates this op with roundf, so halfway cases round away from zero.
struct ggml_tensor * ggml_round(
2764+
struct ggml_context * ctx,
2765+
struct ggml_tensor * a) {
2766+
return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2767+
}
2768+
2769+
// In-place variant of ggml_round: forwards to ggml_unary_inplace with GGML_UNARY_OP_ROUND.
// NOTE(review): presumably the result reuses a's storage — confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_round_inplace(
2770+
struct ggml_context * ctx,
2771+
struct ggml_tensor * a) {
2772+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2773+
}
2774+
2775+
// ggml_trunc
2776+
2777+
// Creates a TRUNC unary-op tensor from `a` by forwarding to ggml_unary with GGML_UNARY_OP_TRUNC.
// Returns whatever ggml_unary returns for (ctx, a, op).
struct ggml_tensor * ggml_trunc(
2778+
struct ggml_context * ctx,
2779+
struct ggml_tensor * a) {
2780+
return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2781+
}
2782+
2783+
// In-place variant of ggml_trunc: forwards to ggml_unary_inplace with GGML_UNARY_OP_TRUNC.
// NOTE(review): presumably the result reuses a's storage — confirm against ggml_unary_inplace.
struct ggml_tensor * ggml_trunc_inplace(
2784+
struct ggml_context * ctx,
2785+
struct ggml_tensor * a) {
2786+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2787+
}
2788+
27292789
struct ggml_tensor * ggml_glu(
27302790
struct ggml_context * ctx,
27312791
struct ggml_tensor * a,

0 commit comments

Comments
 (0)