Skip to content

Commit 75faa5a

Browse files
safranowith authored and hipudding committed
Add TRUNC unary op with SYCL support
1 parent 25f1045 commit 75faa5a

File tree

17 files changed

+155
-15
lines changed

17 files changed

+155
-15
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
---

docs/ops.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,5 @@ Legend:
100100
| SWIGLU ||||| 🟡 ||| 🟡 ||
101101
| TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
102102
| TIMESTEP_EMBEDDING ||||||||||
103+
| TRUNC ||||||||||
103104
| UPSCALE || 🟡 ||| 🟡 || 🟡 |||

docs/ops/CPU.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
22
"CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
33
"CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
4+
"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
5+
"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
46
"CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
57
"CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
68
"CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -61,6 +63,8 @@
6163
"CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
6264
"CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
6365
"CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
66+
"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
67+
"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6468
"CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
6569
"CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6670
"CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"

docs/ops/SYCL.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
22
"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
33
"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
4+
"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
5+
"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
46
"SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
57
"SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
68
"SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
@@ -61,6 +63,8 @@
6163
"SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
6264
"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
6365
"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
66+
"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
67+
"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
6468
"SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
6569
"SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
6670
"SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"

ggml/include/ggml.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ extern "C" {
559559

560560
enum ggml_unary_op {
561561
GGML_UNARY_OP_ABS,
562+
GGML_UNARY_OP_TRUNC,
562563
GGML_UNARY_OP_SGN,
563564
GGML_UNARY_OP_NEG,
564565
GGML_UNARY_OP_STEP,
@@ -1027,6 +1028,14 @@ extern "C" {
10271028
GGML_API struct ggml_tensor * ggml_abs_inplace(
10281029
struct ggml_context * ctx,
10291030
struct ggml_tensor * a);
1031+
1032+
GGML_API struct ggml_tensor * ggml_trunc(
1033+
struct ggml_context * ctx,
1034+
struct ggml_tensor * a);
1035+
1036+
GGML_API struct ggml_tensor * ggml_trunc_inplace(
1037+
struct ggml_context * ctx,
1038+
struct ggml_tensor * a);
10301039

10311040
GGML_API struct ggml_tensor * ggml_sgn(
10321041
struct ggml_context * ctx,

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,21 +1425,25 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
14251425
* @param start Starting exponent offset.
14261426
* @param stop Stopping exponent offset (exclusive).
14271427
* @param step Step size for the exponent increment.
1428+
* @param dtype Data type for slope tensor.
14281429
*/
14291430
static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_buffer,
1430-
float m, int64_t size, float start, float stop, float step){
1431+
float m, int64_t size, float start, float stop, float step, ggml_type dtype){
1432+
aclDataType acl_type = ggml_cann_type_mapping(dtype);
1433+
size_t type_size = ggml_type_size(dtype);
1434+
14311435
int64_t ne[] = {size};
1432-
size_t nb[] = {sizeof(uint16_t)};
1436+
size_t nb[] = {type_size};
14331437

1434-
ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * sizeof(uint16_t));
1438+
ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * type_size);
14351439
void* arange_buffer = arange_allocator.get();
14361440

14371441
aclTensor* arange_tensor = ggml_cann_create_tensor(
1438-
arange_buffer, ACL_FLOAT16, sizeof(uint16_t), ne, nb, 1);
1442+
arange_buffer, acl_type, type_size, ne, nb, 1);
14391443
aclnn_arange(ctx, arange_tensor, start, stop, step, size);
14401444

14411445
aclTensor* slope_tensor = ggml_cann_create_tensor(
1442-
slope_buffer, ACL_FLOAT16, sizeof(uint16_t), ne, nb, 1);
1446+
slope_buffer, acl_type, type_size, ne, nb, 1);
14431447

14441448
aclScalar* sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
14451449

@@ -1470,10 +1474,11 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_bu
14701474
* @param n_head Total number of attention heads.
14711475
* @param slope_buffer Pointer to the output buffer (float array) for storing slopes.
14721476
* @param max_bias Maximum bias value for slope computation.
1477+
* @param dtype Data type for slope tensor.
14731478
*
14741479
*/
14751480
static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
1476-
void* slope_buffer, float max_bias) {
1481+
void* slope_buffer, float max_bias, ggml_type dtype) {
14771482
const int n_head_log2 = 1u << (uint32_t) floor(log2(n_head));
14781483

14791484
float m0 = powf(2.0f, -(max_bias) / n_head_log2);
@@ -1490,7 +1495,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
14901495
float step = 1;
14911496
float count = n_head_log2;
14921497
// end needs to be +1 because aclnn uses a left-closed, right-open interval.
1493-
aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step);
1498+
aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step, dtype);
14941499
if (n_head_log2 < n_head) {
14951500
// arange2
14961501
start = 2 * (n_head_log2 - n_head_log2) + 1;
@@ -1499,7 +1504,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
14991504
count = n_head - n_head_log2;
15001505
aclnn_get_slope_inner(
15011506
ctx, (char *) slope_buffer + n_head_log2 * sizeof(float),
1502-
m1, count, start, end + 1, step);
1507+
m1, count, start, end + 1, step, dtype);
15031508
}
15041509
}
15051510

@@ -1536,7 +1541,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
15361541
ggml_cann_pool_alloc bias_allocator(
15371542
ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
15381543
bias_buffer = bias_allocator.get();
1539-
aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias);
1544+
aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias, GGML_TYPE_F32);
15401545
}
15411546

15421547
// broadcast for mask, slop and dst;
@@ -3269,7 +3274,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
32693274
const int64_t n_heads = src0->ne[2];
32703275
ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(uint16_t));
32713276
void* slope_buffer = slope_allocator.get();
3272-
aclnn_get_slope(ctx, n_heads, slope_buffer, maxBias);
3277+
aclnn_get_slope(ctx, n_heads, slope_buffer, maxBias, GGML_TYPE_F16);
32733278

32743279
int64_t slope_ne[] = {1, 1, n_heads, 1};
32753280
size_t slope_nb[GGML_MAX_DIMS];

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,6 +2162,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
21622162
case GGML_OP_UNARY:
21632163
switch (ggml_get_unary_op(node)) {
21642164
case GGML_UNARY_OP_ABS:
2165+
case GGML_UNARY_OP_TRUNC:
21652166
case GGML_UNARY_OP_SGN:
21662167
case GGML_UNARY_OP_NEG:
21672168
case GGML_UNARY_OP_STEP:

ggml/src/ggml-cpu/ops.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9336,6 +9336,10 @@ void ggml_compute_forward_unary(
93369336
{
93379337
ggml_compute_forward_abs(params, dst);
93389338
} break;
9339+
case GGML_UNARY_OP_TRUNC:
9340+
{
9341+
ggml_compute_forward_trunc(params, dst);
9342+
} break;
93399343
case GGML_UNARY_OP_SGN:
93409344
{
93419345
ggml_compute_forward_sgn(params, dst);

ggml/src/ggml-cpu/unary-ops.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ static inline float op_abs(float x) {
44
return fabsf(x);
55
}
66

7+
static inline float op_trunc(float x) {
8+
return truncf(x);
9+
}
10+
711
static inline float op_sgn(float x) {
812
return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
913
}
@@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor *
125129
unary_op<op_abs>(params, dst);
126130
}
127131

132+
void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
133+
unary_op<op_trunc>(params, dst);
134+
}
135+
128136
void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
129137
unary_op<op_sgn>(params, dst);
130138
}

ggml/src/ggml-cpu/unary-ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ extern "C" {
77
#endif
88

99
void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst);
10+
void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
1011
void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst);
1112
void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst);
1213
void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst);

0 commit comments

Comments (0)