Commit 820e045

Merge branch 'ggml-org:master' into master
2 parents 4f34a07 + 7a50cf3 commit 820e045

48 files changed: +3454 -2430 lines

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -1760,7 +1760,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
     add_opt(common_arg(
         {"-t", "--threads"}, "N",
-        string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
+        string_format("number of CPU threads to use during generation (default: %d)", params.cpuparams.n_threads),
        [](common_params & params, int value) {
            params.cpuparams.n_threads = value;
            if (params.cpuparams.n_threads <= 0) {
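The only change here is the help text for -t/--threads, which now reads "CPU threads". For reference, an invocation that sets it might look like the following (the binary name and model path are illustrative, not part of this commit):

```
llama-cli -m ./models/model.gguf -p "Hello" -t 8
```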

docs/ops.md

Lines changed: 4 additions & 0 deletions

@@ -22,6 +22,7 @@ Legend:
 | ARANGE ||||||||||
 | ARGMAX ||||||||||
 | ARGSORT ||||||||||
+| CEIL ||||||||||
 | CLAMP ||||| 🟡 | 🟡 || 🟡 ||
 | CONCAT |||| 🟡 || 🟡 | 🟡 |||
 | CONT || 🟡 |||| 🟡 | 🟡 | 🟡 ||
@@ -41,6 +42,7 @@ Legend:
 | ELU |||| 🟡 | 🟡 || 🟡 |||
 | EXP |||| 🟡 | 🟡 || 🟡 |||
 | FLASH_ATTN_EXT || 🟡 || 🟡 | 🟡 ||| 🟡 ||
+| FLOOR ||||||||||
 | GATED_LINEAR_ATTN ||||||||||
 | GEGLU ||||| 🟡 ||| 🟡 ||
 | GEGLU_ERF ||||| 🟡 ||| 🟡 ||
@@ -82,6 +84,7 @@ Legend:
 | ROLL ||||||||||
 | ROPE || 🟡 ||||||||
 | ROPE_BACK ||||||||||
+| ROUND ||||||||||
 | RWKV_WKV6 ||||||||||
 | RWKV_WKV7 ||||||||||
 | SCALE || 🟡 ||||||||
@@ -108,5 +111,6 @@ Legend:
 | TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
 | TIMESTEP_EMBEDDING ||||||||||
 | TOPK_MOE ||||||||||
+| TRUNC ||||||||||
 | UPSCALE || 🟡 ||| 🟡 || 🟡 |||
 | XIELU ||||||||||

docs/ops/CPU.csv

Lines changed: 16 additions & 0 deletions

@@ -59,6 +59,14 @@
 "CPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -119,6 +127,14 @@
 "CPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CPU"
 "CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CPU"
 "CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CPU"

ggml/include/ggml.h

Lines changed: 44 additions & 0 deletions

@@ -577,6 +577,10 @@ extern "C" {
         GGML_UNARY_OP_EXP,
         GGML_UNARY_OP_GELU_ERF,
         GGML_UNARY_OP_XIELU,
+        GGML_UNARY_OP_FLOOR,
+        GGML_UNARY_OP_CEIL,
+        GGML_UNARY_OP_ROUND,
+        GGML_UNARY_OP_TRUNC,

        GGML_UNARY_OP_COUNT,
    };
@@ -1151,6 +1155,46 @@ extern "C" {
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_floor(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_floor_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_ceil(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_ceil_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_round(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_round_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    /**
+     * Truncates the fractional part of each element in the tensor (towards zero).
+     * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
+     * Similar to std::trunc in C/C++.
+     */
+
+    GGML_API struct ggml_tensor * ggml_trunc(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_trunc_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+
+
    // xIELU activation function
    // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
    // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
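The new ops plug into the usual ggml graph-building flow: each call records a node that is evaluated when the graph is computed. Below is a minimal sketch of using ggml_trunc on the simple CPU path; the context size, tensor shape, fill values, and thread count are illustrative and not taken from the commit:

```c
#include <stdio.h>

#include "ggml.h"
#include "ggml-cpu.h"   // ggml_graph_compute_with_ctx

int main(void) {
    // small scratch context; size chosen arbitrarily for this sketch
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // 4-element f32 input, filled by hand
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    float * in = (float *) a->data;
    in[0] = 3.7f; in[1] = -2.9f; in[2] = 0.5f; in[3] = -0.5f;

    // truncate towards zero: expect 3, -2, 0, -0
    struct ggml_tensor * t = ggml_trunc(ctx, a);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, t);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    const float * out = (const float *) t->data;
    for (int i = 0; i < 4; i++) {
        printf("trunc(%4.1f) = %4.1f\n", in[i], out[i]);
    }

    ggml_free(ctx);
    return 0;
}
```

This assumes the classic CPU-only setup where tensor data lives directly in the ggml context; with other backends the data would instead be staged through backend buffers.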

ggml/src/ggml-cann/acl_tensor.cpp

old mode 100755, new mode 100644
Lines changed: 46 additions & 43 deletions

@@ -51,28 +51,31 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
        return ACL_DT_UNDEFINED;
 }

-aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
-                                   size_t* nb, int64_t dims, aclFormat format,
-                                   size_t offset) {
+aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
+                                    int64_t *           ne,
+                                    size_t *            nb,
+                                    int64_t             dims,
+                                    aclFormat           format,
+                                    size_t              offset) {
     // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
     // added.
     int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];

     if (ne == nullptr) {
         for (int i = 0; i < GGML_MAX_DIMS; i++) {
-            acl_ne[i] = tensor->ne[i];
+            acl_ne[i]     = tensor->ne[i];
             // The step size of acl is in elements.
             acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
         }
     } else {
         // With bcast
         for (int i = 0; i < dims; i++) {
-            acl_ne[i] = ne[i];
+            acl_ne[i]     = ne[i];
             acl_stride[i] = nb[i] / ggml_element_size(tensor);
         }
     }

-    int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
+    int64_t final_dims      = (dims == 0 ? GGML_MAX_DIMS : dims);
     int64_t acl_storage_len = 1;
     for (int i = 0; i < final_dims; i++) {
         acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
@@ -84,15 +87,13 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
     std::reverse(acl_ne, acl_ne + final_dims);
     std::reverse(acl_stride, acl_stride + final_dims);

-    aclTensor* acl_tensor = aclCreateTensor(
-        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
-        elem_offset, format, &acl_storage_len, 1,
-        tensor->data);
+    aclTensor * acl_tensor = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
+                                             elem_offset, format, &acl_storage_len, 1, tensor->data);

     return acl_tensor;
 }

-bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
+bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) {
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
             return true;
@@ -101,15 +102,16 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
     return false;
 }

-int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
-                                  const ggml_tensor* src1,
-                                  int64_t* bcast_src0_ne,
-                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
-                                  size_t* bcast_src1_nb) {
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
+                                  const ggml_tensor * src1,
+                                  int64_t *           bcast_src0_ne,
+                                  int64_t *           bcast_src1_ne,
+                                  size_t *            bcast_src0_nb,
+                                  size_t *            bcast_src1_nb) {
     GGML_ASSERT(ggml_can_repeat(src1, src0));
     int bcast_dim_cnt = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
-        int64_t nr = src0->ne[i] / src1->ne[i];
+        int64_t nr                   = src0->ne[i] / src1->ne[i];
         bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
         bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
         bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
@@ -119,21 +121,26 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
             // Need to add an extra dim.
             bcast_src0_ne[bcast_dim_cnt] = nr;
             bcast_src1_ne[bcast_dim_cnt] = 1;
-            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
-                                           bcast_src0_ne[bcast_dim_cnt - 1];
-            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
-                                           bcast_src1_ne[bcast_dim_cnt - 1];
+            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * bcast_src0_ne[bcast_dim_cnt - 1];
+            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * bcast_src1_ne[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
     return bcast_dim_cnt;
 }

-int64_t ggml_cann_get_mulmat_bcast_shape(
-    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
-    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
-    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
+int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
+                                         const int64_t * weight_ne,
+                                         const int64_t * dst_ne,
+                                         const size_t *  input_nb,
+                                         const size_t *  weight_nb,
+                                         const size_t *  dst_nb,
+                                         int64_t *       bcast_input_ne,
+                                         int64_t *       bcast_weight_ne,
+                                         int64_t *       bcast_dst_ne,
+                                         size_t *        bcast_input_nb,
+                                         size_t *        bcast_weight_nb,
+                                         size_t *        bcast_dst_nb) {
     // input and dst shoule in same shape, except first two dims.
     GGML_ASSERT(input_ne[2] == dst_ne[2]);
     GGML_ASSERT(input_ne[3] == dst_ne[3]);
@@ -148,34 +155,30 @@ int64_t ggml_cann_get_mulmat_bcast_shape(
         // Do not use bcast in the first two dimensions because we only support
         // the bcast batch dimension. Just copy them.
         if (i < 2 || nr == 1) {
-            bcast_input_ne[bcast_dim_cnt] = input_ne[i];
+            bcast_input_ne[bcast_dim_cnt]  = input_ne[i];
             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
-            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];
+            bcast_dst_ne[bcast_dim_cnt]    = dst_ne[i];

-            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
+            bcast_input_nb[bcast_dim_cnt]  = input_nb[i];
             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
-            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
+            bcast_dst_nb[bcast_dim_cnt]    = dst_nb[i];
             bcast_dim_cnt++;
         } else {
             // Need to add an extra dim.
-            bcast_input_ne[bcast_dim_cnt] = nr;
-            bcast_dst_ne[bcast_dim_cnt] = nr;
+            bcast_input_ne[bcast_dim_cnt]  = nr;
+            bcast_dst_ne[bcast_dim_cnt]    = nr;
             bcast_weight_ne[bcast_dim_cnt] = 1;
-            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
-            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
+            bcast_input_nb[bcast_dim_cnt]  = input_nb[i];
+            bcast_dst_nb[bcast_dim_cnt]    = dst_nb[i];
             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
             bcast_dim_cnt++;

-            bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
-            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
+            bcast_input_ne[bcast_dim_cnt]  = input_ne[i] / nr;
+            bcast_dst_ne[bcast_dim_cnt]    = dst_ne[i] / nr;
             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
-            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
-                                            bcast_input_ne[bcast_dim_cnt - 1];
-            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
-                                          bcast_dst_ne[bcast_dim_cnt - 1];
-            bcast_weight_nb[bcast_dim_cnt] =
-                bcast_weight_nb[bcast_dim_cnt - 1] *
-                bcast_weight_ne[bcast_dim_cnt - 1];
+            bcast_input_nb[bcast_dim_cnt]  = bcast_input_nb[bcast_dim_cnt - 1] * bcast_input_ne[bcast_dim_cnt - 1];
+            bcast_dst_nb[bcast_dim_cnt]    = bcast_dst_nb[bcast_dim_cnt - 1] * bcast_dst_ne[bcast_dim_cnt - 1];
+            bcast_weight_nb[bcast_dim_cnt] = bcast_weight_nb[bcast_dim_cnt - 1] * bcast_weight_ne[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
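The acl_tensor.cpp changes above are formatting only (one parameter per line, aligned assignments); behavior is unchanged. The rule applied by ggml_cann_need_bcast is small enough to restate on its own. The sketch below is a standalone illustration with made-up shapes, not the CANN code itself:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DIMS 4  // stand-in for GGML_MAX_DIMS

// Same rule as ggml_cann_need_bcast: the second operand needs the explicit
// bcast-shape handling when one of its dims differs from the first operand's
// dim and is not 1.
static bool need_bcast(const int64_t t0_ne[DIMS], const int64_t t1_ne[DIMS]) {
    for (int i = 0; i < DIMS; i++) {
        if (t1_ne[i] != t0_ne[i] && t1_ne[i] != 1) {
            return true;
        }
    }
    return false;
}

int main(void) {
    int64_t a[DIMS] = { 32, 16, 8, 4 };
    int64_t b[DIMS] = { 32,  1, 8, 4 };  // differs only where it is 1
    int64_t c[DIMS] = { 32,  2, 8, 4 };  // differs in a dim that is not 1
    printf("a vs b: %d\n", need_bcast(a, b));  // prints 0
    printf("a vs c: %d\n", need_bcast(a, c));  // prints 1
    return 0;
}
```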
