@@ -481,9 +481,7 @@ extern "C" {
481481 GGML_OP_ARGSORT ,
482482 GGML_OP_LEAKY_RELU ,
483483
484- GGML_OP_FLASH_ATTN ,
485484 GGML_OP_FLASH_ATTN_EXT ,
486- GGML_OP_FLASH_FF ,
487485 GGML_OP_FLASH_ATTN_BACK ,
488486 GGML_OP_SSM_CONV ,
489487 GGML_OP_SSM_SCAN ,
@@ -565,7 +563,8 @@ extern "C" {
565563 // n-dimensional tensor
566564 struct ggml_tensor {
567565 enum ggml_type type ;
568- enum ggml_backend_type backend ;
566+
567+ GGML_DEPRECATED (enum ggml_backend_type backend , "use the buffer type to find the storage location of the tensor" );
569568
570569 struct ggml_backend_buffer * buffer ;
571570
@@ -1008,12 +1007,13 @@ extern "C" {
10081007 struct ggml_tensor * a ,
10091008 struct ggml_tensor * b );
10101009
1011- // concat a and b on dim 2
1010+ // concat a and b along dim
10121011 // used in stable-diffusion
10131012 GGML_API struct ggml_tensor * ggml_concat (
10141013 struct ggml_context * ctx ,
10151014 struct ggml_tensor * a ,
1016- struct ggml_tensor * b );
1015+ struct ggml_tensor * b ,
1016+ int dim );
10171017
10181018 GGML_API struct ggml_tensor * ggml_abs (
10191019 struct ggml_context * ctx ,
@@ -1459,11 +1459,12 @@ extern "C" {
14591459 struct ggml_tensor * b );
14601460
14611461 // rotary position embedding
1462- // if mode & 1 == 1, skip n_past elements (DEPRECATED )
1462+ // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED )
14631463 // if (mode & 2) != 0, GPT-NeoX style
14641464 // if (mode & 4) != 0, ChatGLM style
14651465 //
14661466 // b is an int32 vector with size a->ne[2], it contains the positions
1467+ // c is freq factors (e.g. phi3-128k), (optional)
14671468 GGML_API struct ggml_tensor * ggml_rope (
14681469 struct ggml_context * ctx ,
14691470 struct ggml_tensor * a ,
@@ -1482,10 +1483,11 @@ extern "C" {
14821483 int n_ctx );
14831484
14841485 // custom RoPE
1485- GGML_API struct ggml_tensor * ggml_rope_custom (
1486+ GGML_API struct ggml_tensor * ggml_rope_ext (
14861487 struct ggml_context * ctx ,
14871488 struct ggml_tensor * a ,
14881489 struct ggml_tensor * b ,
1490+ struct ggml_tensor * c ,
14891491 int n_dims ,
14901492 int mode ,
14911493 int n_ctx ,
@@ -1498,10 +1500,11 @@ extern "C" {
14981500 float beta_slow );
14991501
15001502 // in-place, returns view(a)
1501- GGML_API struct ggml_tensor * ggml_rope_custom_inplace (
1503+ GGML_API struct ggml_tensor * ggml_rope_ext_inplace (
15021504 struct ggml_context * ctx ,
15031505 struct ggml_tensor * a ,
15041506 struct ggml_tensor * b ,
1507+ struct ggml_tensor * c ,
15051508 int n_dims ,
15061509 int mode ,
15071510 int n_ctx ,
@@ -1513,25 +1516,57 @@ extern "C" {
15131516 float beta_fast ,
15141517 float beta_slow );
15151518
1516- // compute correction dims for YaRN RoPE scaling
1517- GGML_CALL void ggml_rope_yarn_corr_dims (
1518- int n_dims , int n_orig_ctx , float freq_base , float beta_fast , float beta_slow , float dims [2 ]);
1519+ GGML_DEPRECATED (GGML_API struct ggml_tensor * ggml_rope_custom (
1520+ struct ggml_context * ctx ,
1521+ struct ggml_tensor * a ,
1522+ struct ggml_tensor * b ,
1523+ int n_dims ,
1524+ int mode ,
1525+ int n_ctx ,
1526+ int n_orig_ctx ,
1527+ float freq_base ,
1528+ float freq_scale ,
1529+ float ext_factor ,
1530+ float attn_factor ,
1531+ float beta_fast ,
1532+ float beta_slow ),
1533+ "use ggml_rope_ext instead ");
15191534
1520- // xPos RoPE, in-place, returns view(a)
1521- GGML_API struct ggml_tensor * ggml_rope_xpos_inplace (
1535+ GGML_DEPRECATED (GGML_API struct ggml_tensor * ggml_rope_custom_inplace (
15221536 struct ggml_context * ctx ,
15231537 struct ggml_tensor * a ,
15241538 struct ggml_tensor * b ,
15251539 int n_dims ,
1526- float base ,
1527- bool down );
1540+ int mode ,
1541+ int n_ctx ,
1542+ int n_orig_ctx ,
1543+ float freq_base ,
1544+ float freq_scale ,
1545+ float ext_factor ,
1546+ float attn_factor ,
1547+ float beta_fast ,
1548+ float beta_slow ),
1549+ "use ggml_rope_ext_inplace instead ");
1550+
1551+ struct ggml_tensor * ggml_rope_xpos_inplace (
1552+ struct ggml_context * ctx ,
1553+ struct ggml_tensor * a ,
1554+ struct ggml_tensor * b ,
1555+ int n_dims ,
1556+ float base ,
1557+ bool down );
1558+
1559+ // compute correction dims for YaRN RoPE scaling
1560+ GGML_CALL void ggml_rope_yarn_corr_dims (
1561+ int n_dims , int n_orig_ctx , float freq_base , float beta_fast , float beta_slow , float dims [2 ]);
15281562
15291563 // rotary position embedding backward, i.e. compute dx from dy
15301564 // a - dy
15311565 GGML_API struct ggml_tensor * ggml_rope_back (
15321566 struct ggml_context * ctx ,
15331567 struct ggml_tensor * a ,
15341568 struct ggml_tensor * b ,
1569+ struct ggml_tensor * c ,
15351570 int n_dims ,
15361571 int mode ,
15371572 int n_ctx ,
@@ -1733,13 +1768,6 @@ extern "C" {
17331768 struct ggml_tensor * a ,
17341769 int k );
17351770
1736- GGML_API struct ggml_tensor * ggml_flash_attn (
1737- struct ggml_context * ctx ,
1738- struct ggml_tensor * q ,
1739- struct ggml_tensor * k ,
1740- struct ggml_tensor * v ,
1741- bool masked );
1742-
17431771#define GGML_KQ_MASK_PAD 32
17441772
17451773 // q: [n_embd, n_batch, n_head, 1]
@@ -1760,6 +1788,7 @@ extern "C" {
17601788 struct ggml_tensor * a ,
17611789 enum ggml_prec prec );
17621790
1791+ // TODO: needs to be adapted to ggml_flash_attn_ext
17631792 GGML_API struct ggml_tensor * ggml_flash_attn_back (
17641793 struct ggml_context * ctx ,
17651794 struct ggml_tensor * q ,
@@ -1768,14 +1797,6 @@ extern "C" {
17681797 struct ggml_tensor * d ,
17691798 bool masked );
17701799
1771- GGML_API struct ggml_tensor * ggml_flash_ff (
1772- struct ggml_context * ctx ,
1773- struct ggml_tensor * a ,
1774- struct ggml_tensor * b0 ,
1775- struct ggml_tensor * b1 ,
1776- struct ggml_tensor * c0 ,
1777- struct ggml_tensor * c1 );
1778-
17791800 GGML_API struct ggml_tensor * ggml_ssm_conv (
17801801 struct ggml_context * ctx ,
17811802 struct ggml_tensor * s ,
@@ -2298,35 +2319,37 @@ extern "C" {
22982319
22992320 GGML_API int gguf_get_n_kv (const struct gguf_context * ctx );
23002321 GGML_API int gguf_find_key (const struct gguf_context * ctx , const char * key );
2322+ GGML_API int gguf_find_key_array (const struct gguf_context * ctx , const char * key , const char * val );
23012323 GGML_API const char * gguf_get_key (const struct gguf_context * ctx , int key_id );
23022324
23032325 GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx , int key_id );
23042326 GGML_API enum gguf_type gguf_get_arr_type (const struct gguf_context * ctx , int key_id );
23052327
23062328 // will abort if the wrong type is used for the key
2307- GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx , int key_id );
2308- GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx , int key_id );
2309- GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx , int key_id );
2310- GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx , int key_id );
2311- GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx , int key_id );
2312- GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx , int key_id );
2313- GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx , int key_id );
2314- GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx , int key_id );
2315- GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx , int key_id );
2316- GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx , int key_id );
2317- GGML_API bool gguf_get_val_bool (const struct gguf_context * ctx , int key_id );
2318- GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx , int key_id );
2319- GGML_API uint64_t gguf_get_val_str_len (const struct gguf_context * ctx , int key_id );
2320- GGML_API const void * gguf_get_val_data (const struct gguf_context * ctx , int key_id );
2321- GGML_API int gguf_get_arr_n (const struct gguf_context * ctx , int key_id );
2322- GGML_API const void * gguf_get_arr_data (const struct gguf_context * ctx , int key_id );
2323- GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx , int key_id , int i );
2324-
2325- GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx );
2326- GGML_API int gguf_find_tensor (const struct gguf_context * ctx , const char * name );
2327- GGML_API size_t gguf_get_tensor_offset (const struct gguf_context * ctx , int i );
2328- GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx , int i );
2329- GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx , int i );
2329+ GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx , int key_id );
2330+ GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx , int key_id );
2331+ GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx , int key_id );
2332+ GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx , int key_id );
2333+ GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx , int key_id );
2334+ GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx , int key_id );
2335+ GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx , int key_id );
2336+ GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx , int key_id );
2337+ GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx , int key_id );
2338+ GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx , int key_id );
2339+ GGML_API bool gguf_get_val_bool (const struct gguf_context * ctx , int key_id );
2340+ GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx , int key_id );
2341+ GGML_API const void * gguf_get_val_data (const struct gguf_context * ctx , int key_id );
2342+ GGML_API int gguf_get_arr_n (const struct gguf_context * ctx , int key_id );
2343+ GGML_API const void * gguf_get_arr_data (const struct gguf_context * ctx , int key_id );
2344+ GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx , int key_id , int i );
2345+
2346+ GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx );
2347+ GGML_API int gguf_find_tensor (const struct gguf_context * ctx , const char * name );
2348+ GGML_API size_t gguf_get_tensor_offset (const struct gguf_context * ctx , int i );
2349+ GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx , int i );
2350+ GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx , int i );
2351+ GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx , int i );
2352+ GGML_API int gguf_find_and_get_tensor (const struct gguf_context * ctx , const char * name , char * * data , size_t * size );
23302353
23312354 // removes key if it exists
23322355 GGML_API void gguf_remove_key (struct gguf_context * ctx , const char * key );
@@ -2344,7 +2367,6 @@ extern "C" {
23442367 GGML_API void gguf_set_val_f64 (struct gguf_context * ctx , const char * key , double val );
23452368 GGML_API void gguf_set_val_bool (struct gguf_context * ctx , const char * key , bool val );
23462369 GGML_API void gguf_set_val_str (struct gguf_context * ctx , const char * key , const char * val );
2347- GGML_API void gguf_set_val_data (struct gguf_context * ctx , const char * key , const char * val , int n );
23482370 GGML_API void gguf_set_arr_data (struct gguf_context * ctx , const char * key , enum gguf_type type , const void * data , int n );
23492371 GGML_API void gguf_set_arr_str (struct gguf_context * ctx , const char * key , const char * * data , int n );
23502372
@@ -2391,8 +2413,10 @@ extern "C" {
23912413 GGML_API int ggml_cpu_has_avx512 (void );
23922414 GGML_API int ggml_cpu_has_avx512_vbmi (void );
23932415 GGML_API int ggml_cpu_has_avx512_vnni (void );
2416+ GGML_API int ggml_cpu_has_avx512_bf16 (void );
23942417 GGML_API int ggml_cpu_has_fma (void );
23952418 GGML_API int ggml_cpu_has_neon (void );
2419+ GGML_API int ggml_cpu_has_sve (void );
23962420 GGML_API int ggml_cpu_has_arm_fma (void );
23972421 GGML_API int ggml_cpu_has_metal (void );
23982422 GGML_API int ggml_cpu_has_f16c (void );
0 commit comments