yolo: read embedded file data (labels, alphabet images) from GGUF tensors

katsu560 · katsu560 · commit aaa93bc47a1e · 2024-06-16T00:52:48.000+09:00
diff --git a/examples/yolo/yolov3-tiny.cpp b/examples/yolo/yolov3-tiny.cpp
@@ -30,7 +30,7 @@ struct yolo_model {
     int height = 416;
     std::vector<conv2d_layer> conv2d_layers;
     struct ggml_context * ctx;
-    struct gguf_context * ggufctx;
+    struct gguf_context * ctx_gguf;
 };
 
 struct yolo_layer {
@@ -72,7 +72,7 @@ static bool load_model(const std::string & fname, yolo_model & model) {
         fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
         return false;
     }
-    model.ggufctx = ctx;
+    model.ctx_gguf = ctx;
     model.width  = 416;
     model.height = 416;
     model.conv2d_layers.resize(13);
@@ -157,15 +157,19 @@ static bool load_labels(const char * filename, std::vector<std::string> & labels
     return true;
 }
 
-static bool load_labels_kv(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
+static bool load_labels_gguf(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
 {
-    int key_id = gguf_find_key(ctx, filename);
+    int key_id = gguf_find_key_array(ctx, "embedded_files", filename);
     if (key_id == -1) {
         return false;
     }
-    const char * data = gguf_get_val_str(ctx, key_id);
-    uint64_t n = gguf_get_val_str_len(ctx, key_id);
-    membuf buf(data, data + n);
+    char *data = NULL;
+    size_t size = 0;
+    int tensor = gguf_find_and_get_tensor(ctx, filename, &data, &size);
+    if (tensor == -1) {
+        return false;
+    }
+    membuf buf(data, data + size);
     std::istream file_in(&buf);
     if (!file_in) {
         return false;
@@ -194,21 +198,26 @@ static bool load_alphabet(std::vector<yolo_image> & alphabet)
     return true;
 }
 
-static bool load_alphabet_kv(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
+static bool load_alphabet_gguf(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
 {
     alphabet.resize(8 * 128);
     for (int j = 0; j < 8; j++) {
         for (int i = 32; i < 127; i++) {
             char fname[256];
-            sprintf(fname, "/data/labels/%d_%d.png", i, j);
-            int key_id = gguf_find_key(ctx, fname);
+            sprintf(fname, "data/labels/%d_%d.png", i, j);
+            int key_id = gguf_find_key_array(ctx, "embedded_files", fname);
             if (key_id == -1) {
-                fprintf(stderr, "Cannot find '%s'\n", fname);
+                fprintf(stderr, "Cannot find '%s' in embedded_files\n", fname);
+                return false;
+            }
+            char *data = NULL;
+            size_t size = 0;
+            int tensor = gguf_find_and_get_tensor(ctx, fname, &data, &size);
+            if (tensor == -1) {
+                fprintf(stderr, "Cannot find '%s' in tensor\n", fname);
                 return false;
             }
-            const char * data = gguf_get_val_str(ctx, key_id);
-            uint64_t n = gguf_get_val_str_len(ctx, key_id);
-            if (!load_image_from_memory(data, n, alphabet[j*128 + i])) {
+            if (!load_image_from_memory(data, size, alphabet[j*128 + i])) {
                 fprintf(stderr, "Cannot load '%s'\n", fname);
                 return false;
             }
@@ -499,7 +508,7 @@ void detect(yolo_image & img, const yolo_model & model, float thresh, const std:
     print_shape(18, result);
     result = ggml_upscale(ctx0, result, 2);
     print_shape(19, result);
-    result = ggml_concat(ctx0, result, layer_8);
+    result = ggml_concat(ctx0, result, layer_8, 2);
     print_shape(20, result);
     result = apply_conv2d(ctx0, result, model.conv2d_layers[11]);
     print_shape(21, result);
@@ -590,15 +599,15 @@ int main(int argc, char *argv[])
         return 1;
     }
     std::vector<std::string> labels;
-    if (!load_labels_kv(model.ggufctx, "/data/coco.names", labels)) {
-        fprintf(stderr, "%s: failed to load labels from '/data/coco.names' in model\n", __func__);
+    if (!load_labels_gguf(model.ctx_gguf, "data/coco.names", labels)) {
+        fprintf(stderr, "%s: failed to load labels from 'data/coco.names' in model\n", __func__);
         if (!load_labels("data/coco.names", labels)) {
             fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
             return 1;
         }
     }
     std::vector<yolo_image> alphabet;
-    if (!load_alphabet_kv(model.ggufctx, alphabet)) {
+    if (!load_alphabet_gguf(model.ctx_gguf, alphabet)) {
         fprintf(stderr, "%s: failed to load alphabet from model\n", __func__);
         if (!load_alphabet(alphabet)) {
             fprintf(stderr, "%s: failed to load alphabet\n", __func__);
diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
@@ -481,9 +481,7 @@ extern "C" {
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
 
-        GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_ATTN_EXT,
-        GGML_OP_FLASH_FF,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_SSM_CONV,
         GGML_OP_SSM_SCAN,
@@ -565,7 +563,8 @@ extern "C" {
     // n-dimensional tensor
     struct ggml_tensor {
         enum ggml_type         type;
-        enum ggml_backend_type backend;
+
+        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
         struct ggml_backend_buffer * buffer;
 
@@ -1008,12 +1007,13 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
-    // concat a and b on dim 2
+    // concat a and b along dim
     // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_concat(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            int                   dim);
 
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
@@ -1459,11 +1459,12 @@ extern "C" {
             struct ggml_tensor  * b);
 
     // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (DEPRECATED)
+    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
     // if mode & 2 == 1, GPT-NeoX style
     // if mode & 4 == 1, ChatGLM style
     //
     // b is an int32 vector with size a->ne[2], it contains the positions
+    // c is an optional tensor of frequency factors (e.g. phi3-128k)
     GGML_API struct ggml_tensor * ggml_rope(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1482,10 +1483,11 @@ extern "C" {
             int                   n_ctx);
 
     // custom RoPE
-    GGML_API struct ggml_tensor * ggml_rope_custom(
+    GGML_API struct ggml_tensor * ggml_rope_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
@@ -1498,10 +1500,11 @@ extern "C" {
             float                 beta_slow);
 
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
+    GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
@@ -1513,25 +1516,57 @@ extern "C" {
             float                 beta_fast,
             float                 beta_slow);
 
-    // compute correction dims for YaRN RoPE scaling
-    GGML_CALL void ggml_rope_yarn_corr_dims(
-        int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            int                   n_dims,
+            int                   mode,
+            int                   n_ctx,
+            int                   n_orig_ctx,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow),
+        "use ggml_rope_ext instead");
 
-    // xPos RoPE, in-place, returns view(a)
-    GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             int                   n_dims,
-            float                 base,
-            bool                  down);
+            int                   mode,
+            int                   n_ctx,
+            int                   n_orig_ctx,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow),
+        "use ggml_rope_ext_inplace instead");
+
+    struct ggml_tensor * ggml_rope_xpos_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int                   n_dims,
+        float                 base,
+        bool                  down);
+
+    // compute correction dims for YaRN RoPE scaling
+    GGML_CALL void ggml_rope_yarn_corr_dims(
+        int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
@@ -1733,13 +1768,6 @@ extern "C" {
             struct ggml_tensor  * a,
             int                   k);
 
-    GGML_API struct ggml_tensor * ggml_flash_attn(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * q,
-            struct ggml_tensor  * k,
-            struct ggml_tensor  * v,
-            bool                  masked);
-
 #define GGML_KQ_MASK_PAD 32
 
     // q:    [n_embd, n_batch,     n_head,    1]
@@ -1760,6 +1788,7 @@ extern "C" {
             struct ggml_tensor * a,
             enum ggml_prec       prec);
 
+    // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,
@@ -1768,14 +1797,6 @@ extern "C" {
            struct ggml_tensor  * d,
            bool                  masked);
 
-    GGML_API struct ggml_tensor * ggml_flash_ff(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b0,
-            struct ggml_tensor  * b1,
-            struct ggml_tensor  * c0,
-            struct ggml_tensor  * c1);
-
     GGML_API struct ggml_tensor * ggml_ssm_conv(
             struct ggml_context * ctx,
             struct ggml_tensor  * s,
@@ -2298,35 +2319,37 @@ extern "C" {
 
     GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
     GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
+    GGML_API int          gguf_find_key_array(const struct gguf_context * ctx, const char * key, const char * val);
     GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
 
     GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
     GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
 
     // will abort if the wrong type is used for the key
-    GGML_API uint8_t          gguf_get_val_u8     (const struct gguf_context * ctx, int key_id);
-    GGML_API int8_t           gguf_get_val_i8     (const struct gguf_context * ctx, int key_id);
-    GGML_API uint16_t         gguf_get_val_u16    (const struct gguf_context * ctx, int key_id);
-    GGML_API int16_t          gguf_get_val_i16    (const struct gguf_context * ctx, int key_id);
-    GGML_API uint32_t         gguf_get_val_u32    (const struct gguf_context * ctx, int key_id);
-    GGML_API int32_t          gguf_get_val_i32    (const struct gguf_context * ctx, int key_id);
-    GGML_API float            gguf_get_val_f32    (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t         gguf_get_val_u64    (const struct gguf_context * ctx, int key_id);
-    GGML_API int64_t          gguf_get_val_i64    (const struct gguf_context * ctx, int key_id);
-    GGML_API double           gguf_get_val_f64    (const struct gguf_context * ctx, int key_id);
-    GGML_API bool             gguf_get_val_bool   (const struct gguf_context * ctx, int key_id);
-    GGML_API const char *     gguf_get_val_str    (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t         gguf_get_val_str_len(const struct gguf_context * ctx, int key_id);
-    GGML_API const void *     gguf_get_val_data   (const struct gguf_context * ctx, int key_id);
-    GGML_API int              gguf_get_arr_n      (const struct gguf_context * ctx, int key_id);
-    GGML_API const void *     gguf_get_arr_data   (const struct gguf_context * ctx, int key_id);
-    GGML_API const char *     gguf_get_arr_str    (const struct gguf_context * ctx, int key_id, int i);
-
-    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
-    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
-    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
-    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
+    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
+    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
+    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
+    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
+    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
+    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
+    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
+    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
+    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
+    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
+    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
+    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
+    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
+    GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
+    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
+    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
+
+    GGML_API int            gguf_get_n_tensors      (const struct gguf_context * ctx);
+    GGML_API int            gguf_find_tensor        (const struct gguf_context * ctx, const char * name);
+    GGML_API size_t         gguf_get_tensor_offset  (const struct gguf_context * ctx, int i);
+    GGML_API char *         gguf_get_tensor_name    (const struct gguf_context * ctx, int i);
+    GGML_API enum ggml_type gguf_get_tensor_type    (const struct gguf_context * ctx, int i);
+    GGML_API size_t         gguf_get_tensor_size    (const struct gguf_context * ctx, int i);
+    GGML_API int            gguf_find_and_get_tensor(const struct gguf_context * ctx, const char * name, char ** data, size_t * size);
 
     // removes key if it exists
     GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
@@ -2344,7 +2367,6 @@ extern "C" {
     GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double   val);
     GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool     val);
     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    GGML_API void gguf_set_val_data(struct gguf_context * ctx, const char * key, const char * val, int n);
     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
     GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
 
@@ -2391,8 +2413,10 @@ extern "C" {
     GGML_API int ggml_cpu_has_avx512     (void);
     GGML_API int ggml_cpu_has_avx512_vbmi(void);
     GGML_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_API int ggml_cpu_has_avx512_bf16(void);
     GGML_API int ggml_cpu_has_fma        (void);
     GGML_API int ggml_cpu_has_neon       (void);
+    GGML_API int ggml_cpu_has_sve        (void);
     GGML_API int ggml_cpu_has_arm_fma    (void);
     GGML_API int ggml_cpu_has_metal      (void);
     GGML_API int ggml_cpu_has_f16c       (void);
diff --git a/src/ggml.c b/src/ggml.c