load files from model

katsu560 · katsu560 · commit 2c3603e402e6 · 2024-06-22T12:30:12.000+09:00
diff --git a/examples/yolo/yolo-image.cpp b/examples/yolo/yolo-image.cpp
@@ -88,6 +88,31 @@ bool load_image(const char *fname, yolo_image & img)
     return true;
 }
 
+// Decode an encoded image held in memory into a planar float image.
+//
+// buffer, len: the encoded bytes, handed to stb_image's stbi_load_from_memory.
+// img:         receives the width, the height, a channel count of 3 and the
+//              pixel data, stored channel-major (all of channel 0, then 1,
+//              then 2) with every 8-bit sample divided by 255.
+//
+// Returns false when the input is null/empty or cannot be decoded; img is not
+// modified in that case.
+bool load_image_from_memory(const char *buffer, int len, yolo_image & img)
+{
+    if (buffer == nullptr || len <= 0) {
+        return false;
+    }
+    int w = 0;
+    int h = 0;
+    int c = 0;
+    // Ask the decoder for exactly 3 output channels.
+    uint8_t * data = stbi_load_from_memory(reinterpret_cast<const uint8_t *>(buffer), len, &w, &h, &c, 3);
+    if (!data) {
+        return false;
+    }
+    // The decoded buffer has the 3 requested channels, whatever `c` was set to.
+    c = 3;
+    img.w = w;
+    img.h = h;
+    img.c = c;
+    // Sizes and indices are computed in size_t so large images cannot overflow int.
+    const size_t width    = static_cast<size_t>(w);
+    const size_t height   = static_cast<size_t>(h);
+    const size_t channels = static_cast<size_t>(c);
+    img.data.resize(width*height*channels);
+    for (size_t k = 0; k < channels; ++k){
+        for (size_t j = 0; j < height; ++j){
+            for (size_t i = 0; i < width; ++i){
+                // interleaved (pixel-major) source -> planar (channel-major) destination
+                const size_t dst_index = i + width*j + width*height*k;
+                const size_t src_index = k + channels*i + channels*width*j;
+                img.data[dst_index] = static_cast<float>(data[src_index])/255.;
+            }
+        }
+    }
+    stbi_image_free(data);
+    return true;
+}
+
 static yolo_image resize_image(const yolo_image & im, int w, int h)
 {
     yolo_image resized(w, h, im.c);
@@ -207,4 +232,4 @@ void draw_label(yolo_image & im, int row, int col, const yolo_image & label, con
             }
         }
     }
-}
+}
diff --git a/examples/yolo/yolo-image.h b/examples/yolo/yolo-image.h
@@ -32,6 +32,7 @@ struct yolo_image {
 };
 
 bool load_image(const char *fname, yolo_image & img);
+// Decode an encoded image from `len` bytes at `buffer` into `img`; returns false if decoding fails.
+bool load_image_from_memory(const char *buffer, int len, yolo_image & img);
 void draw_box_width(yolo_image & a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
 yolo_image letterbox_image(const yolo_image & im, int w, int h);
 bool save_image(const yolo_image & im, const char *name, int quality);
diff --git a/examples/yolo/yolov3-tiny.cpp b/examples/yolo/yolov3-tiny.cpp
@@ -30,6 +30,7 @@ struct yolo_model {
     int height = 416;
     std::vector<conv2d_layer> conv2d_layers;
     struct ggml_context * ctx;
+    struct gguf_context * ctx_gguf;
 };
 
 struct yolo_layer {
@@ -71,6 +72,7 @@ static bool load_model(const std::string & fname, yolo_model & model) {
         fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
         return false;
     }
+    model.ctx_gguf = ctx;
     model.width  = 416;
     model.height = 416;
     model.conv2d_layers.resize(13);
@@ -100,6 +102,47 @@ static bool load_model(const std::string & fname, yolo_model & model) {
     return true;
 }
 
+// istream from memory
+#include <streambuf>
+#include <istream>
+
+// Read-only stream buffer over a caller-owned byte range, so that in-memory
+// data can be parsed through std::istream. The bytes are not copied: the range
+// must stay valid for as long as the membuf is in use. Seeking (seekg/tellg)
+// is supported within the range.
+struct membuf : std::streambuf {
+    // View the half-open range [begin, end).
+    membuf(const char * begin, const char * end) {
+        // setg() takes non-const pointers; this class never writes through them.
+        char * b(const_cast<char *>(begin));
+        char * e(const_cast<char *>(end));
+        this->begin = b;
+        this->end = e;
+        this->setg(b, b, e);
+    }
+
+    // View `size` bytes starting at `base`.
+    membuf(const char * base, size_t size) {
+        // Must start from the `base` argument: the `begin` member is not
+        // initialised at this point.
+        char * b(const_cast<char *>(base));
+        this->begin = b;
+        this->end = b + size;
+        this->setg(b, b, this->end);
+    }
+
+    // Move the read position by `off` relative to `dir`. `which` is ignored
+    // because only a get area exists. Returns the new absolute position, or
+    // pos_type(off_type(-1)) without moving when the target would fall
+    // outside [begin, end] or `dir` is not one of beg/cur/end.
+    virtual pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which = std::ios_base::in) override {
+        (void) which;
+
+        char * origin = nullptr;
+        if (dir == std::ios_base::cur) {
+            origin = gptr();
+        } else if (dir == std::ios_base::end) {
+            origin = end;
+        } else if (dir == std::ios_base::beg) {
+            origin = begin;
+        } else {
+            return pos_type(off_type(-1));
+        }
+
+        // Reject any target before the first byte or past one-past-the-last.
+        if (off < begin - origin || off > end - origin) {
+            return pos_type(off_type(-1));
+        }
+
+        // setg (rather than gbump, which takes an int) keeps large offsets intact.
+        setg(begin, origin + off, end);
+        return pos_type(gptr() - eback());
+    }
+
+    // Absolute seek, expressed as a seek relative to the beginning.
+    virtual pos_type seekpos(std::streampos pos, std::ios_base::openmode mode) override {
+        return seekoff(pos - pos_type(off_type(0)), std::ios_base::beg, mode);
+    }
+
+    char * begin; // first byte of the viewed range
+    char * end;   // one past the last byte of the viewed range
+};
+
+
 static bool load_labels(const char * filename, std::vector<std::string> & labels)
 {
     std::ifstream file_in(filename);
@@ -114,6 +157,32 @@ static bool load_labels(const char * filename, std::vector<std::string> & labels
     return true;
 }
 
+// Load class labels from a text file embedded in the GGUF model.
+//
+// `filename` must be listed in the "embedded_files" array and must also name a
+// tensor; that tensor's bytes are read as text and each line is appended to
+// `labels`.
+//
+// Returns false, leaving `labels` untouched, when the file is not listed, has
+// no tensor, or the context holds no data, so the caller can fall back to
+// another source. Asserts that `labels` holds exactly 80 entries afterwards.
+static bool load_labels_gguf(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
+{
+    if (gguf_find_key_array(ctx, "embedded_files", filename) == -1) {
+        return false;
+    }
+    const int tensor = gguf_find_tensor(ctx, filename);
+    if (tensor == -1) {
+        return false;
+    }
+    const char * data = static_cast<const char *>(gguf_get_data(ctx));
+    if (data == nullptr) {
+        // no data attached to the context: nothing to read the file from
+        return false;
+    }
+    const size_t offset = gguf_get_tensor_offset(ctx, tensor);
+    const size_t len = gguf_get_tensor_size(ctx, tensor);
+    const char * first = data + offset;
+    // Wrap the embedded bytes in a stream so they can be split with getline.
+    membuf buf(first, first + len);
+    std::istream file_in(&buf);
+    std::string line;
+    while (std::getline(file_in, line)) {
+        labels.push_back(line);
+    }
+    GGML_ASSERT(labels.size() == 80);
+    return true;
+}
+
 static bool load_alphabet(std::vector<yolo_image> & alphabet)
 {
     alphabet.resize(8 * 128);
@@ -130,6 +199,35 @@ static bool load_alphabet(std::vector<yolo_image> & alphabet)
     return true;
 }
 
+// Load the label glyph images from files embedded in the GGUF model.
+//
+// For every i in [32, 127) and j in [0, 8) the file "data/labels/<i>_<j>.png"
+// must be listed in the "embedded_files" array and must also name a tensor; its
+// bytes are decoded into alphabet[j*128 + i]. `alphabet` is resized to 8*128
+// entries first.
+// NOTE(review): i is presumably a character code and j a glyph size - confirm.
+//
+// Returns false (after printing the reason to stderr) on the first file that
+// is missing or cannot be decoded, so the caller can fall back to another
+// source.
+static bool load_alphabet_gguf(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
+{
+    alphabet.resize(8 * 128);
+    // The data pointer does not depend on the file, so fetch it once.
+    const char * data = static_cast<const char *>(gguf_get_data(ctx));
+    if (data == nullptr) {
+        fprintf(stderr, "%s: model has no tensor data\n", __func__);
+        return false;
+    }
+    for (int j = 0; j < 8; j++) {
+        for (int i = 32; i < 127; i++) {
+            char fname[256];
+            snprintf(fname, sizeof(fname), "data/labels/%d_%d.png", i, j);
+            if (gguf_find_key_array(ctx, "embedded_files", fname) == -1) {
+                fprintf(stderr, "Cannot find '%s' in embedded_files\n", fname);
+                return false;
+            }
+            const int tensor = gguf_find_tensor(ctx, fname);
+            if (tensor == -1) {
+                fprintf(stderr, "Cannot find '%s' in tensor\n", fname);
+                return false;
+            }
+            const size_t offset = gguf_get_tensor_offset(ctx, tensor);
+            const size_t len = gguf_get_tensor_size(ctx, tensor);
+            // load_image_from_memory takes an int length: refuse sizes that
+            // do not survive the conversion instead of silently truncating.
+            const int len_int = static_cast<int>(len);
+            const bool len_fits = len_int >= 0 && static_cast<size_t>(len_int) == len;
+            if (!len_fits || !load_image_from_memory(data + offset, len_int, alphabet[j*128 + i])) {
+                fprintf(stderr, "Cannot load '%s'\n", fname);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const conv2d_layer & layer)
 {
     struct ggml_tensor * result = ggml_conv_2d(ctx, layer.weights, input, 1, 1, layer.padding, layer.padding, 1, 1);
@@ -503,14 +601,20 @@ int main(int argc, char *argv[])
         return 1;
     }
     std::vector<std::string> labels;
-    if (!load_labels("data/coco.names", labels)) {
-        fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
-        return 1;
+    if (!load_labels_gguf(model.ctx_gguf, "data/coco.names", labels)) {
+        fprintf(stderr, "%s: failed to load labels from 'data/coco.names' in model\n", __func__);
+        if (!load_labels("data/coco.names", labels)) {
+            fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
+            return 1;
+        }
     }
     std::vector<yolo_image> alphabet;
-    if (!load_alphabet(alphabet)) {
-        fprintf(stderr, "%s: failed to load alphabet\n", __func__);
-        return 1;
+    if (!load_alphabet_gguf(model.ctx_gguf, alphabet)) {
+        fprintf(stderr, "%s: failed to load alphabet from model\n", __func__);
+        if (!load_alphabet(alphabet)) {
+            fprintf(stderr, "%s: failed to load alphabet\n", __func__);
+            return 1;
+        }
     }
     const int64_t t_start_ms = ggml_time_ms();
     detect(img, model, params.thresh, labels, alphabet);
diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
@@ -2305,6 +2305,7 @@ extern "C" {
 
     GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
     GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
+    // looks for val in the array stored under key; returns the index of a matching element, or -1 if there is none
+    GGML_API int          gguf_find_key_array(const struct gguf_context * ctx, const char * key, const char * val);
     GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
 
     GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
@@ -2333,6 +2334,7 @@ extern "C" {
     GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
     GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
     GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
+    // size recorded for tensor i
+    // NOTE(review): gguf_init_from_file sets it to the product of the dimensions (an element count),
+    //               while callers use it as a byte length - confirm the intended unit
+    GGML_API size_t         gguf_get_tensor_size  (const struct gguf_context * ctx, int i);
 
     // removes key if it exists
     GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
diff --git a/src/ggml.c b/src/ggml.c
@@ -21562,6 +21562,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),    &offset);
             ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset),  &offset);
 
+            // set tensor size
+            size_t size = 1;
+            for (uint32_t j = 0; j < info->n_dims; ++j) {
+                size *= info->ne[j];
+            }
+            info->size = size;
+
             // TODO: return an error instead of crashing with GGML_ASSERT
             gguf_tensor_info_sanitize(info);
 
@@ -21784,6 +21791,37 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
     return keyfound;
 }
 
+// Look up the string `val` inside the string array stored under `key`.
+//
+// Returns the index of the first array element equal to `val`, or -1 when the
+// key does not exist, its value is not an array of strings, or no element
+// matches.
+int gguf_find_key_array(const struct gguf_context * ctx, const char * key, const char * val) {
+    const int key_id = gguf_find_key(ctx, key);
+    if (key_id == -1) {
+        return -1;
+    }
+
+    const struct gguf_kv * kv = &ctx->kv[key_id];
+
+    // The elements are read as struct gguf_str below, so any other kind of
+    // value (scalar, or array of another element type) must be rejected.
+    if (kv->type != GGUF_TYPE_ARRAY || gguf_get_arr_type(ctx, key_id) != GGUF_TYPE_STRING) {
+        return -1;
+    }
+
+    const struct gguf_str * strs = (const struct gguf_str *) kv->value.arr.data;
+    const int n = gguf_get_arr_n(ctx, key_id);
+
+    for (int i = 0; i < n; ++i) {
+        if (strs[i].data != NULL && strcmp(val, strs[i].data) == 0) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
 const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
     GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     return ctx->kv[key_id].key.data;
@@ -21920,17 +21958,25 @@ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
 }
 
+// returns the offset recorded for tensor i; asserts that i is a valid tensor index
 size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
+    GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
     return ctx->infos[i].offset;
 }
 
+// returns the name recorded for tensor i; asserts that i is a valid tensor index
 char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
+    GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
     return ctx->infos[i].name.data;
 }
 
+// returns the type recorded for tensor i; asserts that i is a valid tensor index
 enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int i) {
+    GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
     return ctx->infos[i].type;
 }
 
+// returns the size recorded for tensor i; asserts that i is a valid tensor index
+size_t gguf_get_tensor_size(const struct gguf_context * ctx, int i) {
+    GGML_ASSERT(i >= 0 && i < gguf_get_n_tensors(ctx));
+
+    const struct gguf_tensor_info * info = &ctx->infos[i];
+    return info->size;
+}
+
 // returns the index
 static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
     const int idx = gguf_find_key(ctx, key);
@@ -22242,7 +22288,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
         gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
 
         switch (kv->type) {
-            case GGUF_TYPE_UINT8:   gguf_bwrite_el( buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
+            case GGUF_TYPE_UINT8:   gguf_bwrite_el (buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
             case GGUF_TYPE_INT8:    gguf_bwrite_el (buf, &kv->value.int8,    sizeof(kv->value.int8)   ); break;
             case GGUF_TYPE_UINT16:  gguf_bwrite_el (buf, &kv->value.uint16,  sizeof(kv->value.uint16) ); break;
             case GGUF_TYPE_INT16:   gguf_bwrite_el (buf, &kv->value.int16,   sizeof(kv->value.int16)  ); break;