refine helpers

ngxson · ngxson · commit 6ed09b70dcca · 2025-04-10T18:20:28.000+02:00
diff --git a/examples/llava/gemma3-cli.cpp b/examples/llava/gemma3-cli.cpp
@@ -173,7 +173,7 @@ static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector
 
     for (auto & fname : images_fname) {
         mtmd_bitmap bitmap;
-        if (mtmd_bitmap_init_from_file(fname.c_str(), bitmap)) {
+        if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
             LOG_ERR("Unable to load image %s\n", fname.c_str());
             return 2; // image not found
         }
diff --git a/examples/llava/mtmd.cpp b/examples/llava/mtmd.cpp
@@ -68,19 +68,6 @@ void mtmd_free(mtmd_context * ctx) {
     }
 }
 
-int32_t mtmd_bitmap_init_from_file(const char * fname, mtmd_bitmap & output) {
-    clip_image_u8_ptr img_u8(clip_image_u8_init());
-    bool ok = clip_image_load_from_file(fname, img_u8.get());
-    if (!ok) {
-        LOG_ERR("Unable to load image %s\n", fname);
-        return 1;
-    }
-    unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny);
-    output.data.resize(output.nx * output.ny * 3);
-    std::memcpy(output.data.data(), data, output.nx * output.ny * 3);
-    return 0;
-}
-
 // copied from common_tokenize
 static std::vector<llama_token> mtmd_tokenize_text_internal(
     const struct llama_vocab * vocab,
@@ -326,3 +313,29 @@ int32_t mtmd_helper_eval(mtmd_context * ctx,
     llama_batch_free(text_batch);
     return 0;
 }
+
+int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output) {
+    clip_image_u8_ptr img_u8(clip_image_u8_init());
+    bool ok = clip_image_load_from_bytes(buf, len, img_u8.get());
+    if (!ok) {
+        LOG_ERR("Unable to load image from buffer\n");
+        return 1;
+    }
+    unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny);
+    output.data.resize(output.nx * output.ny * 3);
+    std::memcpy(output.data.data(), data, output.nx * output.ny * 3);
+    return 0;
+}
+
+int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output) {
+    clip_image_u8_ptr img_u8(clip_image_u8_init());
+    bool ok = clip_image_load_from_file(fname, img_u8.get());
+    if (!ok) {
+        LOG_ERR("Unable to load image %s\n", fname);
+        return 1;
+    }
+    unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny);
+    output.data.resize(output.nx * output.ny * 3);
+    std::memcpy(output.data.data(), data, output.nx * output.ny * 3);
+    return 0;
+}
diff --git a/examples/llava/mtmd.h b/examples/llava/mtmd.h
@@ -71,11 +71,6 @@ MTMD_API mtmd_context * mtmd_init_from_file(const char * mmproj_fname,
 
 MTMD_API void mtmd_free(mtmd_context * ctx);
 
-// helper function to load an image from a file
-// returns 0 on success
-// this function is thread-safe
-MTMD_API int32_t mtmd_bitmap_init_from_file(const char * fname, mtmd_bitmap & output);
-
 // tokenize an input text prompt and an image
 // the prompt must have the input image marker (default: "<__image__>") in it
 // the marker will be replaced with the image tokens
@@ -101,7 +96,11 @@ MTMD_API int32_t mtmd_encode(mtmd_context * ctx,
 // get output embeddings from the last encode pass
 MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
 
-// simple helper to count the total number of tokens from a list of chunks, useful to keep track of n_past
+//
+// helper functions (can be implemented in terms of other functions)
+//
+
+// helper to count the total number of tokens from a list of chunks, useful to keep track of n_past
 MTMD_API size_t mtmd_helper_get_n_tokens(mtmd_input_chunks * chunks);
 
 // helper function that automatically:
@@ -116,6 +115,16 @@ MTMD_API int32_t mtmd_helper_eval(mtmd_context * ctx,
                                 llama_seq_id seq_id,
                                 int32_t n_batch);
 
+// helper function to construct a mtmd_bitmap from a file
+// returns 0 on success
+// this function is thread-safe
+MTMD_API int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output);
+
+// helper function to construct a mtmd_bitmap from a buffer
+// the buffer must be an image in a format supported by stb_image (jpg, png, bmp, gif, etc.)
+// returns 0 on success
+// this function is thread-safe
+MTMD_API int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output);
 
 // convenient unique_ptr wrappers
 struct mtmd_context_deleter {

Original file line number	Diff line number	Diff line change
`@@ -173,7 +173,7 @@ static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector`
`173`	`173`
`174`	`174`	`for (auto & fname : images_fname) {`
`175`	`175`	`mtmd_bitmap bitmap;`
`176`		`- if (mtmd_bitmap_init_from_file(fname.c_str(), bitmap)) {`
	`176`	`+ if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {`
`177`	`177`	`LOG_ERR("Unable to load image %s\n", fname.c_str());`
`178`	`178`	`return 2; // image not found`
`179`	`179`	`}`