Skip to content

Commit 6ed09b7

Browse files
committed
refine helpers
1 parent 430dbd8 commit 6ed09b7

File tree

3 files changed

+42
-20
lines changed

3 files changed

+42
-20
lines changed

examples/llava/gemma3-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector
173173

174174
for (auto & fname : images_fname) {
175175
mtmd_bitmap bitmap;
176-
if (mtmd_bitmap_init_from_file(fname.c_str(), bitmap)) {
176+
if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
177177
LOG_ERR("Unable to load image %s\n", fname.c_str());
178178
return 2; // image not found
179179
}

examples/llava/mtmd.cpp

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,6 @@ void mtmd_free(mtmd_context * ctx) {
6868
}
6969
}
7070

71-
// (removed side of this diff) Old name of the file-loading bitmap helper; the
// same body is re-added further down in this file's diff as
// mtmd_helper_bitmap_init_from_file.
// Loads the image at `fname` into a temporary clip_image_u8, then copies its
// pixel bytes into `output.data` and stores the dimensions in output.nx/ny.
// Returns 0 on success, 1 (after logging) when the image cannot be loaded.
int32_t mtmd_bitmap_init_from_file(const char * fname, mtmd_bitmap & output) {
72-
clip_image_u8_ptr img_u8(clip_image_u8_init());
73-
bool ok = clip_image_load_from_file(fname, img_u8.get());
74-
if (!ok) {
75-
LOG_ERR("Unable to load image %s\n", fname);
76-
return 1;
77-
}
78-
unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny);
79-
output.data.resize(output.nx * output.ny * 3);
80-
std::memcpy(output.data.data(), data, output.nx * output.ny * 3);
81-
return 0;
82-
}
83-
8471
// copied from common_tokenize
8572
static std::vector<llama_token> mtmd_tokenize_text_internal(
8673
const struct llama_vocab * vocab,
@@ -326,3 +313,29 @@ int32_t mtmd_helper_eval(mtmd_context * ctx,
326313
llama_batch_free(text_batch);
327314
return 0;
328315
}
316+
317+
// Fills `output` from an encoded image held in memory.
//   buf, len : the encoded image bytes, passed straight to clip_image_load_from_bytes
//   output   : receives the dimensions (nx, ny) and a copy of the pixel bytes
// Returns 0 on success, 1 (after logging) when the buffer cannot be loaded;
// on failure `output` is left untouched because the early return happens
// before any field of it is written.
// NOTE(review): apart from the load call and the log message this body is
// identical to mtmd_helper_bitmap_init_from_file; the shared tail could live
// in one static helper.
int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output) {
318+
clip_image_u8_ptr img_u8(clip_image_u8_init());
319+
bool ok = clip_image_load_from_bytes(buf, len, img_u8.get());
320+
if (!ok) {
321+
LOG_ERR("Unable to load image from buffer\n");
322+
return 1;
323+
}
324+
// writes the image dimensions directly into output.nx / output.ny
unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny);
325+
// 3 bytes per pixel (presumably interleaved RGB - confirm against clip.h).
// NOTE(review): the product is evaluated in the type of output.nx/ny, which is
// not visible here; if that is a 32-bit type, very large images could overflow
// before the value reaches resize()/memcpy() - confirm and widen to size_t if so.
output.data.resize(output.nx * output.ny * 3);
326+
std::memcpy(output.data.data(), data, output.nx * output.ny * 3);
327+
return 0;
328+
}
329+
330+
// Fills `output` from an image file on disk.
//   fname  : path handed to clip_image_load_from_file
//   output : receives the dimensions (nx, ny) and a copy of the pixel bytes
// Returns 0 on success, 1 (after logging the file name) when loading fails;
// on failure `output` is left untouched because the early return happens
// before any field of it is written.
// NOTE(review): apart from the load call and the log message this body is
// identical to mtmd_helper_bitmap_init_from_buf; the shared tail could live
// in one static helper.
int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output) {
331+
clip_image_u8_ptr img_u8(clip_image_u8_init());
332+
bool ok = clip_image_load_from_file(fname, img_u8.get());
333+
if (!ok) {
334+
LOG_ERR("Unable to load image %s\n", fname);
335+
return 1;
336+
}
337+
// writes the image dimensions directly into output.nx / output.ny
unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny);
338+
// 3 bytes per pixel (presumably interleaved RGB - confirm against clip.h).
// NOTE(review): the product is evaluated in the type of output.nx/ny, which is
// not visible here; if that is a 32-bit type, very large images could overflow
// before the value reaches resize()/memcpy() - confirm and widen to size_t if so.
output.data.resize(output.nx * output.ny * 3);
339+
std::memcpy(output.data.data(), data, output.nx * output.ny * 3);
340+
return 0;
341+
}

examples/llava/mtmd.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,6 @@ MTMD_API mtmd_context * mtmd_init_from_file(const char * mmproj_fname,
7171

7272
MTMD_API void mtmd_free(mtmd_context * ctx);
7373

74-
// helper function to load an image from a file
75-
// returns 0 on success
76-
// this function is thread-safe
77-
MTMD_API int32_t mtmd_bitmap_init_from_file(const char * fname, mtmd_bitmap & output);
78-
7974
// tokenize an input text prompt and an image
8075
// the prompt must have the input image marker (default: "<__image__>") in it
8176
// the marker will be replaced with the image tokens
@@ -101,7 +96,11 @@ MTMD_API int32_t mtmd_encode(mtmd_context * ctx,
10196
// get output embeddings from the last encode pass
10297
MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
10398

104-
// simple helper to count the total number of tokens from a list of chunks, useful to keep track of n_past
99+
//
100+
// helper functions (can be implemented based on other functions)
101+
//
102+
103+
// helper to count the total number of tokens from a list of chunks, useful to keep track of n_past
105104
MTMD_API size_t mtmd_helper_get_n_tokens(mtmd_input_chunks * chunks);
106105

107106
// helper function that automatically:
@@ -116,6 +115,16 @@ MTMD_API int32_t mtmd_helper_eval(mtmd_context * ctx,
116115
llama_seq_id seq_id,
117116
int32_t n_batch);
118117

118+
// helper function to construct a mtmd_bitmap from a file
119+
// returns 0 on success
120+
// this function is thread-safe
121+
MTMD_API int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output);
122+
123+
// helper function to construct a mtmd_bitmap from a buffer
124+
// the buffer must be an image in a format supported by stb_image (jpg, png, bmp, gif, etc.)
125+
// returns 0 on success
126+
// this function is thread-safe
127+
MTMD_API int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output);
119128

120129
// convenient unique_ptr wrappers
121130
struct mtmd_context_deleter {

0 commit comments

Comments
 (0)