Skip to content

Commit f8c27b9

Browse files
committed
add mtmd::bitmaps
1 parent 82f4246 commit f8c27b9

File tree

2 files changed

+25
-6
lines changed

2 files changed

+25
-6
lines changed

examples/llava/mtmd-cli.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
173173
}
174174

175175
static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vector<std::string> & images_fname, bool add_bos = false) {
176-
std::vector<mtmd_bitmap *> bitmaps;
176+
mtmd::bitmaps bitmaps;
177177

178178
common_chat_templates_inputs tmpl_inputs;
179179
tmpl_inputs.messages = {msg};
@@ -183,12 +183,12 @@ static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vect
183183
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str());
184184

185185
for (auto & fname : images_fname) {
186-
mtmd_bitmap * bitmap = mtmd_helper_bitmap_init_from_file(fname.c_str());
187-
if (!bitmap) {
186+
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(fname.c_str()));
187+
if (!bmp.ptr) {
188188
LOG_ERR("Unable to load image %s\n", fname.c_str());
189189
return 2; // image not found
190190
}
191-
bitmaps.push_back(std::move(bitmap));
191+
bitmaps.entries.push_back(std::move(bmp));
192192
}
193193

194194
mtmd_input_text text;
@@ -199,11 +199,12 @@ static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vect
199199
if (g_is_interrupted) return 0;
200200

201201
mtmd::input_chunks chunks;
202+
auto bitmaps_c_ptr = bitmaps.c_ptr();
202203
int32_t res = mtmd_tokenize(ctx.ctx_vision.get(),
203204
chunks.ptr.get(), // output
204205
&text, // text
205-
bitmaps.data(), // bitmaps
206-
bitmaps.size());
206+
bitmaps_c_ptr.data(),
207+
bitmaps_c_ptr.size());
207208
if (res != 0) {
208209
LOG_ERR("Unable to tokenize prompt, res = %d\n", res);
209210
return 1;

examples/llava/mtmd.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,9 @@ using input_chunks_ptr = std::unique_ptr<mtmd_input_chunks, mtmd_input_chunks_de
257257

258258
struct bitmap {
259259
bitmap_ptr ptr;
260+
bitmap() : ptr(nullptr) {}
260261
bitmap(mtmd_bitmap * bitmap) : ptr(bitmap) {}
262+
bitmap(bitmap && other) noexcept : ptr(std::move(other.ptr)) {}
261263
bitmap(uint32_t nx, uint32_t ny, const unsigned char * data) {
262264
ptr.reset(mtmd_bitmap_init(nx, ny, data));
263265
}
@@ -269,6 +271,22 @@ struct bitmap {
269271
void set_id(const char * id) { mtmd_bitmap_set_id(ptr.get(), id); }
270272
};
271273

274+
struct bitmaps {
275+
std::vector<bitmap> entries;
276+
~bitmaps() = default;
277+
// return list of pointers to mtmd_bitmap
278+
// example:
279+
// auto bitmaps_c_ptr = bitmaps.c_ptr();
280+
// int32_t res = mtmd_tokenize(... bitmaps_c_ptr.data(), bitmaps_c_ptr.size());
281+
std::vector<mtmd_bitmap *> c_ptr() {
282+
std::vector<mtmd_bitmap *> res(entries.size());
283+
for (size_t i = 0; i < entries.size(); i++) {
284+
res[i] = entries[i].ptr.get();
285+
}
286+
return res;
287+
}
288+
};
289+
272290
struct input_chunks {
273291
input_chunks_ptr ptr;
274292
input_chunks() {

0 commit comments

Comments
 (0)