Skip to content

Commit 82f4246

Browse files
committed
working version
1 parent e0806c2 commit 82f4246

File tree

3 files changed

+505
-243
lines changed

3 files changed

+505
-243
lines changed

examples/llava/mtmd-cli.cpp

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ static void sigint_handler(int signo) {
6363
#endif
6464

6565
struct mtmd_cli_context {
66-
mtmd_context_ptr ctx_vision;
66+
mtmd::context_ptr ctx_vision;
6767
common_init_result llama_init;
6868

6969
llama_model * model;
@@ -173,7 +173,7 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
173173
}
174174

175175
static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vector<std::string> & images_fname, bool add_bos = false) {
176-
std::vector<mtmd_bitmap> bitmaps;
176+
std::vector<mtmd_bitmap *> bitmaps;
177177

178178
common_chat_templates_inputs tmpl_inputs;
179179
tmpl_inputs.messages = {msg};
@@ -183,34 +183,46 @@ static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vect
183183
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str());
184184

185185
for (auto & fname : images_fname) {
186-
mtmd_bitmap bitmap;
187-
if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
186+
mtmd_bitmap * bitmap = mtmd_helper_bitmap_init_from_file(fname.c_str());
187+
if (!bitmap) {
188188
LOG_ERR("Unable to load image %s\n", fname.c_str());
189189
return 2; // image not found
190190
}
191191
bitmaps.push_back(std::move(bitmap));
192192
}
193193

194194
mtmd_input_text text;
195-
text.text = formatted_chat.prompt;
195+
text.text = formatted_chat.prompt.c_str();
196196
text.add_special = add_bos;
197197
text.parse_special = true;
198-
std::vector<mtmd_input_chunk> chunks;
199198

200199
if (g_is_interrupted) return 0;
201200

202-
int32_t res = mtmd_tokenize(ctx.ctx_vision.get(), chunks, text, bitmaps);
201+
mtmd::input_chunks chunks;
202+
int32_t res = mtmd_tokenize(ctx.ctx_vision.get(),
203+
chunks.ptr.get(), // output
204+
&text, // text
205+
bitmaps.data(), // bitmaps
206+
bitmaps.size());
203207
if (res != 0) {
204208
LOG_ERR("Unable to tokenize prompt, res = %d\n", res);
205209
return 1;
206210
}
207211

208-
if (mtmd_helper_eval(ctx.ctx_vision.get(), ctx.lctx, chunks, ctx.n_past, 0, ctx.n_batch)) {
212+
llama_pos new_n_past;
213+
if (mtmd_helper_eval_chunks(ctx.ctx_vision.get(),
214+
ctx.lctx, // lctx
215+
chunks.ptr.get(), // chunks
216+
ctx.n_past, // n_past
217+
0, // seq_id
218+
ctx.n_batch, // n_batch
219+
true, // logits_last
220+
&new_n_past)) {
209221
LOG_ERR("Unable to eval prompt\n");
210222
return 1;
211223
}
212224

213-
ctx.n_past += mtmd_helper_get_n_pos(chunks);
225+
ctx.n_past = new_n_past;
214226

215227
return 0;
216228
}
@@ -241,7 +253,7 @@ int main(int argc, char ** argv) {
241253
struct common_sampler * smpl = common_sampler_init(ctx.model, params.sampling);
242254
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
243255

244-
// ctrl+C handling
256+
// Ctrl+C handling
245257
{
246258
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
247259
struct sigaction sigint_action;

0 commit comments

Comments
 (0)