@@ -63,7 +63,7 @@ static void sigint_handler(int signo) {
6363#endif 
6464
6565struct  mtmd_cli_context  {
66-     mtmd_context_ptr  ctx_vision;
66+     mtmd::context_ptr  ctx_vision;
6767    common_init_result llama_init;
6868
6969    llama_model       * model;
@@ -173,7 +173,7 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
173173}
174174
175175static  int  eval_message (mtmd_cli_context & ctx, common_chat_msg & msg, std::vector<std::string> & images_fname, bool  add_bos = false ) {
176-     std::vector<mtmd_bitmap> bitmaps;
176+     std::vector<mtmd_bitmap * > bitmaps;
177177
178178    common_chat_templates_inputs tmpl_inputs;
179179    tmpl_inputs.messages  = {msg};
@@ -183,34 +183,46 @@ static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vect
183183    LOG_DBG (" formatted_chat.prompt: %s\n "  , formatted_chat.prompt .c_str ());
184184
185185    for  (auto  & fname : images_fname) {
186-         mtmd_bitmap bitmap;
187-         if  (mtmd_helper_bitmap_init_from_file (fname. c_str (),  bitmap) ) {
186+         mtmd_bitmap *  bitmap =  mtmd_helper_bitmap_init_from_file (fname. c_str ()) ;
187+         if  (! bitmap) {
188188            LOG_ERR (" Unable to load image %s\n "  , fname.c_str ());
189189            return  2 ; //  image not found
190190        }
191191        bitmaps.push_back (std::move (bitmap));
192192    }
193193
194194    mtmd_input_text text;
195-     text.text           = formatted_chat.prompt ;
195+     text.text           = formatted_chat.prompt . c_str () ;
196196    text.add_special    = add_bos;
197197    text.parse_special  = true ;
198-     std::vector<mtmd_input_chunk> chunks;
199198
200199    if  (g_is_interrupted) return  0 ;
201200
202-     int32_t  res = mtmd_tokenize (ctx.ctx_vision .get (), chunks, text, bitmaps);
201+     mtmd::input_chunks chunks;
202+     int32_t  res = mtmd_tokenize (ctx.ctx_vision .get (),
203+                         chunks.ptr .get (), //  output
204+                         &text, //  text
205+                         bitmaps.data (), //  bitmaps
206+                         bitmaps.size ());
203207    if  (res != 0 ) {
204208        LOG_ERR (" Unable to tokenize prompt, res = %d\n "  , res);
205209        return  1 ;
206210    }
207211
208-     if  (mtmd_helper_eval (ctx.ctx_vision .get (), ctx.lctx , chunks, ctx.n_past , 0 , ctx.n_batch )) {
212+     llama_pos new_n_past;
213+     if  (mtmd_helper_eval_chunks (ctx.ctx_vision .get (),
214+                 ctx.lctx , //  lctx
215+                 chunks.ptr .get (), //  chunks
216+                 ctx.n_past , //  n_past
217+                 0 , //  seq_id
218+                 ctx.n_batch , //  n_batch
219+                 true , //  logits_last
220+                 &new_n_past)) {
209221        LOG_ERR (" Unable to eval prompt\n "  );
210222        return  1 ;
211223    }
212224
213-     ctx.n_past  +=  mtmd_helper_get_n_pos (chunks) ;
225+     ctx.n_past  = new_n_past ;
214226
215227    return  0 ;
216228}
@@ -241,7 +253,7 @@ int main(int argc, char ** argv) {
241253    struct  common_sampler  * smpl = common_sampler_init (ctx.model , params.sampling );
242254    int  n_predict = params.n_predict  < 0  ? INT_MAX : params.n_predict ;
243255
244-     //  ctrl +C handling
256+     //  Ctrl +C handling
245257    {
246258#if  defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
247259        struct  sigaction  sigint_action;
0 commit comments