@@ -16,14 +16,15 @@ struct llava2_context {
     struct clip_ctx * ctx_clip;
     const struct llama_model * text_model;
     std::vector<float> image_embd_v; // image embedding vector
+    bool print_timings;
     int n_threads;
     std::string image_marker;

     // TODO @ngxson : add timings

     llava2_context(const char * mmproj_fname,
                    const struct llama_model * text_model,
-                   const struct llava2_context_params & ctx_params) : n_threads(ctx_params.n_threads), image_marker(ctx_params.image_marker) {
+                   const struct llava2_context_params & ctx_params) : print_timings(ctx_params.print_timings), n_threads(ctx_params.n_threads), image_marker(ctx_params.image_marker) {
         clip_context_params ctx_clip_params;
         ctx_clip_params.use_gpu   = ctx_params.use_gpu;
         ctx_clip_params.verbosity = ctx_params.verbosity;
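
This first hunk threads a new `print_timings` flag from `llava2_context_params` into the context through the constructor's initializer list. A minimal caller-side sketch of enabling it, assuming the parameter fields visible in the diff (`use_gpu`, `verbosity`, `n_threads`, `image_marker`, `print_timings`); the marker string and surrounding setup are placeholders, not taken from this commit:

    // Hypothetical caller-side sketch; field names come from the diff above,
    // everything else is assumed.
    llava2_context_params ctx_params;
    ctx_params.use_gpu       = true;
    ctx_params.verbosity     = 1;
    ctx_params.n_threads     = 4;
    ctx_params.image_marker  = "<__image__>";  // placeholder marker string
    ctx_params.print_timings = true;           // new flag: log encode/decode wall time
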
@@ -260,22 +261,30 @@ int32_t llava2_helper_eval(llava2_context_ptr & ctx,

         } else if (chunk.type == LLAVA2_INPUT_CHUNK_TYPE_IMAGE) {
             GGML_ASSERT(!is_last && "logits for last image chunk is not yet support");
+            int64_t t0 = ggml_time_ms();
             ret = llava2_encode(ctx, chunk.tokens_image);
             if (ret != 0) {
                 LOG_ERR("failed to encode image\n");
                 llama_batch_free(text_batch);
                 return ret;
             }
+            if (ctx->print_timings) {
+                LOG_INF("Image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
+            }

             int32_t n_tokens = chunk.tokens_image.n_tokens;
             float * embd = llava2_get_output_embd(ctx);
             decode_embd_batch batch_img(embd, n_tokens, n_past, 0);
+            int64_t t1 = ggml_time_ms();
             ret = llama_decode(lctx, batch_img.batch);
             if (ret != 0) {
                 LOG_ERR("failed to decode image\n");
                 llama_batch_free(text_batch);
                 return ret;
             }
+            if (ctx->print_timings) {
+                LOG_INF("Image decoded in %" PRId64 " ms\n", ggml_time_ms() - t1);
+            }

             n_past += n_tokens;

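
The second hunk applies the same timing pattern twice: read `ggml_time_ms()` before the call, subtract it afterwards, and log the difference with `PRId64`, gated on `print_timings`. A self-contained sketch of that pattern using only ggml's timer API (note that `ggml_time_init()` must run once before the timers are read; the busy loop stands in for `llava2_encode()`/`llama_decode()`):

    // Standalone sketch of the timing pattern from the hunk above.
    #include <cinttypes>
    #include <cstdio>
    #include "ggml.h"

    int main() {
        ggml_time_init();                 // initialize ggml timers once per process
        int64_t t0 = ggml_time_ms();
        for (volatile int i = 0; i < 100000000; i++) { } // placeholder work
        printf("work finished in %" PRId64 " ms\n", ggml_time_ms() - t0);
        return 0;
    }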