
Commit 7cc4108

committed
add timings
1 parent 94564ac commit 7cc4108

4 files changed, +13 -1 lines changed

examples/llava/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ target_link_libraries(llava2 PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
 
 target_include_directories(llava2 PUBLIC .)
 target_include_directories(llava2 PUBLIC ../..)
+target_include_directories(llava2 PUBLIC ../../common) # for stb_image.h
 
 target_compile_features(llava2 PRIVATE cxx_std_17)
 

examples/llava/gemma3-cli.cpp

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ struct gemma3_context {
         const char * clip_path = params.mmproj.path.c_str();
         ctx_vision = llava2_init_from_file(clip_path, model, llava2_context_params{
             /* use_gpu */   true,
+            /* timings */   true,
             /* n_threads */ params.cpuparams.n_threads,
             /* verbosity */ GGML_LOG_LEVEL_INFO,
         });
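gemma3-cli opts in explicitly; a caller that wants the new per-image log lines suppressed would pass false in the same slot. A minimal sketch (not part of this commit), reusing the field order from llava2.h and the same variables as above:

ctx_vision = llava2_init_from_file(clip_path, model, llava2_context_params{
    /* use_gpu */   true,
    /* timings */   false, // sketch: suppress the "Image encoded/decoded in ... ms" lines
    /* n_threads */ params.cpuparams.n_threads,
    /* verbosity */ GGML_LOG_LEVEL_INFO,
});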

examples/llava/llava2.cpp

Lines changed: 10 additions & 1 deletion
@@ -16,14 +16,15 @@ struct llava2_context {
     struct clip_ctx * ctx_clip;
     const struct llama_model * text_model;
     std::vector<float> image_embd_v; // image embedding vector
+    bool print_timings;
     int n_threads;
     std::string image_marker;
 
     // TODO @ngxson : add timings
 
     llava2_context(const char * mmproj_fname,
             const struct llama_model * text_model,
-            const struct llava2_context_params & ctx_params) : n_threads(ctx_params.n_threads), image_marker(ctx_params.image_marker) {
+            const struct llava2_context_params & ctx_params) : print_timings(ctx_params.print_timings), n_threads(ctx_params.n_threads), image_marker(ctx_params.image_marker) {
         clip_context_params ctx_clip_params;
         ctx_clip_params.use_gpu = ctx_params.use_gpu;
         ctx_clip_params.verbosity = ctx_params.verbosity;
@@ -260,22 +261,30 @@ int32_t llava2_helper_eval(llava2_context_ptr & ctx,
 
         } else if (chunk.type == LLAVA2_INPUT_CHUNK_TYPE_IMAGE) {
             GGML_ASSERT(!is_last && "logits for last image chunk is not yet support");
+            int64_t t0 = ggml_time_ms();
             ret = llava2_encode(ctx, chunk.tokens_image);
             if (ret != 0) {
                 LOG_ERR("failed to encode image\n");
                 llama_batch_free(text_batch);
                 return ret;
             }
+            if (ctx->print_timings) {
+                LOG_INF("Image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
+            }
 
             int32_t n_tokens = chunk.tokens_image.n_tokens;
             float * embd = llava2_get_output_embd(ctx);
             decode_embd_batch batch_img(embd, n_tokens, n_past, 0);
+            int64_t t1 = ggml_time_ms();
             ret = llama_decode(lctx, batch_img.batch);
             if (ret != 0) {
                 LOG_ERR("failed to decode image\n");
                 llama_batch_free(text_batch);
                 return ret;
             }
+            if (ctx->print_timings) {
+                LOG_INF("Image decoded in %" PRId64 " ms\n", ggml_time_ms() - t1);
+            }
 
             n_past += n_tokens;
 
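The pattern added around llava2_encode() and llama_decode() is ggml's wall-clock timer: record ggml_time_ms() before the call and print the delta only when the flag is set. A self-contained sketch of that pattern, with a sleep standing in for the encode step (this is illustrative code, not code from the commit):

#include <chrono>
#include <cinttypes>
#include <cstdio>
#include <thread>
#include "ggml.h" // for ggml_time_init() and ggml_time_ms()

int main() {
    ggml_time_init();          // initialize ggml's timers once per process
    bool print_timings = true; // mirrors llava2_context_params::print_timings

    int64_t t0 = ggml_time_ms();
    std::this_thread::sleep_for(std::chrono::milliseconds(50)); // stand-in for llava2_encode()
    if (print_timings) {
        printf("Image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
    }
    return 0;
}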

examples/llava/llava2.h

Lines changed: 1 addition & 0 deletions
@@ -60,6 +60,7 @@ struct llava2_input_chunk {
 
 struct llava2_context_params {
     bool use_gpu = true;
+    bool print_timings = true;
     int n_threads = 4;
     enum ggml_log_level verbosity = GGML_LOG_LEVEL_INFO;
     const char * image_marker = "<__image__>";
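Because the new field defaults to true, code that default-constructs the params struct keeps compiling and simply gains the timing output; opting out is one assignment. A sketch (assuming the caller fills the struct by name rather than positionally, as in the fragment above):

llava2_context_params ctx_params; // use_gpu = true, print_timings = true, n_threads = 4, ...
ctx_params.print_timings = false; // turn off the "Image encoded/decoded in ... ms" lines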

0 commit comments
