Skip to content

Commit 6c1e408

Browse files
committed
llama: enhance loading progress logging with throughput and ETA calculations
1 parent 73e2ed3 commit 6c1e408

File tree

1 file changed

+29
-11
lines changed

1 file changed

+29
-11
lines changed

src/llama.cpp

Lines changed: 29 additions & 11 deletions
Original file line number | Diff line number | Diff line change — reconstructed unified diff:

@@ -9424,30 +9424,45 @@ int64_t llama_time_us(void) {
     return ggml_time_us();
 }
 
+struct loading_progress_ctx {
+    int64_t t_load_start_us;
+    unsigned int cur_percentage;
+    llama_model const* model;
+};
+
 static struct llama_model * llama_model_load_from_file_impl(
         const std::string & path_model,
         std::vector<std::string> & splits,
         struct llama_model_params params) {
     ggml_time_init();
+    auto const t_load_start_us = ggml_time_us();
+
+    llama_model * model = new llama_model(params);
 
-    unsigned cur_percentage = 0;
+    loading_progress_ctx default_progress_ctx = { t_load_start_us, 0, model };
     // Setup default progress callback
     if (params.progress_callback == NULL) {
-        params.progress_callback_user_data = &cur_percentage;
-        params.progress_callback = [](float progress, void * ctx) {
-            unsigned * cur_percentage_p = (unsigned *) ctx;
+        model->params.progress_callback_user_data = &default_progress_ctx;
+        model->params.progress_callback = [](float progress, void * ctx) -> bool {
+            loading_progress_ctx * progress_ctx = (loading_progress_ctx *) ctx;
             unsigned percentage = (unsigned) (100 * progress);
-            while (percentage > *cur_percentage_p) {
-                *cur_percentage_p = percentage;
-                LLAMA_LOG_CONT(".");
+            while (percentage > progress_ctx->cur_percentage) {
+                progress_ctx->cur_percentage = percentage;
+                const auto t_now_us = ggml_time_us();
+                const float t_elapsed_s = (t_now_us - progress_ctx->t_load_start_us) / 1e6f;
+                auto const model_size_bytes = progress_ctx->model->size();
+                const float throughput_mb_s = (progress * model_size_bytes / t_elapsed_s) / (1024.0f * 1024.0f);
+                auto const remaining_bytes = (1-progress)*model_size_bytes;
+                const float remaining_mb = remaining_bytes / (1024.0f * 1024.0f);
+                const float remaining_time_s = remaining_mb / throughput_mb_s;
+                LLAMA_LOG_CONT("Loading: %u%%, %.0f MiB/s, ETA: %.0f s\r", percentage, throughput_mb_s, remaining_time_s);
                 if (percentage >= 100) {
-                    LLAMA_LOG_CONT("\n");
+                    LLAMA_LOG_CONT("\nLoading complete\n");
                 }
             }
             return true;
         };
-    }
-
-    llama_model * model = new llama_model(params);
+    }
 
     // create list of devices to use with this model
     if (params.devices) {
@@ -9512,6 +9527,9 @@ static struct llama_model * llama_model_load_from_file_impl(
         return nullptr;
     }
 
+    float t_load_time_s = (ggml_time_us() - t_load_start_us) / 1e6f;
+    LLAMA_LOG_INFO("Model loading took %.3f s\n", t_load_time_s);
+
     return model;
 }
0 commit comments

Comments
 (0)