@@ -9424,30 +9424,45 @@ int64_t llama_time_us(void) {
     return ggml_time_us();
 }
 
+struct loading_progress_ctx {
+    int64_t t_load_start_us;
+    unsigned int cur_percentage;
+    llama_model const * model;
+};
+
 static struct llama_model * llama_model_load_from_file_impl(
         const std::string & path_model,
         std::vector<std::string> & splits,
         struct llama_model_params params) {
     ggml_time_init();
+    auto const t_load_start_us = ggml_time_us();
+
+    llama_model * model = new llama_model(params);
 
-    unsigned cur_percentage = 0;
+    loading_progress_ctx default_progress_ctx = { t_load_start_us, 0, model };
+    // Setup default progress callback
     if (params.progress_callback == NULL) {
-        params.progress_callback_user_data = &cur_percentage;
-        params.progress_callback = [](float progress, void * ctx) {
-            unsigned * cur_percentage_p = (unsigned *) ctx;
+        model->params.progress_callback_user_data = &default_progress_ctx;
+        model->params.progress_callback = [](float progress, void * ctx) -> bool {
+            loading_progress_ctx * progress_ctx = (loading_progress_ctx *) ctx;
             unsigned percentage = (unsigned) (100 * progress);
-            while (percentage > *cur_percentage_p) {
-                *cur_percentage_p = percentage;
-                LLAMA_LOG_CONT(".");
+            while (percentage > progress_ctx->cur_percentage) {
+                progress_ctx->cur_percentage = percentage;
+                const auto t_now_us = ggml_time_us();
+                const float t_elapsed_s = (t_now_us - progress_ctx->t_load_start_us) / 1e6f;
+                auto const model_size_bytes = progress_ctx->model->size();
+                const float throughput_mb_s = (progress * model_size_bytes / t_elapsed_s) / (1024.0f * 1024.0f);
+                auto const remaining_bytes = (1 - progress) * model_size_bytes;
+                const float remaining_mb = remaining_bytes / (1024.0f * 1024.0f);
+                const float remaining_time_s = remaining_mb / throughput_mb_s;
+                LLAMA_LOG_CONT("Loading: %u%%, %.0f MiB/s, ETA: %.0f s\r", percentage, throughput_mb_s, remaining_time_s);
                 if (percentage >= 100) {
-                    LLAMA_LOG_CONT("\n");
+                    LLAMA_LOG_CONT("\nLoading complete\n");
                 }
             }
             return true;
         };
-    }
-
-    llama_model * model = new llama_model(params);
+    }
 
     // create list of devices to use with this model
     if (params.devices) {
@@ -9512,6 +9527,9 @@ static struct llama_model * llama_model_load_from_file_impl(
         return nullptr;
     }
 
+    float t_load_time_s = (ggml_time_us() - t_load_start_us) / 1e6f;
+    LLAMA_LOG_INFO("Model loading took %.3f s\n", t_load_time_s);
+
     return model;
 }
 
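The default callback above only kicks in when params.progress_callback is NULL, so applications can still do their own reporting by filling in the callback fields of llama_model_params before loading. A minimal sketch, assuming the public llama.h loader API (llama_model_default_params, llama_model_load_from_file, llama_model_free) and a placeholder model path:

#include "llama.h"
#include <stdio.h>

// Custom progress callback: receives progress in [0, 1]; returning false
// aborts the load (per the llama_progress_callback contract in llama.h).
static bool my_progress(float progress, void * user_data) {
    (void) user_data;
    fprintf(stderr, "loaded %.1f%%\n", progress * 100.0f);
    return true;
}

int main(void) {
    struct llama_model_params mparams = llama_model_default_params();
    mparams.progress_callback           = my_progress;
    mparams.progress_callback_user_data = NULL;

    // "model.gguf" is a placeholder path
    struct llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (model == NULL) {
        return 1;
    }
    llama_model_free(model);
    return 0;
}

Because a user-supplied callback takes precedence, the throughput/ETA line introduced in this change never mixes with the caller's own output.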