3 files changed, +7 −7 lines changed.

llama-bench (`main`):

```diff
@@ -1558,7 +1558,8 @@ int main(int argc, char ** argv) {
             }
             prev_inst = &inst;
         } else {
-            llama_model_reset_time(lmodel);
+            // ensure load_time does not accumulate in llama-bench when not reloading the same model
+            llama_reset_model_time(lmodel);
         }
 
         llama_context * ctx = llama_new_context_with_model(lmodel, inst.to_llama_cparams());
```
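For orientation, here is a condensed, hedged sketch of the reuse pattern this hunk lives in: llama-bench only reloads the model when the model-level parameters change between test instances, and otherwise keeps the loaded model and now rebases its timer. The `cmd_params_instance` stand-in below is an assumption reconstructed from the visible calls (`equal_mparams`, `to_llama_mparams`, `to_llama_cparams`), not the real llama-bench type:

```cpp
#include <cstdint>
#include <string>
#include <vector>

#include "llama.h"

// Minimal stand-in for llama-bench's per-test configuration
// (assumption: the real cmd_params_instance has many more fields).
struct cmd_params_instance {
    std::string model;
    int32_t     n_gpu_layers = 0;   // model-level: changing it forces a reload
    uint32_t    n_batch      = 512; // context-level: no reload needed

    bool equal_mparams(const cmd_params_instance & o) const {
        return model == o.model && n_gpu_layers == o.n_gpu_layers;
    }
    llama_model_params to_llama_mparams() const {
        llama_model_params p = llama_model_default_params();
        p.n_gpu_layers = n_gpu_layers;
        return p;
    }
    llama_context_params to_llama_cparams() const {
        llama_context_params p = llama_context_default_params();
        p.n_batch = n_batch;
        return p;
    }
};

int main() {
    // backend initialization omitted: its signature varies across revisions
    std::vector<cmd_params_instance> params_instances = {
        { "model.gguf", 0,  512 },
        { "model.gguf", 0, 1024 }, // same model params -> reuse + timer reset
    };

    llama_model * lmodel = nullptr;
    const cmd_params_instance * prev_inst = nullptr;

    for (const auto & inst : params_instances) {
        if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
            if (lmodel) {
                llama_free_model(lmodel);
            }
            lmodel = llama_load_model_from_file(inst.model.c_str(), inst.to_llama_mparams());
            if (lmodel == nullptr) {
                return 1;
            }
            prev_inst = &inst;
        } else {
            llama_reset_model_time(lmodel); // reused model: rebase its load timer
        }

        llama_context * ctx = llama_new_context_with_model(lmodel, inst.to_llama_cparams());
        // ... run this instance's benchmark here ...
        llama_free(ctx);
    }

    llama_free_model(lmodel);
    return 0;
}
```

The design point is that context-level parameters (batch size, etc.) can vary without paying the model load cost again, which is exactly why a stale load timer would otherwise leak into every reused run.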
Public header (`extern "C"` API):

```diff
@@ -414,7 +414,7 @@ extern "C" {
             const char * path_model,
             struct llama_model_params params);
 
-    LLAMA_API void llama_model_reset_time(struct llama_model * model);
+    LLAMA_API void llama_reset_model_time(struct llama_model * model);
 
     LLAMA_API void llama_free_model(struct llama_model * model);
 
```
Implementation, first hunk (the old definition, with its back-to-back time samples, is removed):

```diff
@@ -8809,11 +8809,6 @@ static bool llm_load_tensors(
     return true;
 }
 
-void llama_model_reset_time(llama_model * model) {
-    model->t_start_us = ggml_time_us();
-    model->t_load_us = ggml_time_us() - model->t_start_us;
-}
-
 // Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
 static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
     model.t_start_us = ggml_time_us();
```
Implementation, second hunk (the renamed definition is re-added next to `llama_load_model_from_file`, with corrected arithmetic):

```diff
@@ -18695,6 +18690,10 @@ struct llama_model * llama_load_model_from_file(
     return model;
 }
 
+void llama_reset_model_time(llama_model * model) {
+    model->t_start_us = ggml_time_us() - model->t_load_us;
+}
+
 void llama_free_model(struct llama_model * model) {
     delete model;
 }
```
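Why the new arithmetic: in llama.cpp revisions of this era, a freshly created context appears to recompute the reported load time at its first eval as roughly `ggml_time_us() - t_start_us` (so page faults are included). With a model loaded once and reused across many bench runs, that subtraction grows with wall-clock time — the accumulation the llama-bench comment refers to. The removed variant zeroed `t_load_us` (its two `ggml_time_us()` samples run back to back), discarding the real load time; the new variant rebases `t_start_us` so that `now - t_start_us` lands back at `t_load_us`. A self-contained toy trace under that recomputation assumption (`toy_model` and the fake clock are stand-ins, not llama.cpp code):

```cpp
#include <cstdint>
#include <cstdio>

// Stand-ins for the two timer fields the patch touches.
struct toy_model {
    int64_t t_start_us;
    int64_t t_load_us;
};

// Deterministic clock so the trace is reproducible (ggml_time_us() in the real code).
static int64_t g_now_us = 0;
static int64_t time_us() { return g_now_us; }

// Removed variant: the second sample follows the first immediately,
// so t_load_us collapses to 0 and the real load time is lost.
static void reset_old(toy_model * m) {
    m->t_start_us = time_us();
    m->t_load_us  = time_us() - m->t_start_us; // == 0
}

// New variant: rebase t_start_us so that (now - t_start_us) == t_load_us again.
static void reset_new(toy_model * m) {
    m->t_start_us = time_us() - m->t_load_us;
}

int main() {
    // model loading started at t = 3 s and took 2 s
    toy_model m = { 3'000'000, 2'000'000 };

    g_now_us = 60'000'000;  // much later: the model is reused without reloading
    reset_new(&m);
    g_now_us += 10'000;     // the next context's first eval happens shortly after

    // what that context would recompute as its load time
    // (without any reset it would be ~57 s: everything since the original load)
    printf("new reset: %lld us\n", (long long)(g_now_us - m.t_start_us)); // 2'010'000 (~2 s)

    m = { 3'000'000, 2'000'000 };
    g_now_us = 60'000'000;
    reset_old(&m);
    g_now_us += 10'000;
    printf("old reset: %lld us\n", (long long)(g_now_us - m.t_start_us)); // 10'000 (~0 s)
    return 0;
}
```

The placement also makes sense: the definition now lives next to `llama_load_model_from_file`, where the load timing is established, rather than among the internal tensor-loading helpers.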