@@ -1090,12 +1090,12 @@ int llama_context::decode(const llama_batch & batch_inp) {
     ggml_status status;
     const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status);

-#if defined(GGML_PERF)
+#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
     ggml_perf_accumulate(perf_totals, res->get_gf());
 #elif defined(GGML_PERF_DETAIL)
     ggml_perf_accumulate(perf_totals, res->get_gf());
     ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
-#endif /* GGML_PERF || GGML_PERF_DETAIL */
+#endif /* GGML_PERF-related flags */


     if (!res) {
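With this change, GGML_PERF and the new GGML_PERF_RELEASE both take the summary-only path at decode time, while GGML_PERF_DETAIL also writes per-shape CSV rows. Below is a minimal, self-contained sketch of that gating; the struct layouts and the stub bodies for ggml_perf_accumulate and ggml_perf_write_detailed_csv are assumptions for illustration, not the real llama.cpp code.

```cpp
#include <cstdio>

struct ggml_cgraph {};                      // stand-in for ggml's graph type
struct ggml_perf_totals { long runs = 0; }; // stand-in for the real totals entry

static void ggml_perf_accumulate(ggml_perf_totals * t, const ggml_cgraph * /*gf*/) {
    t->runs++;                              // stub: the real function walks the graph's nodes
}

static void ggml_perf_write_detailed_csv(const ggml_cgraph * /*gf*/, std::FILE * fp) {
    std::fprintf(fp, "op,shape,us\n");      // stub: the real one emits one row per node
}

// Mirrors the gating after process_ubatch(): both GGML_PERF and GGML_PERF_RELEASE
// accumulate summary totals; GGML_PERF_DETAIL accumulates and dumps per-shape rows.
static void profile_after_decode(ggml_perf_totals * totals, const ggml_cgraph * gf, std::FILE * csv) {
#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE)
    ggml_perf_accumulate(totals, gf);
#elif defined(GGML_PERF_DETAIL)
    ggml_perf_accumulate(totals, gf);
    ggml_perf_write_detailed_csv(gf, csv);
#endif
    (void) totals; (void) gf; (void) csv;   // no-op when no perf flag is defined
}

int main() {
    ggml_perf_totals totals;
    ggml_cgraph gf;
    profile_after_decode(&totals, &gf, stdout);
    std::printf("runs accumulated: %ld\n", totals.runs);
}
```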
@@ -2759,7 +2759,7 @@ llama_perf_context_data llama_perf_context(const llama_context * ctx) {
 }


-#if defined(GGML_PERF)
+#if defined(GGML_PERF_RELEASE)
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
     LLAMA_LOG_TSAVORITE("%-16s %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");
@@ -2788,7 +2788,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     }
 }

-#elif defined(GGML_PERF_DETAIL)
+#elif defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
     LLAMA_LOG_TSAVORITE("%-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
@@ -2838,7 +2838,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         }
     }
 }
-#endif /* GGML_PERF || GGML_PERF_DETAI */
+#endif /* GGML_PERF-related flags */


 void llama_perf_context_print(const llama_context * ctx) {
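The two variants of ggml_perf_print_totals differ only in the extra Target column; both derive the Avg column as Total divided by Runs. A small sketch of the release-build table layout, reusing the field widths from the format strings above (the row values are made up):

```cpp
#include <cstdio>

int main() {
    // Hypothetical totals entry; widths follow "%-16s %7s %14s %16s" above.
    const char * op = "MUL_MAT";
    long   runs     = 128;
    double total_us = 51234.0;

    std::printf("%-16s %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");
    std::printf("%-16s %7ld %14.0f %16.2f\n", op, runs, total_us, total_us / runs);
}
```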
@@ -2852,7 +2852,7 @@ void llama_perf_context_print(const llama_context * ctx) {
             __func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval);
     LLAMA_LOG_INFO("%s:       total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));

-#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
+#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL)
     LLAMA_LOG_TSAVORITE("\n%s:        load time = %10.2f ms\n", __func__, data.t_load_ms);
     LLAMA_LOG_TSAVORITE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, data.t_p_eval_ms, data.n_p_eval, data.t_p_eval_ms / data.n_p_eval, 1e3 / data.t_p_eval_ms * data.n_p_eval);
@@ -2861,7 +2861,7 @@ void llama_perf_context_print(const llama_context * ctx) {
     LLAMA_LOG_TSAVORITE("%s:       total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));

     ggml_perf_print_totals(const_cast<ggml_perf_totals *>(ctx->perf_totals));
-#endif /* GGML_PERF || GGML_PERF_DETAIL */
+#endif /* GGML_PERF-related flags */
 }

 void llama_perf_context_reset(llama_context * ctx) {
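ggml_perf_print_totals takes a non-const pointer, but llama_perf_context_print only holds a const llama_context *, which is why the call site casts. A stripped-down sketch of that pattern; llama_context_like and its field layout are assumptions for illustration only.

```cpp
#include <cstdio>

struct ggml_perf_totals { long runs; double total_us; };  // assumed fields

// Non-const parameter, matching the signature in the diff, even though
// printing only reads the totals.
static void ggml_perf_print_totals(ggml_perf_totals * totals) {
    std::printf("runs=%ld total_us=%.0f\n", totals->runs, totals->total_us);
}

struct llama_context_like { ggml_perf_totals perf_totals[1]; };  // stand-in

static void print_from_const(const llama_context_like * ctx) {
    // ctx->perf_totals is const here, so the cast strips const to satisfy
    // the non-const parameter, exactly as the diff's call site does.
    ggml_perf_print_totals(const_cast<ggml_perf_totals *>(ctx->perf_totals));
}

int main() {
    llama_context_like ctx = { { { 3, 123.0 } } };
    print_from_const(&ctx);
}
```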