@@ -932,9 +932,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     kv_self_update();
 
     int64_t n_outputs_prev = 0;
-#ifdef GGML_PERF
+#ifdef GGML_PERF_DETAIL
     FILE *perf_all_shape_fp = ggml_perf_log_open("ggml_perf-all-shape.log");
-#endif /* GGML_PERF */
+#endif /* GGML_PERF_DETAIL */
 
     while (sbatch.n_tokens > 0) {
         llama_ubatch ubatch = kv_self->ubatch_next(sbatch, cparams.n_ubatch, embd_pooled);
@@ -975,11 +975,13 @@ int llama_context::decode(llama_batch & inp_batch) {
 
         const auto compute_status = graph_compute(gf, ubatch.n_tokens > 1);
 #ifdef GGML_PERF
+        ggml_perf_accumulate(perf_totals, gf);
+#elif GGML_PERF_DETAIL
         if (perf_all_shape_fp) {
             ggml_perf_write_detailed_csv(gf, perf_all_shape_fp);
         }
         ggml_perf_accumulate(perf_totals, gf);
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
         if (compute_status != GGML_STATUS_SUCCESS) {
             switch (compute_status) {
                 case GGML_STATUS_ABORTED:
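In short: with plain GGML_PERF the decode loop only folds each computed graph into the per-op totals, while GGML_PERF_DETAIL additionally streams a per-node, shape-level CSV row to ggml_perf-all-shape.log before accumulating. Below is a minimal standalone sketch of what that accumulation step could look like; every type and field here is a simplified stand-in for the fork's real ggml structures (in particular, perf_time_us is an assumed per-node timing field), not the actual implementation:

```c
/* Standalone sketch; all names here are simplified stand-ins for the
 * fork's real ggml structures, not its actual definitions. */
#include <stdint.h>

enum { SKETCH_OP_COUNT = 64 };             /* stands in for GGML_OP_COUNT   */

struct sketch_node {
    int     op;                            /* stands in for enum ggml_op    */
    int64_t perf_time_us;                  /* assumed per-node timing field */
};

struct sketch_graph {
    int                  n_nodes;
    struct sketch_node **nodes;
};

struct sketch_perf_totals {
    int64_t runs;                          /* times this op executed        */
    int64_t total_us;                      /* accumulated wall time, in us  */
};

/* Fold one computed graph into the per-op totals table; this is the role
 * ggml_perf_accumulate() plays after each graph_compute() call above. */
static void sketch_perf_accumulate(struct sketch_perf_totals totals[SKETCH_OP_COUNT],
                                   const struct sketch_graph * gf) {
    for (int i = 0; i < gf->n_nodes; ++i) {
        const struct sketch_node * node = gf->nodes[i];
        totals[node->op].runs     += 1;
        totals[node->op].total_us += node->perf_time_us;
    }
}
```

Accumulating per op (rather than per node) keeps the table a fixed size regardless of graph depth, which is why the summary printer below can iterate over GGML_OP_COUNT entries.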
@@ -2649,7 +2651,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         }
     }
 }
-#else
+#elif GGML_PERF_DETAIL
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
     LLAMA_LOG_TSAVORITE("%-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
@@ -2699,7 +2701,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         }
     }
 }
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
 
 void llama_perf_context_print(const llama_context * ctx) {
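For completeness, here is a sketch of the summary printer that the last two hunks gate behind GGML_PERF_DETAIL, reusing the stand-in types from the sketch above. The real function also prints a per-backend "Target" column and logs through LLAMA_LOG_TSAVORITE rather than printf; both are omitted here to keep the sketch self-contained:

```c
#include <inttypes.h>
#include <stdio.h>

/* Stand-in for the op-name lookup the real printer uses. */
static const char * sketch_op_name(int op) { (void) op; return "OP"; }

/* Print one row per op that actually ran, mirroring the
 * "Op / Runs / Total us / Avg us" columns from the diff. */
static void sketch_perf_print_totals(const struct sketch_perf_totals totals[SKETCH_OP_COUNT]) {
    printf("\n=== GGML Perf Summary ===\n");
    printf("%-16s %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");
    for (int op = 0; op < SKETCH_OP_COUNT; ++op) {
        if (totals[op].runs == 0) {
            continue; /* skip ops that never executed */
        }
        printf("%-16s %7" PRId64 " %14" PRId64 " %16.2f\n",
               sketch_op_name(op),
               totals[op].runs,
               totals[op].total_us,
               (double) totals[op].total_us / (double) totals[op].runs);
    }
}
```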