@@ -2620,25 +2620,51 @@ llama_perf_context_data llama_perf_context(const llama_context * ctx) {
26202620 return data;
26212621}
26222622
2623+
26232624#ifdef GGML_PERF
26242625void ggml_perf_print_totals (struct ggml_perf_totals totals[GGML_OP_COUNT]) {
26252626 LLAMA_LOG_TSAVORITE (" \n === GGML Perf Summary ===\n " );
2626- LLAMA_LOG_TSAVORITE (" %-16s: %7s %14s %16s\n " , " Op" , " Runs" , " Total us" , " Avg us" );
2627+ LLAMA_LOG_TSAVORITE (" %-16s %7s %14s %16s\n " , " Op" , " Runs" , " Total us" , " Avg us" );
26272628
26282629 for (int i = 0 ; i < GGML_OP_COUNT; ++i) {
26292630 if (totals[i].runs > 0 ) {
2630- // Main op row
2631- LLAMA_LOG_TSAVORITE (" %-16s: %7ld %14ld %16.2f\n " ,
2631+ LLAMA_LOG_TSAVORITE (" %-16s %7ld %14ld %16.2f\n " ,
26322632 totals[i].op_name ? totals[i].op_name : " UNKNOWN" ,
26332633 totals[i].runs ,
26342634 totals[i].total_us ,
26352635 (double )totals[i].total_us / totals[i].runs );
2636+ }
2637+
2638+ // Unary sub-op breakdown
2639+ if (i == GGML_OP_UNARY) {
2640+ for (int j = 0 ; j < GGML_UNARY_OP_COUNT; ++j) {
2641+ if (totals[i].unary_subtotals [j].runs > 0 ) {
2642+ LLAMA_LOG_TSAVORITE (" -> %-11s %7ld %14ld %16.2f\n " ,
2643+ ggml_unary_op_name ((enum ggml_unary_op) j),
2644+ totals[i].unary_subtotals [j].runs ,
2645+ totals[i].unary_subtotals [j].total_us ,
2646+ (double )totals[i].unary_subtotals [j].total_us / totals[i].unary_subtotals [j].runs );
2647+ }
2648+ }
2649+ }
2650+ }
2651+ }
2652+ #else
2653+ void ggml_perf_print_totals (struct ggml_perf_totals totals[GGML_OP_COUNT]) {
2654+ LLAMA_LOG_TSAVORITE (" \n === GGML Perf Summary ===\n " );
2655+ LLAMA_LOG_TSAVORITE (" %-16s %-8s %7s %14s %16s\n " , " Op" , " Target" , " Runs" , " Total us" , " Avg us" );
26362656
2637- // Backend subtotals
2657+ for (int i = 0 ; i < GGML_OP_COUNT; ++i) {
2658+ if (totals[i].runs > 0 ) {
26382659 for (int b = 0 ; b < GGML_COMPUTE_BACKEND_COUNT; ++b) {
26392660 if (totals[i].backend_subtotals [b].runs > 0 ) {
2640- LLAMA_LOG_TSAVORITE (" [%-10s] : %7ld %14ld %16.2f\n " ,
2641- ggml_backend_type ((enum ggml_compute_backend_type) b),
2661+ const char *backend_name = ggml_backend_type ((enum ggml_compute_backend_type) b);
2662+ char padded_backend[7 ] = {0 }; // 6 chars + null terminator
2663+ snprintf (padded_backend, sizeof (padded_backend), " %-6s" , backend_name);
2664+
2665+ LLAMA_LOG_TSAVORITE (" %-16s %-8s %7ld %14ld %16.2f\n " ,
2666+ totals[i].op_name ? totals[i].op_name : " UNKNOWN" ,
2667+ padded_backend,
26422668 totals[i].backend_subtotals [b].runs ,
26432669 totals[i].backend_subtotals [b].total_us ,
26442670 (double )totals[i].backend_subtotals [b].total_us / totals[i].backend_subtotals [b].runs );
@@ -2649,8 +2675,21 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
26492675 if (i == GGML_OP_UNARY) {
26502676 for (int j = 0 ; j < GGML_UNARY_OP_COUNT; ++j) {
26512677 if (totals[i].unary_subtotals [j].runs > 0 ) {
2652- LLAMA_LOG_TSAVORITE (" -> %-12s: %7ld %14ld %16.2f\n " ,
2678+ // Find backend for unary op (assumes same as parent op)
2679+ const char *backend_name = NULL ;
2680+ for (int b = 0 ; b < GGML_COMPUTE_BACKEND_COUNT; ++b) {
2681+ if (totals[i].backend_subtotals [b].runs > 0 ) {
2682+ backend_name = ggml_backend_type ((enum ggml_compute_backend_type) b);
2683+ break ;
2684+ }
2685+ }
2686+
2687+ char padded_backend[7 ] = {0 };
2688+ snprintf (padded_backend, sizeof (padded_backend), " %-6s" , backend_name ? backend_name : " UNK" );
2689+
2690+ LLAMA_LOG_TSAVORITE (" -> %-11s %-8s %7ld %14ld %16.2f\n " ,
26532691 ggml_unary_op_name ((enum ggml_unary_op) j),
2692+ padded_backend,
26542693 totals[i].unary_subtotals [j].runs ,
26552694 totals[i].unary_subtotals [j].total_us ,
26562695 (double )totals[i].unary_subtotals [j].total_us / totals[i].unary_subtotals [j].runs );
@@ -2662,6 +2701,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
26622701}
26632702#endif /* GGML_PERF */
26642703
2704+
26652705void llama_perf_context_print (const llama_context * ctx) {
26662706 const auto data = llama_perf_context (ctx);
26672707
0 commit comments