@@ -2501,11 +2501,21 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
25012501 }
25022502#endif
25032503
2504+ #if defined(GGML_OP_PERF)
2505+ const uint64_t op_start_us = ggml_time_us ();
2506+ #endif // defined(GGML_OP_PERF)
2507+
25042508 bool ok = ggml_cuda_compute_forward (*cuda_ctx, node);
25052509 if (!ok) {
25062510 GGML_LOG_ERROR (" %s: op not supported %s (%s)\n " , __func__, node->name , ggml_op_name (node->op ));
25072511 }
25082512 GGML_ASSERT (ok);
2513+
2514+ #if defined(GGML_OP_PERF)
2515+ const uint64_t op_end_us = ggml_time_us ();
2516+ op_stats[node->op ][OP_COUNT] += 1 ;
2517+ op_stats[node->op ][OP_TOTAL_TIME] += op_end_us - op_start_us;
2518+ #endif // defined(GGML_OP_PERF)
25092519 }
25102520 }
25112521
@@ -2607,6 +2617,27 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
26072617#endif // USE_CUDA_GRAPH
26082618 }
26092619
#if defined(GGML_OP_PERF)
    // Append a per-op summary of op_stats (invocation count and accumulated
    // ggml_time_us() delta, in microseconds) to ggml_op_perf.log.
    // The file is opened in append mode, so a new table is written every time
    // this point is reached.
    // NOTE(review): op_stats does not appear to be reset here, so the numbers
    // are presumably cumulative across calls -- confirm against its declaration.
    {
        FILE * logFile = fopen("ggml_op_perf.log", "a");
        if (logFile == NULL) {
            // Profiling output is best-effort: report the failure and keep going
            // instead of handing a NULL stream to fprintf/fclose.
            GGML_LOG_ERROR("%s: failed to open ggml_op_perf.log for appending\n", __func__);
        } else {
            fprintf(logFile, "## compute stats for each op: ##################################################\n");

            // Accumulate in double: float stops representing integers exactly
            // above 2^24, which is only ~16.7 s worth of microseconds.
            double total_time  = 0.0;
            double total_count = 0.0;
            for (int i = 0; i < GGML_OP_COUNT; ++i) {
                total_count += (double) op_stats[i][OP_COUNT];
                total_time  += (double) op_stats[i][OP_TOTAL_TIME];
            }

            for (int i = 0; i < GGML_OP_COUNT; ++i) {
                // Explicit casts keep the %f conversions well-defined whatever
                // the element type of op_stats is.
                const double count = (double) op_stats[i][OP_COUNT];
                const double time  = (double) op_stats[i][OP_TOTAL_TIME];

                // Report 0% rather than NaN when nothing has been recorded yet.
                const double count_pct = total_count > 0.0 ? 100.0 * count / total_count : 0.0;
                const double time_pct  = total_time  > 0.0 ? 100.0 * time  / total_time  : 0.0;

                fprintf(logFile,
                        "OP[%d] Stat: count = %9.0f, count%% = %3.2f%%, time = %12.0f, time%% = %3.2f%%\n",
                        i, count, count_pct, time, time_pct);
            }

            fclose(logFile);
        }
    }
#endif // defined(GGML_OP_PERF)
2640+
26102641 return GGML_STATUS_SUCCESS;
26112642}
26122643
0 commit comments