Skip to content

Commit 7420f98

Browse files
author
Huaishun Hu
committed
add ggml op perf logs
1 parent 6064a11 commit 7420f98

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

ggml/include/ggml.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2249,6 +2249,17 @@ extern "C" {
22492249
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
22502250
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
22512251

2252+
#define GGML_OP_PERF
2253+
#if defined(GGML_OP_PERF)
2254+
// op: [ count, total_time ]
2255+
enum OP_STAT_ENUM {
2256+
OP_COUNT = 0,
2257+
OP_TOTAL_TIME,
2258+
OP_STAT_ENUM_LEN,
2259+
};
2260+
static float op_stats[GGML_OP_COUNT][OP_STAT_ENUM_LEN] = {0};
2261+
#endif // defined(GGML_OP_PERF)
2262+
22522263
#ifdef __cplusplus
22532264
}
22542265
#endif

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2501,11 +2501,21 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
25012501
}
25022502
#endif
25032503

2504+
#if defined(GGML_OP_PERF)
2505+
const uint64_t op_start_us = ggml_time_us();
2506+
#endif // defined(GGML_OP_PERF)
2507+
25042508
bool ok = ggml_cuda_compute_forward(*cuda_ctx, node);
25052509
if (!ok) {
25062510
GGML_LOG_ERROR("%s: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
25072511
}
25082512
GGML_ASSERT(ok);
2513+
2514+
#if defined(GGML_OP_PERF)
2515+
const uint64_t op_end_us = ggml_time_us();
2516+
op_stats[node->op][OP_COUNT] += 1;
2517+
op_stats[node->op][OP_TOTAL_TIME] += op_end_us - op_start_us;
2518+
#endif // defined(GGML_OP_PERF)
25092519
}
25102520
}
25112521

@@ -2607,6 +2617,27 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
26072617
#endif // USE_CUDA_GRAPH
26082618
}
26092619

2620+
#if defined(GGML_OP_PERF)
    {
        // Append the current contents of op_stats to "ggml_op_perf.log" (opened
        // relative to the working directory). The file is opened in append mode,
        // so every pass through this code adds a new section to the log.
        FILE * log_file = fopen("ggml_op_perf.log", "a");
        if (log_file == NULL) {
            // Profiling output is best-effort: report the failure and carry on
            // rather than handing a NULL stream to fprintf/fclose.
            GGML_LOG_ERROR("%s: failed to open ggml_op_perf.log for appending\n", __func__);
        } else {
            fprintf(log_file, "## compute stats for each op: ##################################################\n");

            // Totals across all ops, used as the denominators for the
            // percentage columns. Summed in double so large totals stay exact.
            double total_count = 0.0;
            double total_time  = 0.0;
            for (int i = 0; i < GGML_OP_COUNT; ++i) {
                total_count += op_stats[i][OP_COUNT];
                total_time  += op_stats[i][OP_TOTAL_TIME];
            }

            for (int i = 0; i < GGML_OP_COUNT; ++i) {
                const double count    = op_stats[i][OP_COUNT];
                const double time_sum = op_stats[i][OP_TOTAL_TIME];

                // Report 0% instead of dividing by zero (which would print
                // "nan") when nothing has been recorded yet.
                const double count_pct = total_count > 0.0 ? 100.0 * count    / total_count : 0.0;
                const double time_pct  = total_time  > 0.0 ? 100.0 * time_sum / total_time  : 0.0;

                fprintf(log_file,
                        "OP[%d] Stat: count = %9.0f, count%% = %3.2f%%, time = %12.0f, time%% = %3.2f%%\n",
                        i,
                        count, count_pct,
                        time_sum, time_pct);
            }

            fclose(log_file);
        }
    }
#endif // defined(GGML_OP_PERF)
2640+
26102641
return GGML_STATUS_SUCCESS;
26112642
}
26122643

0 commit comments

Comments
 (0)