Commit eff5f15

Anoop Kapoor committed
@FIR-929 - GGML: Profiling data improvement

1 parent d92f452

4 files changed: +58 -13 lines changed

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 2 additions & 2 deletions
@@ -522,7 +522,7 @@ static struct ggml_backend_tsavorite_context *ggml_tsavorite_init(ggml_backend_d
   if (tsi_log_setup() == false)
     return NULL;

-  std::string mainProfilerName = "GGML Tsavorite ";
+  std::string mainProfilerName = "OPU ";
   tsirt::utils::TSIProfiler::initialize();
   tsirt::utils::TSIScopedProfiler mainProfiler(mainProfilerName);

@@ -630,7 +630,7 @@ static void ggml_tsavorite_free(struct ggml_backend_tsavorite_context *ctx) {
   tsi_finalize();
   GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
   tsirt::utils::TSIProfiler::finalize();
-  std::cout << "\nGGML Tsavorite Profiling Results:" << std::endl;
+  std::cout << "\nOPU Profiling Results:" << std::endl;
   std::cout << tsirt::utils::TSIProfiler::getFormattedResults(
                 /*truncateFuncNames*/ true)
            << std::endl;
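Editor's note: the diff only renames the profiler label, but it exposes the full profiler lifecycle this backend uses. Below is a minimal sketch of that pattern using only the calls visible above; the wrapper function, the commented header name, and the measured workload are placeholders, not code from the repository.

// Sketch under assumptions: only the tsirt::utils calls appear in the diff;
// the header path and the surrounding function are hypothetical.
#include <iostream>
#include <string>
// #include "tsi_profiler.h"  // hypothetical header providing tsirt::utils

static void profiled_region_sketch() {
    tsirt::utils::TSIProfiler::initialize();
    {
        // RAII scope: timing starts at construction, stops at destruction.
        tsirt::utils::TSIScopedProfiler scoped(std::string("OPU "));
        // ... workload to be measured ...
    }
    tsirt::utils::TSIProfiler::finalize();
    std::cout << tsirt::utils::TSIProfiler::getFormattedResults(/*truncateFuncNames*/ true)
              << std::endl;
}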

ggml/src/ggml.c

Lines changed: 1 addition & 1 deletion
@@ -988,7 +988,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
 #ifdef GGML_PERF
 static const char * GGML_BACKEND_TYPE[GGML_COMPUTE_BACKEND_COUNT] = {
     "CPU",
-    "TSAVORITE"
+    "OPU"
 };
 #endif /* GGML_PERF */
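The renamed entry keeps this name table aligned with the backend enum that indexes it. A plausible shape of that enum, inferred from GGML_COMPUTE_BACKEND_COUNT and the two table entries; the enumerator spellings are assumptions, not taken from the repository.

// Hedged sketch: only GGML_COMPUTE_BACKEND_COUNT and the CPU/OPU ordering
// are grounded in the diff; the enumerator names are hypothetical.
enum ggml_compute_backend_type {
    GGML_COMPUTE_BACKEND_TYPE_CPU = 0, // maps to "CPU"
    GGML_COMPUTE_BACKEND_TYPE_OPU,     // maps to "OPU" (formerly "TSAVORITE")
    GGML_COMPUTE_BACKEND_COUNT         // must remain last: it sizes the table
};

The string table and the enum must change together; a mismatch would mislabel every per-backend row in the perf summary.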

src/llama-context.cpp

Lines changed: 47 additions & 7 deletions
@@ -2620,25 +2620,51 @@ llama_perf_context_data llama_perf_context(const llama_context * ctx) {
     return data;
 }

+
 #ifdef GGML_PERF
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
-    LLAMA_LOG_TSAVORITE(" %-16s: %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");
+    LLAMA_LOG_TSAVORITE(" %-16s %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");

     for (int i = 0; i < GGML_OP_COUNT; ++i) {
         if (totals[i].runs > 0) {
-            // Main op row
-            LLAMA_LOG_TSAVORITE(" %-16s: %7ld %14ld %16.2f\n",
+            LLAMA_LOG_TSAVORITE(" %-16s %7ld %14ld %16.2f\n",
                 totals[i].op_name ? totals[i].op_name : "UNKNOWN",
                 totals[i].runs,
                 totals[i].total_us,
                 (double)totals[i].total_us / totals[i].runs);
+        }
+
+        // Unary sub-op breakdown
+        if (i == GGML_OP_UNARY) {
+            for (int j = 0; j < GGML_UNARY_OP_COUNT; ++j) {
+                if (totals[i].unary_subtotals[j].runs > 0) {
+                    LLAMA_LOG_TSAVORITE(" -> %-11s %7ld %14ld %16.2f\n",
+                        ggml_unary_op_name((enum ggml_unary_op) j),
+                        totals[i].unary_subtotals[j].runs,
+                        totals[i].unary_subtotals[j].total_us,
+                        (double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs);
+                }
+            }
+        }
+    }
+}
+#else
+void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
+    LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
+    LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");

-            // Backend subtotals
+    for (int i = 0; i < GGML_OP_COUNT; ++i) {
+        if (totals[i].runs > 0) {
             for (int b = 0; b < GGML_COMPUTE_BACKEND_COUNT; ++b) {
                 if (totals[i].backend_subtotals[b].runs > 0) {
-                    LLAMA_LOG_TSAVORITE(" [%-10s] : %7ld %14ld %16.2f\n",
-                        ggml_backend_type((enum ggml_compute_backend_type) b),
+                    const char *backend_name = ggml_backend_type((enum ggml_compute_backend_type) b);
+                    char padded_backend[7] = {0}; // 6 chars + null terminator
+                    snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name);
+
+                    LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16.2f\n",
+                        totals[i].op_name ? totals[i].op_name : "UNKNOWN",
+                        padded_backend,
                         totals[i].backend_subtotals[b].runs,
                         totals[i].backend_subtotals[b].total_us,
                         (double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs);
@@ -2649,8 +2675,21 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         if (i == GGML_OP_UNARY) {
             for (int j = 0; j < GGML_UNARY_OP_COUNT; ++j) {
                 if (totals[i].unary_subtotals[j].runs > 0) {
-                    LLAMA_LOG_TSAVORITE(" -> %-12s: %7ld %14ld %16.2f\n",
+                    // Find backend for unary op (assumes same as parent op)
+                    const char *backend_name = NULL;
+                    for (int b = 0; b < GGML_COMPUTE_BACKEND_COUNT; ++b) {
+                        if (totals[i].backend_subtotals[b].runs > 0) {
+                            backend_name = ggml_backend_type((enum ggml_compute_backend_type) b);
+                            break;
+                        }
+                    }
+
+                    char padded_backend[7] = {0};
+                    snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name ? backend_name : "UNK");
+
+                    LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16.2f\n",
                         ggml_unary_op_name((enum ggml_unary_op) j),
+                        padded_backend,
                         totals[i].unary_subtotals[j].runs,
                         totals[i].unary_subtotals[j].total_us,
                         (double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs);
@@ -2662,6 +2701,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
 }
 #endif /* GGML_PERF */

+
 void llama_perf_context_print(const llama_context * ctx) {
     const auto data = llama_perf_context(ctx);
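ggml_perf_print_totals walks a totals array whose definition lies outside this diff. Below is a minimal reconstruction consistent with the field accesses above, with integer types chosen to match the %ld format specifiers; the member names come from the diff, while the subtotal struct name and exact types are assumptions.

// Hedged sketch of the accumulator layout the printer consumes; inferred
// from usage in the diff, not copied from the repository.
struct ggml_perf_subtotal {
    long runs;     // printed with %7ld
    long total_us; // printed with %14ld
};

struct ggml_perf_totals {
    const char * op_name; // NULL falls back to "UNKNOWN" when printed
    long runs;
    long total_us;
    struct ggml_perf_subtotal backend_subtotals[GGML_COMPUTE_BACKEND_COUNT];
    struct ggml_perf_subtotal unary_subtotals[GGML_UNARY_OP_COUNT];
};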

tools/main/main.cpp

Lines changed: 8 additions & 3 deletions
@@ -971,16 +971,21 @@ int main(int argc, char ** argv) {
         LOG("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
         llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
     }
+    // Ensure all prompt response output is fully completed before printing profiling data.
+    // Free threadpools first to guarantee all worker threads have finished processing and output.
+    // This prevents interleaving of prompt response and profiling output, ensuring correct order.
+    // fflush(stdout) is called to flush any remaining output before printing profiling stats.
+
+    ggml_threadpool_free_fn(threadpool);
+    ggml_threadpool_free_fn(threadpool_batch);

     LOG("\n\n");
+    fflush(stdout);
     common_perf_print(ctx, smpl);

     common_sampler_free(smpl);

     llama_backend_free();

-    ggml_threadpool_free_fn(threadpool);
-    ggml_threadpool_free_fn(threadpool_batch);
-
     return 0;
 }
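The main.cpp change is purely an ordering fix. Condensed, the teardown sequence the commit establishes is (calls exactly as in the diff; the comments here are editorial):

// Teardown order after this commit:
ggml_threadpool_free_fn(threadpool);       // join generation workers first
ggml_threadpool_free_fn(threadpool_batch); // join batch workers
fflush(stdout);                            // drain any buffered prompt output
common_perf_print(ctx, smpl);              // perf summary prints last, un-interleaved

Previously the threadpools were freed only after common_perf_print, so worker-thread output could still interleave with the profiling table.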
