Skip to content

Commit 22d8b00

Browse files
authored
Merge pull request #64 from tsisw/FIR-1031
@FIR-1031 - GGML: Add TSI Kernel Count for each OP at PERF TABLE
2 parents 721bb4b + 219745e commit 22d8b00

File tree

4 files changed

+14
-4
lines changed

4 files changed

+14
-4
lines changed

ggml/include/ggml.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -662,9 +662,10 @@ extern "C" {
662662

663663
#if defined(GGML_PERF) || defined(GGML_PERF_RELEASE) || defined(GGML_PERF_DETAIL)
664664
int64_t perf_runs;
665+
int64_t tsi_kernel_runs;
665666
int64_t perf_time_us;
666667
enum ggml_compute_backend_type ggml_compute_backend;
667-
char padding[4];
668+
char padding[12];
668669
#else
669670
char padding[8];
670671
#endif /* GGML_PERF-related flag */
@@ -2561,11 +2562,13 @@ extern "C" {
25612562
struct ggml_perf_backend_subtotals {
25622563
int64_t total_us;
25632564
int64_t runs;
2565+
int64_t tsi_kernel_count;
25642566
};
25652567

25662568
struct ggml_perf_unary_subtotals {
25672569
int64_t total_us;
25682570
int64_t runs;
2571+
int64_t tsi_kernel_count;
25692572
};
25702573
// internal perf accumulation struct
25712574
struct ggml_perf_totals {

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
12251225
val[0] = scale;
12261226
ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type](srcP0, srcP1, nodeP, glob_buf);
12271227
++device->stats.op_run_count[kernel_type].num_of_kernel_call;
1228+
++node->tsi_kernel_runs;
12281229
}
12291230
}
12301231
}
@@ -1258,6 +1259,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
12581259
// kernel call
12591260
ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type](srcP0, srcP1, nodeP);
12601261
++device->stats.op_run_count[kernel_type].num_of_kernel_call;
1262+
++node->tsi_kernel_runs;
12611263
}
12621264
}
12631265
}
@@ -1372,6 +1374,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
13721374
ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type](srcP0, nodeP);
13731375
}
13741376
++device->stats.op_run_count[kernel_type].num_of_kernel_call;
1377+
++node->tsi_kernel_runs;
13751378

13761379
if (ggml_tsavorite_log_type_val == GGML_TSAVORITE_LOG_DEBUG) {
13771380
log_data.data_type = GGML_TSAVORITE_TENSOR_NODE;

ggml/src/ggml.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7249,12 +7249,14 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
72497249
if (be >= GGML_COMPUTE_BACKEND_CPU && be < GGML_COMPUTE_BACKEND_COUNT) {
72507250
totals[op].backend_subtotals[be].total_us += node->perf_time_us;
72517251
totals[op].backend_subtotals[be].runs += node->perf_runs;
7252+
totals[op].backend_subtotals[be].tsi_kernel_count += node->tsi_kernel_runs;
72527253
}
72537254

72547255
if (op == GGML_OP_UNARY) {
72557256
enum ggml_unary_op subop = ggml_get_unary_op(node);
72567257
totals[op].unary_subtotals[subop].total_us += node->perf_time_us;
72577258
totals[op].unary_subtotals[subop].runs += node->perf_runs;
7259+
totals[op].unary_subtotals[subop].tsi_kernel_count += node->tsi_kernel_runs;
72587260
}
72597261
}
72607262
}

src/llama-context.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2791,7 +2791,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
27912791
#elif defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
27922792
void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
27932793
LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
2794-
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
2794+
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s %16s\n", "Op", "Target", "Runs", "TSI_KERNEL-RUN", "Total us", "Avg us");
27952795

27962796
for (int i = 0; i < GGML_OP_COUNT; ++i) {
27972797
if (totals[i].runs > 0) {
@@ -2801,10 +2801,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
28012801
char padded_backend[7] = {0}; // 6 chars + null terminator
28022802
snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name);
28032803

2804-
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16.2f\n",
2804+
LLAMA_LOG_TSAVORITE(" %-16s %-8s %7ld %14ld %16ld %16.2f\n",
28052805
totals[i].op_name ? totals[i].op_name : "UNKNOWN",
28062806
padded_backend,
28072807
totals[i].backend_subtotals[b].runs,
2808+
totals[i].backend_subtotals[b].tsi_kernel_count,
28082809
totals[i].backend_subtotals[b].total_us,
28092810
(double)totals[i].backend_subtotals[b].total_us / totals[i].backend_subtotals[b].runs);
28102811
}
@@ -2826,10 +2827,11 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
28262827
char padded_backend[7] = {0};
28272828
snprintf(padded_backend, sizeof(padded_backend), "%-6s", backend_name ? backend_name : "UNK");
28282829

2829-
LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16.2f\n",
2830+
LLAMA_LOG_TSAVORITE(" -> %-11s %-8s %7ld %14ld %16ld %16.2f\n",
28302831
ggml_unary_op_name((enum ggml_unary_op) j),
28312832
padded_backend,
28322833
totals[i].unary_subtotals[j].runs,
2834+
totals[i].unary_subtotals[j].tsi_kernel_count,
28332835
totals[i].unary_subtotals[j].total_us,
28342836
(double)totals[i].unary_subtotals[j].total_us / totals[i].unary_subtotals[j].runs);
28352837
}

0 commit comments

Comments
 (0)