
Commit b6fd008

Author: Anoop Kapoor (committed)

Fix extra llama-cpp model-related log messages that appeared when the compilation flag GGML_PERF_DETAIL was enabled.

1 parent 4b00537 commit b6fd008
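
The change standardizes the preprocessor guard around the perf instrumentation: guards that previously checked only GGML_PERF now also fire for GGML_PERF_DETAIL. A minimal, illustrative sketch of how the new guard resolves (not code from the commit; build with -DGGML_PERF or -DGGML_PERF_DETAIL to toggle it):

#include <stdio.h>

int main(void) {
    /* Illustration only: the same guard pattern the commit adopts. */
#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
    printf("perf instrumentation compiled in\n");
#else
    printf("perf instrumentation compiled out\n");
#endif /* GGML_PERF || GGML_PERF_DETAIL */
    return 0;
}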

4 files changed: +17 -20 lines

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 4 additions & 4 deletions
@@ -2879,12 +2879,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
         struct ggml_tensor * node = cgraph->nodes[node_n];

-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
         int64_t t_start = ggml_time_us();
-#endif
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
         ggml_compute_forward(&params, node);

-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
         int64_t t_end = ggml_time_us();
         node->perf_runs++;
         if (t_end >= t_start) {
@@ -2893,7 +2893,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             // Handle wraparound by assuming timer rolls over at max int64_t value
             node->perf_time_us += (INT64_MAX - t_start + t_end + 1);
         }
-#endif
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
         if (state->ith == 0 && cplan->abort_callback &&
             cplan->abort_callback(cplan->abort_callback_data)) {
             atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
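
For context, the guarded block above computes a wraparound-safe elapsed time: it assumes the microsecond timer rolls over at INT64_MAX, so when t_end < t_start it adds the distance to INT64_MAX plus the distance past zero. A standalone sketch of that arithmetic (hypothetical values, not part of the commit):

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe elapsed time, mirroring the guarded block above. */
static int64_t elapsed_us(int64_t t_start, int64_t t_end) {
    if (t_end >= t_start) {
        return t_end - t_start;
    }
    /* timer rolled over at INT64_MAX between the two samples */
    return (INT64_MAX - t_start) + t_end + 1;
}

int main(void) {
    printf("%lld\n", (long long) elapsed_us(100, 250));          /* 150 */
    printf("%lld\n", (long long) elapsed_us(INT64_MAX - 5, 10)); /* 16 */
    return 0;
}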

src/llama-context.cpp

Lines changed: 9 additions & 12 deletions
@@ -1090,16 +1090,12 @@ int llama_context::decode(const llama_batch & batch_inp) {
     ggml_status status;
     const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status);

-#ifdef GGML_PERF
+#if defined(GGML_PERF)
     ggml_perf_accumulate(perf_totals, res->get_gf());
-#endif /* GGML_PERF */
-
-#ifdef GGML_PERF_DETAIL
-    if (perf_all_shape_fp) {
-        ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
-    }
+#elif defined(GGML_PERF_DETAIL)
     ggml_perf_accumulate(perf_totals, res->get_gf());
-#endif /* GGML_PERF_DETAI */
+    ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
+#endif /* GGML_PERF || GGML_PERF_DETAIL */


     if (!res) {
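
Read straight through, the decode-path block after this hunk behaves as follows (a reconstructed reading of the diff, not additional code from the commit): with GGML_PERF defined, only the ggml_perf_accumulate call is compiled; with GGML_PERF_DETAIL defined instead, the preprocessor keeps roughly these two calls:

    ggml_perf_accumulate(perf_totals, res->get_gf());
    ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);

With neither flag defined, the whole block compiles out.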
@@ -2763,7 +2759,7 @@ llama_perf_context_data llama_perf_context(const llama_context * ctx) {
 }


-#ifdef GGML_PERF
+#if defined(GGML_PERF)
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
     LLAMA_LOG_TSAVORITE(" %-16s %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");
@@ -2791,7 +2787,8 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         }
     }
 }
-#elif GGML_PERF_DETAIL
+
+#elif defined(GGML_PERF_DETAIL)
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
     LLAMA_LOG_TSAVORITE(" %-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
@@ -2855,7 +2852,7 @@ void llama_perf_context_print(const llama_context * ctx) {
             __func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval);
     LLAMA_LOG_INFO("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));

-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
     LLAMA_LOG_TSAVORITE("\n%s: load time = %10.2f ms\n", __func__, data.t_load_ms);
     LLAMA_LOG_TSAVORITE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, data.t_p_eval_ms, data.n_p_eval, data.t_p_eval_ms / data.n_p_eval, 1e3 / data.t_p_eval_ms * data.n_p_eval);
@@ -2864,7 +2861,7 @@ void llama_perf_context_print(const llama_context * ctx) {
     LLAMA_LOG_TSAVORITE("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval));

     ggml_perf_print_totals(const_cast<ggml_perf_totals *>(ctx->perf_totals));
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 }

 void llama_perf_context_reset(llama_context * ctx) {

src/llama-context.h

Lines changed: 2 additions & 2 deletions
@@ -198,9 +198,9 @@ struct llama_context {

     // reserve a graph with a dummy ubatch of the specified size
     ggml_cgraph * graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx, bool split_only = false);
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
     struct ggml_perf_totals perf_totals[GGML_OP_COUNT] = {}; // add this to llama_context
-#endif
+#endif /* GGML_PERF || GGML_PERF_DETAIL */

 private:
     llm_graph_params graph_params(

tools/main/main.cpp

Lines changed: 2 additions & 2 deletions
@@ -126,9 +126,9 @@ int main(int argc, char ** argv) {
         LOG_WRN("%s: warning: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
     }

-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
     llama_log_set(my_logger, nullptr);
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
     LOG_INF("%s: llama backend init\n", __func__);

     llama_backend_init();
