@@ -932,9 +932,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     kv_self_update();
 
     int64_t n_outputs_prev = 0;
-#ifdef GGML_PERF
+#ifdef GGML_PERF_DETAIL
     FILE *perf_all_shape_fp = ggml_perf_log_open("ggml_perf-all-shape.log");
-#endif /* GGML_PERF */
+#endif /* GGML_PERF_DETAIL */
 
     while (sbatch.n_tokens > 0) {
         llama_ubatch ubatch = kv_self->ubatch_next(sbatch, cparams.n_ubatch, embd_pooled);
@@ -975,11 +975,13 @@ int llama_context::decode(llama_batch & inp_batch) {
 
         const auto compute_status = graph_compute(gf, ubatch.n_tokens > 1);
 #ifdef GGML_PERF
+        ggml_perf_accumulate(perf_totals, gf);
+#elif GGML_PERF_DETAIL
         if (perf_all_shape_fp) {
             ggml_perf_write_detailed_csv(gf, perf_all_shape_fp);
         }
         ggml_perf_accumulate(perf_totals, gf);
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
         if (compute_status != GGML_STATUS_SUCCESS) {
             switch (compute_status) {
                 case GGML_STATUS_ABORTED:
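In short: with plain GGML_PERF the decode loop only folds each computed graph into the per-op totals, while GGML_PERF_DETAIL additionally streams a per-node, shape-level CSV row to ggml_perf-all-shape.log before accumulating. Below is a minimal standalone sketch of what that accumulation step could look like; every type and field here is a simplified stand-in for the fork's real ggml structures (in particular, perf_time_us is an assumed per-node timing field), not the actual implementation:

```c
/* Standalone sketch; all names here are simplified stand-ins for the
 * fork's real ggml structures, not its actual definitions. */
#include <stdint.h>

enum { SKETCH_OP_COUNT = 64 };             /* stands in for GGML_OP_COUNT   */

struct sketch_node {
    int     op;                            /* stands in for enum ggml_op    */
    int64_t perf_time_us;                  /* assumed per-node timing field */
};

struct sketch_graph {
    int                  n_nodes;
    struct sketch_node **nodes;
};

struct sketch_perf_totals {
    int64_t runs;                          /* times this op executed        */
    int64_t total_us;                      /* accumulated wall time, in us  */
};

/* Fold one computed graph into the per-op totals table; this is the role
 * ggml_perf_accumulate() plays after each graph_compute() call above. */
static void sketch_perf_accumulate(struct sketch_perf_totals totals[SKETCH_OP_COUNT],
                                   const struct sketch_graph * gf) {
    for (int i = 0; i < gf->n_nodes; ++i) {
        const struct sketch_node * node = gf->nodes[i];
        totals[node->op].runs     += 1;
        totals[node->op].total_us += node->perf_time_us;
    }
}
```

Accumulating per op (rather than per node) keeps the table a fixed size regardless of graph depth, which is why the summary printer below can iterate over GGML_OP_COUNT entries.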
@@ -2649,7 +2651,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         }
     }
 }
-#else
+#elif GGML_PERF_DETAIL
 void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
     LLAMA_LOG_TSAVORITE("\n=== GGML Perf Summary ===\n");
     LLAMA_LOG_TSAVORITE("%-16s %-8s %7s %14s %16s\n", "Op", "Target", "Runs", "Total us", "Avg us");
@@ -2699,7 +2701,7 @@ void ggml_perf_print_totals(struct ggml_perf_totals totals[GGML_OP_COUNT]) {
         }
     }
 }
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
 
 void llama_perf_context_print(const llama_context * ctx) {
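For completeness, here is a sketch of the summary printer that the last two hunks gate behind GGML_PERF_DETAIL, reusing the stand-in types from the sketch above. The real function also prints a per-backend "Target" column and logs through LLAMA_LOG_TSAVORITE rather than printf; both are omitted here to keep the sketch self-contained:

```c
#include <inttypes.h>
#include <stdio.h>

/* Stand-in for the op-name lookup the real printer uses. */
static const char * sketch_op_name(int op) { (void) op; return "OP"; }

/* Print one row per op that actually ran, mirroring the
 * "Op / Runs / Total us / Avg us" columns from the diff. */
static void sketch_perf_print_totals(const struct sketch_perf_totals totals[SKETCH_OP_COUNT]) {
    printf("\n=== GGML Perf Summary ===\n");
    printf("%-16s %7s %14s %16s\n", "Op", "Runs", "Total us", "Avg us");
    for (int op = 0; op < SKETCH_OP_COUNT; ++op) {
        if (totals[op].runs == 0) {
            continue; /* skip ops that never executed */
        }
        printf("%-16s %7" PRId64 " %14" PRId64 " %16.2f\n",
               sketch_op_name(op),
               totals[op].runs,
               totals[op].total_us,
               (double) totals[op].total_us / (double) totals[op].runs);
    }
}
```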