Skip to content

Commit a5410f0

Browse files
author
luker
committed
Add graph logging feature to GGML
This commit introduces a new feature to log the computation graph of the model to a CSV file. The logging can be enabled by setting the environment variable `GGML_LOG_GRAPH` to `1` or `true`. The output file can be customized using `GGML_LOG_GRAPH_FILENAME`. The CSV includes details such as node IDs, names, operations, dimensions, sizes, and flags for each tensor in the graph. The program will terminate after logging the graph. Additionally, the necessary header and source files for graph logging have been added, and the logging function is called after building the model graph.
1 parent 271369d commit a5410f0

File tree

6 files changed

+207
-1
lines changed

6 files changed

+207
-1
lines changed

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,35 @@
# GGML GRAPH LOGGING FORK OF LLAMA.CPP

This fork includes a feature to log the computation graph of the model to a CSV file.

## Graph Logging

To enable graph logging, set the environment variable `GGML_LOG_GRAPH` to `1` or `true`:

```bash
export GGML_LOG_GRAPH=1
```

By default, the graph will be written to `ggml_graph.csv` in the current working directory. You can specify a different filename using the `GGML_LOG_GRAPH_FILENAME` environment variable:

```bash
export GGML_LOG_GRAPH_FILENAME=/path/to/your/graph_log.csv
```

**Important:** When graph logging is enabled, the program will terminate immediately after writing the log file.

### Output Format

The output CSV file contains the following columns for each node (tensor) in the graph:

- `node_id`: The memory address of the tensor, serving as a unique ID.
- `name`: The name assigned to the tensor (if any).
- `op`: The GGML operation that produces this tensor.
- `dim0`, `dim1`, `dim2`, `dim3`: The dimensions of the tensor.
- `bytes`: The size of the tensor data in bytes.
- `flags`: Tensor flags (e.g., `PARAM`, `INPUT`, `OUTPUT`, `LEAF`).
- `src0`...`srcN`: The `node_id` (memory address) of the source tensors for this node, up to `GGML_MAX_SRC`.
133
# llama.cpp
234

335
![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)

ggml/include/ggml-cpp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "ggml-alloc.h"
99
#include "ggml-backend.h"
1010
#include "gguf.h"
11+
#include "ggml-graph-logging.h"
1112
#include <memory>
1213

1314
// Smart pointers for ggml types

ggml/include/ggml-graph-logging.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// ggml-graph-logging.h
//
// Optional debugging facility that dumps a ggml computation graph to a
// CSV file. Activation and the output path are controlled by the
// GGML_LOG_GRAPH / GGML_LOG_GRAPH_FILENAME environment variables
// (see the implementation in ggml-graph-logging.c).
#pragma once

#include <stdio.h>
#include <stdint.h>

// Forward declaration for ggml_cgraph
struct ggml_cgraph;

#ifdef __cplusplus
extern "C" {
#endif

// Log the entire computation graph to CSV.
// No-op unless the GGML_LOG_GRAPH environment variable is "1" or "true";
// when logging is enabled, the process terminates (exit(0)) after the
// graph has been written.
void ggml_log_graph(struct ggml_cgraph* cgraph);

#ifdef __cplusplus
}
#endif

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ add_library(ggml-base
201201
ggml-threading.h
202202
ggml-quants.c
203203
ggml-quants.h
204-
gguf.cpp)
204+
gguf.cpp
205+
ggml-graph-logging.c)
205206

206207
target_include_directories(ggml-base PRIVATE .)
207208
if (GGML_BACKEND_DL)

ggml/src/ggml-graph-logging.c

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// ggml-graph-logging.c
2+
#include "ggml-graph-logging.h"
3+
#include <stdlib.h> // for getenv
4+
#include <stdio.h> // for fprintf, stderr
5+
#include <stdbool.h>
6+
#include <string.h>
7+
#include <inttypes.h>
8+
9+
// Include the full definition of ggml structs
10+
#include "ggml.h"
11+
#include "ggml-impl.h" // This includes the full definition of ggml_cgraph
12+
13+
14+
//
15+
// Graph logging
16+
//
17+
// This is a simple logging system for the graph of a GGML model.
18+
//
19+
// The graph is logged to a CSV file.
20+
//
21+
// The CSV file contains the following columns:
22+
//
23+
// - node_id: The unique identifier for the node.
24+
// - name: The name of the node.
25+
// - op: The operation performed by the node.
26+
// - dim0, dim1, dim2, dim3: The dimensions of the node.
27+
// - bytes: The number of bytes in the node.
28+
// - flags: The flags of the node.
29+
// - src0..srcN: The source nodes of the node.
30+
//
31+
// The CSV file is written to the current working directory.
32+
// The CSV file is overwritten if it already exists.
33+
// The program will terminate after the graph is logged.
34+
//
35+
// The graph is logged when the environment variable GGML_LOG_GRAPH is set to 1.
36+
// The filename for the log file can be set using the environment variable GGML_LOG_GRAPH_FILENAME.
37+
//
38+
// The graph is logged using the ggml_log_graph function.
39+
//
40+
41+
42+
static FILE* ggml_graph_log_init(const char* filename) {
43+
FILE* file = fopen(filename, "w");
44+
if (file) {
45+
fprintf(stderr, "%s: Graph logging enabled, will write to '%s'\n", __func__, filename);
46+
47+
// Write CSV header - now with dynamic source columns
48+
fprintf(file, "node_id,name,op,dim0,dim1,dim2,dim3,bytes,flags");
49+
50+
// Add source columns based on GGML_MAX_SRC
51+
for (int i = 0; i < GGML_MAX_SRC; i++) {
52+
fprintf(file, ",src%d", i);
53+
}
54+
fprintf(file, "\n");
55+
} else {
56+
fprintf(stderr, "%s: Error: Failed to open graph file '%s' for writing.\n", __func__, filename);
57+
}
58+
return file;
59+
}
60+
61+
// Close the CSV log stream. Safe to call with NULL (fclose(NULL) would
// be undefined behavior, so the guard is required).
static void ggml_graph_log_free(FILE* file) {
    if (file == NULL) {
        return;
    }
    fclose(file);
}
66+
67+
static void write_tensor_to_csv(struct ggml_tensor* tensor, const char* custom_flags, FILE* file) {
68+
if (!tensor || !file) return;
69+
70+
// Get flags
71+
const char* flags = custom_flags ? custom_flags : "-";
72+
if (!custom_flags) {
73+
if (tensor->flags & GGML_TENSOR_FLAG_PARAM) {
74+
flags = "PARAM";
75+
} else if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
76+
flags = "INPUT";
77+
} else if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) {
78+
flags = "OUTPUT";
79+
}
80+
}
81+
82+
// Calculate size in bytes
83+
size_t total_size = ggml_nbytes(tensor);
84+
85+
// Write base tensor info
86+
fprintf(file,
87+
"%p,%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%.2f,%s",
88+
(void*)tensor, // node_id (pointer for uniqueness)
89+
tensor->name[0] ? tensor->name : "unnamed", // name
90+
ggml_op_name(tensor->op), // op
91+
tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], // dimensions
92+
(double)total_size, // bytes
93+
flags); // flags
94+
95+
// Write all source tensors dynamically
96+
for (int i = 0; i < GGML_MAX_SRC; i++) {
97+
fprintf(file, ",%p", (void*)tensor->src[i]);
98+
}
99+
100+
fprintf(file, "\n");
101+
}
102+
103+
// Dump the computation graph to a CSV file and terminate the process.
//
// Controlled by environment variables:
//   GGML_LOG_GRAPH          - enable when set to "1" or "true"
//   GGML_LOG_GRAPH_FILENAME - output path (default: "ggml_graph.csv")
//
// Writes one row per graph node, then one row (flagged "LEAF") per leaf
// tensor not already present among the nodes, then calls exit(0) — the
// caller never regains control when logging is enabled and succeeds.
void ggml_log_graph(struct ggml_cgraph* cgraph) {
    const char* log_graph_env = getenv("GGML_LOG_GRAPH");
    if (!log_graph_env || (strcmp(log_graph_env, "1") != 0 && strcmp(log_graph_env, "true") != 0)) {
        return;
    }

    // Validate the graph BEFORE opening the file: opening first would
    // leak the FILE handle (and truncate an existing log) on NULL input.
    if (!cgraph) {
        return;
    }

    // Get the filename from the environment variable, or use the default.
    const char* filename_env = getenv("GGML_LOG_GRAPH_FILENAME");
    const char* filename = filename_env ? filename_env : "ggml_graph.csv";

    FILE* file = ggml_graph_log_init(filename);
    if (!file) {
        return;
    }

    // Interior (computed) nodes.
    for (int i = 0; i < cgraph->n_nodes; i++) {
        write_tensor_to_csv(cgraph->nodes[i], NULL, file);
    }

    // Leaf tensors that did not already appear among the nodes.
    // The membership test is O(n_leafs * n_nodes) — acceptable for a
    // one-shot debugging dump that exits afterwards.
    for (int i = 0; i < cgraph->n_leafs; i++) {
        struct ggml_tensor* leaf = cgraph->leafs[i];
        if (!leaf) continue;

        bool already_processed = false;
        for (int j = 0; j < cgraph->n_nodes; j++) {
            if (cgraph->nodes[j] == leaf) {
                already_processed = true;
                break;
            }
        }
        if (already_processed) continue;

        write_tensor_to_csv(leaf, "LEAF", file);
    }

    // Flush and close so all data reaches disk before we exit.
    fflush(file);
    ggml_graph_log_free(file);

    fprintf(stderr, "Graph logging complete: %d nodes and %d leafs written to CSV file. Terminating.\n",
            cgraph->n_nodes, cgraph->n_leafs);
    exit(0);
}
150+

src/llama-model.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13064,6 +13064,9 @@ llm_graph_result_ptr llama_model::build_graph(
1306413064
// add on pooling layer
1306513065
llm->build_pooling(gf, cls, cls_b, cls_out, cls_out_b);
1306613066

13067+
// Log the entire computation graph after it's built
13068+
ggml_log_graph(gf);
13069+
1306713070
return std::move(llm->res);
1306813071
}
1306913072

0 commit comments

Comments
 (0)