Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .vscode/c_cpp_properties.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/include/**",
"${workspaceFolder}/ggml/**"
],
"defines": [],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c11",
"cppStandard": "c++11",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.cmake-tools",
"compileCommands": "${workspaceFolder}/build/compile_commands.json"
}
],
"version": 4
}
73 changes: 73 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Debug llama-cli TinyLlama-1.1B-Chat-v1.0-GGUF",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/bin/llama-cli",
"args": [
"-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"-p", "Hello, I am a language model"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "build",
"miDebuggerPath": "/usr/bin/gdb"
},
{
"name": "Debug llama-server TinyLlama-1.1B-Chat-v1.0-GGUF",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/bin/llama-server",
"args": [
"-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "build",
"miDebuggerPath": "/usr/bin/gdb"
},
{
"name": "Debug llama-cli --list-devices",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/bin/llama-cli",
"args": ["--list-devices"],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "build",
"miDebuggerPath": "/usr/bin/gdb"
}
]
}
42 changes: 42 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "build",
"type": "shell",
"command": "cmake",
"args": [
"--build",
"build",
"--config",
"Debug",
"-j8"
],
"group": {
"kind": "build",
"isDefault": true
},
"problemMatcher": "$gcc",
"options": {
"cwd": "${workspaceFolder}"
},
"dependsOn": [
"cmake-configure"
]
},
{
"label": "cmake-configure",
"type": "shell",
"command": "cmake",
"args": [
"-B",
"build",
"-DCMAKE_BUILD_TYPE=Debug"
],
"problemMatcher": "$gcc",
"options": {
"cwd": "${workspaceFolder}"
}
}
]
}
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,35 @@
# GGML GRAPH LOGGING FORK OF LLAMA.CPP

This fork includes a feature to log the computation graph of the model to a CSV file.

## Graph Logging

To enable graph logging, set the environment variable `GGML_LOG_GRAPH` to `1` or `true`:

```bash
export GGML_LOG_GRAPH=1
```

By default, the graph will be written to `ggml_graph.csv` in the current working directory. You can specify a different filename using the `GGML_LOG_GRAPH_FILENAME` environment variable:

```bash
export GGML_LOG_GRAPH_FILENAME=/path/to/your/graph_log.csv
```

**Important:** When graph logging is enabled, the program will terminate immediately after writing the log file.

### Output Format

The output CSV file contains the following columns for each node (tensor) in the graph:

- `node_id`: The memory address of the tensor, serving as a unique ID.
- `name`: The name assigned to the tensor (if any).
- `op`: The GGML operation that produces this tensor.
- `dim0`, `dim1`, `dim2`, `dim3`: The dimensions of the tensor.
- `bytes`: The size of the tensor data in bytes.
- `flags`: Tensor flags (e.g., `PARAM`, `INPUT`, `OUTPUT`, `LEAF`).
- `src0`...`srcN`: The `node_id` (memory address) of the source tensors for this node, up to `GGML_MAX_SRC`.

# llama.cpp

![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
Expand Down
1 change: 1 addition & 0 deletions ggml/include/ggml-cpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "gguf.h"
#include "ggml-graph-logging.h"
#include <memory>

// Smart pointers for ggml types
Expand Down
19 changes: 19 additions & 0 deletions ggml/include/ggml-graph-logging.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// ggml-graph-logging.h
#pragma once

#include <stdio.h>
#include <stdint.h>

// Forward declaration for ggml_cgraph
struct ggml_cgraph;

#ifdef __cplusplus
extern "C" {
#endif

// Log the entire computation graph to CSV.
// No-op unless the environment variable GGML_LOG_GRAPH is set to "1" or
// "true". The output path defaults to "ggml_graph.csv" and can be overridden
// via GGML_LOG_GRAPH_FILENAME.
// WARNING: when logging is enabled and succeeds, this calls exit(0) and
// does not return to the caller.
void ggml_log_graph(struct ggml_cgraph* cgraph);

#ifdef __cplusplus
}
#endif
3 changes: 2 additions & 1 deletion ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,8 @@ add_library(ggml-base
ggml-threading.h
ggml-quants.c
ggml-quants.h
gguf.cpp)
gguf.cpp
ggml-graph-logging.c)

target_include_directories(ggml-base PRIVATE .)
if (GGML_BACKEND_DL)
Expand Down
150 changes: 150 additions & 0 deletions ggml/src/ggml-graph-logging.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// ggml-graph-logging.c
#include "ggml-graph-logging.h"
#include <stdlib.h> // for getenv
#include <stdio.h> // for fprintf, stderr
#include <stdbool.h>
#include <string.h>
#include <inttypes.h>

// Include the full definition of ggml structs
#include "ggml.h"
#include "ggml-impl.h" // This includes the full definition of ggml_cgraph


//
// Graph logging
//
// This is a simple logging system for the graph of a GGML model.
//
// The graph is logged to a CSV file.
//
// The CSV file contains the following columns:
//
// - node_id: The unique identifier for the node.
// - name: The name of the node.
// - op: The operation performed by the node.
// - dim0, dim1, dim2, dim3: The dimensions of the node.
// - bytes: The number of bytes in the node.
// - flags: The flags of the node.
// - src0..srcN: The source nodes of the node.
//
// By default, the CSV file ("ggml_graph.csv") is written to the current working directory; a different path may be given via GGML_LOG_GRAPH_FILENAME.
// The CSV file is overwritten if it already exists.
// The program will terminate after the graph is logged.
//
// The graph is logged when the environment variable GGML_LOG_GRAPH is set to "1" or "true".
// The filename for the log file can be set using the environment variable GGML_LOG_GRAPH_FILENAME.
//
// The graph is logged using the ggml_log_graph function.
//


// Open the CSV log file for writing and emit the header row.
// Returns the open stream on success, or NULL (after printing a message to
// stderr) if the file could not be opened.
static FILE* ggml_graph_log_init(const char* filename) {
    FILE* out = fopen(filename, "w");
    if (!out) {
        fprintf(stderr, "%s: Error: Failed to open graph file '%s' for writing.\n", __func__, filename);
        return NULL;
    }

    fprintf(stderr, "%s: Graph logging enabled, will write to '%s'\n", __func__, filename);

    // Fixed columns first, then one "srcN" column per possible source slot.
    fputs("node_id,name,op,dim0,dim1,dim2,dim3,bytes,flags", out);
    for (int src = 0; src < GGML_MAX_SRC; src++) {
        fprintf(out, ",src%d", src);
    }
    fputc('\n', out);

    return out;
}

// Close the CSV log stream; a NULL stream is tolerated as a no-op.
static void ggml_graph_log_free(FILE* file) {
    if (!file) {
        return;
    }
    fclose(file);
}

// Append one CSV row describing `tensor` to `file`.
// `custom_flags` overrides the flags column (e.g. "LEAF"); when NULL the
// column is derived from tensor->flags (PARAM/INPUT/OUTPUT, "-" if none).
// A NULL tensor or file makes this a no-op.
static void write_tensor_to_csv(struct ggml_tensor* tensor, const char* custom_flags, FILE* file) {
    if (!tensor || !file) return;

    // Resolve the flags column
    const char* flags = custom_flags ? custom_flags : "-";
    if (!custom_flags) {
        if (tensor->flags & GGML_TENSOR_FLAG_PARAM) {
            flags = "PARAM";
        } else if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
            flags = "INPUT";
        } else if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) {
            flags = "OUTPUT";
        }
    }

    // Total size of the tensor data in bytes
    size_t total_size = ggml_nbytes(tensor);

    // Write base tensor info. `bytes` is an exact integer count, so print it
    // directly as a size_t (%zu) rather than routing it through a double with
    // %.2f, which both loses exactness for very large sizes and emits a
    // spurious ".00" suffix in the CSV.
    fprintf(file,
        "%p,%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%zu,%s",
        (void*)tensor,                              // node_id (pointer for uniqueness)
        tensor->name[0] ? tensor->name : "unnamed", // name
        ggml_op_name(tensor->op),                   // op
        tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], // dimensions
        total_size,                                 // bytes
        flags);                                     // flags

    // One column per source slot; empty slots print as the null address.
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        fprintf(file, ",%p", (void*)tensor->src[i]);
    }

    fprintf(file, "\n");
}

// Dump every node and leaf of `cgraph` to a CSV file, then terminate the
// process. This is a one-shot debugging aid gated by environment variables:
//   GGML_LOG_GRAPH          - must be "1" or "true" for logging to happen
//   GGML_LOG_GRAPH_FILENAME - output path (default: "ggml_graph.csv")
// NOTE: when a graph is successfully logged this calls exit(0) and never
// returns to the caller.
void ggml_log_graph(struct ggml_cgraph* cgraph) {
    const char* log_graph_env = getenv("GGML_LOG_GRAPH");
    if (!log_graph_env || (strcmp(log_graph_env, "1") != 0 && strcmp(log_graph_env, "true") != 0)) {
        return;
    }

    // Validate the graph BEFORE opening the file: the original ordering
    // opened (and truncated) the output file and then leaked the FILE*
    // handle when cgraph was NULL.
    if (!cgraph) {
        return;
    }

    // Get the filename from the environment variable, or use the default
    const char* filename_env = getenv("GGML_LOG_GRAPH_FILENAME");
    const char* filename = filename_env ? filename_env : "ggml_graph.csv";

    FILE* file = ggml_graph_log_init(filename);
    if (!file) {
        return;
    }

    // All computed nodes in evaluation order.
    for (int i = 0; i < cgraph->n_nodes; i++) {
        write_tensor_to_csv(cgraph->nodes[i], NULL, file);
    }

    // Leafs that do not also appear among the nodes. The containment scan is
    // O(n_nodes * n_leafs), which is acceptable for a one-shot dump.
    for (int i = 0; i < cgraph->n_leafs; i++) {
        struct ggml_tensor* leaf = cgraph->leafs[i];
        if (!leaf) continue;

        bool already_processed = false;
        for (int j = 0; j < cgraph->n_nodes; j++) {
            if (cgraph->nodes[j] == leaf) {
                already_processed = true;
                break;
            }
        }
        if (already_processed) continue;

        write_tensor_to_csv(leaf, "LEAF", file);
    }

    // Flush and close so all data reaches disk before the process exits.
    fflush(file);
    ggml_graph_log_free(file);

    fprintf(stderr, "Graph logging complete: %d nodes and %d leafs written to CSV file. Terminating.\n",
        cgraph->n_nodes, cgraph->n_leafs);
    exit(0);
}

3 changes: 3 additions & 0 deletions src/llama-model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13064,6 +13064,9 @@ llm_graph_result_ptr llama_model::build_graph(
// add on pooling layer
llm->build_pooling(gf, cls, cls_b, cls_out, cls_out_b);

// Log the entire computation graph after it's built
ggml_log_graph(gf);

return std::move(llm->res);
}

Expand Down