From 271369d94563cfaa020c03f933c3be00075870b9 Mon Sep 17 00:00:00 2001 From: luker Date: Thu, 17 Apr 2025 18:10:58 -0500 Subject: [PATCH 1/2] Add VSCode Interactive debugger files for ease of use --- .vscode/c_cpp_properties.json | 20 ++++++++++ .vscode/launch.json | 73 +++++++++++++++++++++++++++++++++++ .vscode/tasks.json | 42 ++++++++++++++++++++ 3 files changed, 135 insertions(+) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/launch.json create mode 100644 .vscode/tasks.json diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000000000..f686aaf566df0 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,20 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "${workspaceFolder}/include/**", + "${workspaceFolder}/ggml/**" + ], + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++11", + "intelliSenseMode": "linux-gcc-x64", + "configurationProvider": "ms-vscode.cmake-tools", + "compileCommands": "${workspaceFolder}/build/compile_commands.json" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000000000..ba57704e5b629 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,73 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug llama-cli TinyLlama-1.1B-Chat-v1.0-GGUF", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/bin/llama-cli", + "args": [ + "-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", + "-p", "Hello, I am a language model" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build", + "miDebuggerPath": 
"/usr/bin/gdb" + }, + { + "name": "Debug llama-server TinyLlama-1.1B-Chat-v1.0-GGUF", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/bin/llama-server", + "args": [ + "-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build", + "miDebuggerPath": "/usr/bin/gdb" + }, + { + "name": "Debug llama-cli --list-devices", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/bin/llama-cli", + "args": ["--list-devices"], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build", + "miDebuggerPath": "/usr/bin/gdb" + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000000000..ff558300d2ebe --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,42 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "type": "shell", + "command": "cmake", + "args": [ + "--build", + "build", + "--config", + "Debug", + "-j8" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "problemMatcher": "$gcc", + "options": { + "cwd": "${workspaceFolder}" + }, + "dependsOn": [ + "cmake-configure" + ] + }, + { + "label": "cmake-configure", + "type": "shell", + "command": "cmake", + "args": [ + "-B", + "build", + "-DCMAKE_BUILD_TYPE=Debug" + ], + "problemMatcher": "$gcc", + "options": { + "cwd": "${workspaceFolder}" + } + } + ] +} \ No newline at end of file From a5410f046c01d01330c8a42f13b64a211fedf2c3 Mon Sep 17 00:00:00 2001 From: 
luker Date: Sun, 20 Apr 2025 15:19:19 -0500 Subject: [PATCH 2/2] Add graph logging feature to GGML This commit introduces a new feature to log the computation graph of the model to a CSV file. The logging can be enabled by setting the environment variable `GGML_LOG_GRAPH` to `1` or `true`. The output file can be customized using `GGML_LOG_GRAPH_FILENAME`. The CSV includes details such as node IDs, names, operations, dimensions, sizes, and flags for each tensor in the graph. The program will terminate after logging the graph. Additionally, the necessary header and source files for graph logging have been added, and the logging function is called after building the model graph. --- README.md | 32 +++++++ ggml/include/ggml-cpp.h | 1 + ggml/include/ggml-graph-logging.h | 19 ++++ ggml/src/CMakeLists.txt | 3 +- ggml/src/ggml-graph-logging.c | 150 ++++++++++++++++++++++++++++++ src/llama-model.cpp | 3 + 6 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 ggml/include/ggml-graph-logging.h create mode 100644 ggml/src/ggml-graph-logging.c diff --git a/README.md b/README.md index cf45f23cf4475..3586be309e1d0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,35 @@ +# GGML GRAPH LOGGING FORK OF LLAMA.CPP + +This fork includes a feature to log the computation graph of the model to a CSV file. + +## Graph Logging + +To enable graph logging, set the environment variable `GGML_LOG_GRAPH` to `1` or `true`: + +```bash +export GGML_LOG_GRAPH=1 +``` + +By default, the graph will be written to `ggml_graph.csv` in the current working directory. You can specify a different filename using the `GGML_LOG_GRAPH_FILENAME` environment variable: + +```bash +export GGML_LOG_GRAPH_FILENAME=/path/to/your/graph_log.csv +``` + +**Important:** When graph logging is enabled, the program will terminate immediately after writing the log file. 
+ +### Output Format + +The output CSV file contains the following columns for each node (tensor) in the graph: + +- `node_id`: The memory address of the tensor, serving as a unique ID. +- `name`: The name assigned to the tensor (if any). +- `op`: The GGML operation that produces this tensor. +- `dim0`, `dim1`, `dim2`, `dim3`: The dimensions of the tensor. +- `bytes`: The size of the tensor data in bytes. +- `flags`: Tensor flags (e.g., `PARAM`, `INPUT`, `OUTPUT`, `LEAF`). +- `src0`...`srcN`: The `node_id` (memory address) of the source tensors for this node, up to `GGML_MAX_SRC`. + # llama.cpp ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png) diff --git a/ggml/include/ggml-cpp.h b/ggml/include/ggml-cpp.h index a12342c25debe..7adf6706b6811 100644 --- a/ggml/include/ggml-cpp.h +++ b/ggml/include/ggml-cpp.h @@ -8,6 +8,7 @@ #include "ggml-alloc.h" #include "ggml-backend.h" #include "gguf.h" +#include "ggml-graph-logging.h" #include <memory> // Smart pointers for ggml types diff --git a/ggml/include/ggml-graph-logging.h b/ggml/include/ggml-graph-logging.h new file mode 100644 index 0000000000000..650e085bd24e8 --- /dev/null +++ b/ggml/include/ggml-graph-logging.h @@ -0,0 +1,19 @@ +// ggml-graph-logging.h +#pragma once + +#include +#include + +// Forward declaration for ggml_cgraph +struct ggml_cgraph; + +#ifdef __cplusplus +extern "C" { +#endif + +// Log the entire computation graph to CSV +void ggml_log_graph(struct ggml_cgraph* cgraph); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index f00700da71fcd..ccfb7837894ff 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -201,7 +201,8 @@ add_library(ggml-base ggml-threading.h ggml-quants.c ggml-quants.h - gguf.cpp) + gguf.cpp + ggml-graph-logging.c) target_include_directories(ggml-base PRIVATE .) 
if (GGML_BACKEND_DL)
diff --git a/ggml/src/ggml-graph-logging.c b/ggml/src/ggml-graph-logging.c
new file mode 100644
index 0000000000000..9dad46174f97f
--- /dev/null
+++ b/ggml/src/ggml-graph-logging.c
@@ -0,0 +1,167 @@
+// ggml-graph-logging.c
+#include "ggml-graph-logging.h"
+#include <stdlib.h> // for getenv, exit
+#include <stdio.h> // for fprintf, stderr
+#include <string.h> // for strcmp
+#include <stdbool.h> // for bool
+#include <inttypes.h> // for PRId64
+
+// Include the full definition of ggml structs
+#include "ggml.h"
+#include "ggml-impl.h" // This includes the full definition of ggml_cgraph
+
+//
+// Graph logging
+//
+// This is a simple logging system for the graph of a GGML model.
+//
+// The graph is logged to a CSV file with one row per tensor.
+//
+// The CSV file contains the following columns:
+//
+// - node_id: The address of the tensor, used as its unique identifier.
+// - name: The name of the tensor ("unnamed" if it has none).
+// - op: The operation that produces the tensor.
+// - dim0, dim1, dim2, dim3: The dimensions of the tensor.
+// - bytes: The size of the tensor data in bytes.
+// - flags: PARAM, INPUT, OUTPUT or LEAF, or "-" if none applies.
+// - src0..srcN: The node_id of each source tensor (GGML_MAX_SRC columns).
+//
+// The graph is logged when the environment variable GGML_LOG_GRAPH is set to
+// "1" or "true". The file is "ggml_graph.csv" in the current working directory
+// unless GGML_LOG_GRAPH_FILENAME names another path; an existing file is
+// overwritten.
+//
+// The graph is logged using the ggml_log_graph function, which terminates the
+// program after the file has been written.
+//
+
+// Open `filename` for writing and emit the CSV header row.
+// Returns the open stream, or NULL (after reporting the error) on failure.
+static FILE* ggml_graph_log_init(const char* filename) {
+    FILE* file = fopen(filename, "w");
+    if (!file) {
+        fprintf(stderr, "%s: Error: Failed to open graph file '%s' for writing.\n", __func__, filename);
+        return NULL;
+    }
+
+    fprintf(stderr, "%s: Graph logging enabled, will write to '%s'\n", __func__, filename);
+
+    // Fixed columns first, then one source column per possible operand
+    fprintf(file, "node_id,name,op,dim0,dim1,dim2,dim3,bytes,flags");
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        fprintf(file, ",src%d", i);
+    }
+    fprintf(file, "\n");
+
+    return file;
+}
+
+// Close a stream returned by ggml_graph_log_init; NULL is ignored.
+static void ggml_graph_log_free(FILE* file) {
+    if (file) {
+        fclose(file);
+    }
+}
+
+// Append one CSV row describing `tensor` to `file`.
+// When `custom_flags` is non-NULL it is written as the flags column; otherwise
+// the first set flag among PARAM, INPUT and OUTPUT is reported, or "-" if none
+// is set. Does nothing if `tensor` or `file` is NULL.
+static void write_tensor_to_csv(struct ggml_tensor* tensor, const char* custom_flags, FILE* file) {
+    if (!tensor || !file) return;
+
+    const char* flags = custom_flags;
+    if (!flags) {
+        if (tensor->flags & GGML_TENSOR_FLAG_PARAM) {
+            flags = "PARAM";
+        } else if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
+            flags = "INPUT";
+        } else if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) {
+            flags = "OUTPUT";
+        } else {
+            flags = "-";
+        }
+    }
+
+    // The byte count is a size_t, so print it as an integer with %zu
+    fprintf(file,
+        "%p,%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%zu,%s",
+        (void*)tensor, // node_id (pointer for uniqueness)
+        tensor->name[0] ? tensor->name : "unnamed", // name
+        ggml_op_name(tensor->op), // op
+        tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], // dimensions
+        ggml_nbytes(tensor), // bytes
+        flags); // flags
+
+    // One column per source slot; unused slots are NULL pointers
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        fprintf(file, ",%p", (void*)tensor->src[i]);
+    }
+
+    fprintf(file, "\n");
+}
+
+// Returns true if `tensor` is one of the graph's nodes.
+static bool ggml_graph_log_is_node(const struct ggml_cgraph* cgraph, const struct ggml_tensor* tensor) {
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        if (cgraph->nodes[i] == tensor) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Write the graph to a CSV file and terminate the program.
+// Returns without side effects unless GGML_LOG_GRAPH is "1" or "true"; also
+// returns if `cgraph` is NULL or the output file cannot be opened.
+void ggml_log_graph(struct ggml_cgraph* cgraph) {
+    const char* log_graph_env = getenv("GGML_LOG_GRAPH");
+    if (!log_graph_env || (strcmp(log_graph_env, "1") != 0 && strcmp(log_graph_env, "true") != 0)) {
+        return;
+    }
+
+    // Check the graph before opening the file so that a NULL graph neither
+    // truncates an existing log nor leaks the stream
+    if (!cgraph) {
+        return;
+    }
+
+    // An unset or empty GGML_LOG_GRAPH_FILENAME selects the default name
+    const char* filename_env = getenv("GGML_LOG_GRAPH_FILENAME");
+    const char* filename = (filename_env && filename_env[0]) ? filename_env : "ggml_graph.csv";
+
+    FILE* file = ggml_graph_log_init(filename);
+    if (!file) {
+        return;
+    }
+
+    int n_nodes_written = 0;
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        struct ggml_tensor* node = cgraph->nodes[i];
+        if (!node) continue;
+        write_tensor_to_csv(node, NULL, file);
+        n_nodes_written++;
+    }
+
+    // Leafs are tagged LEAF; skip any that were already written as nodes
+    int n_leafs_written = 0;
+    for (int i = 0; i < cgraph->n_leafs; i++) {
+        struct ggml_tensor* leaf = cgraph->leafs[i];
+        if (!leaf || ggml_graph_log_is_node(cgraph, leaf)) continue;
+        write_tensor_to_csv(leaf, "LEAF", file);
+        n_leafs_written++;
+    }
+
+    // Flush explicitly so that a failed write is detected before reporting
+    const bool write_ok = fflush(file) == 0 && !ferror(file);
+    ggml_graph_log_free(file);
+    if (!write_ok) {
+        fprintf(stderr, "%s: Error: Failed to write graph file '%s'.\n", __func__, filename);
+        exit(EXIT_FAILURE);
+    }
+
+    fprintf(stderr, "Graph logging complete: %d nodes and %d leafs written to CSV file. Terminating.\n",
+        n_nodes_written, n_leafs_written);
+    exit(0);
+}
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 6b7bfecf3a1cf..460558887af95 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -13064,6 +13064,9 @@ llm_graph_result_ptr llama_model::build_graph(
     // add on pooling layer
     llm->build_pooling(gf, cls, cls_b, cls_out, cls_out_b);

+    // Log the entire computation graph after it's built
+    ggml_log_graph(gf);
+
     return std::move(llm->res);
 }
